patch-2.3.99-pre8 linux/scripts/cramfs/mkcramfs.c

Next file: linux/Documentation/Changes
Previous file: linux/net/unix/af_unix.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.3.99-pre7/linux/scripts/cramfs/mkcramfs.c linux/scripts/cramfs/mkcramfs.c
@@ -59,6 +59,9 @@
 
 	/* FS data */
 	void *uncompressed;
+        /* points to other identical file */
+        struct entry *same;
+        unsigned int offset;            /* pointer to compressed data in archive */
 	unsigned int dir_offset;	/* Where in the archive is the directory entry? */
 
 	/* organization */
@@ -84,7 +87,28 @@
  */
 #define MAX_INPUT_NAMELEN 255
 
-static unsigned int parse_directory(const char *name, struct entry **prev, loff_t *fslen_ub)
+/* Pre-order search of the tree at orig for an entry visited before newfile whose data is identical; 1 = stop walking. */
+static int find_identical_file(struct entry *orig,struct entry *newfile)
+{
+        if(orig==newfile) return 1;	/* reached newfile itself: nothing past this point is a candidate */
+        if(!orig) return 0;
+        if(orig->size==newfile->size && orig->uncompressed && !memcmp(orig->uncompressed,newfile->uncompressed,orig->size)) {
+                newfile->same=orig;
+                return 1;	/* match recorded: stop instead of comparing against the rest of the tree */
+        }
+        return find_identical_file(orig->child,newfile) || find_identical_file(orig->next,newfile);
+}
+
+/* Visit orig, its following siblings and all their descendants; search for an identical entry for each one that has data. */
+static void eliminate_doubles(struct entry *root,struct entry *orig) {
+        for(; orig; orig=orig->next) {
+                if(orig->size && orig->uncompressed)
+                        find_identical_file(root,orig);
+                eliminate_doubles(root,orig->child);
+        }
+}
+
+static unsigned int parse_directory(struct entry *root_entry, const char *name, struct entry **prev, loff_t *fslen_ub)
 {
 	DIR *dir;
 	int count = 0, totalsize = 0;
@@ -173,7 +197,7 @@
 		size = sizeof(struct cramfs_inode) + ((namelen + 3) & ~3);
 		*fslen_ub += size;
 		if (S_ISDIR(st.st_mode)) {
-			entry->size = parse_directory(path, &entry->child, fslen_ub);
+			entry->size = parse_directory(root_entry, path, &entry->child, fslen_ub);
 		} else if (S_ISREG(st.st_mode)) {
 			/* TODO: We ought to open files in do_compress, one
 			   at a time, instead of amassing all these memory
@@ -233,10 +257,14 @@
 				warn_dev = 1;
 		}
 
-		if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode))
+		if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
 			/* block pointers & data expansion allowance + data */
-			*fslen_ub += ((4+26)*((entry->size - 1) / blksize + 1)
-				      + MIN(entry->size + 3, st.st_blocks << 9));
+                        if(entry->size) 
+                                *fslen_ub += ((4+26)*((entry->size - 1) / blksize + 1)
+                                              + MIN(entry->size + 3, st.st_blocks << 9));
+                        else 
+                                *fslen_ub += MIN(entry->size + 3, st.st_blocks << 9);
+                }
 
 		/* Link it into the list */
 		*prev = entry;
@@ -448,7 +476,7 @@
 	   st_blocks * 512.  But if you say that then perhaps
 	   administrative data should also be included in both. */
 	change = new_size - original_size;
-	printf("%5.2f%% (%d bytes)\t%s\n",
+	printf("%6.2f%% (%+d bytes)\t%s\n",
 	       (change * 100) / (double) original_size, change, name);
 
 	return curr;
@@ -459,26 +487,23 @@
  * Traverse the entry tree, writing data for every item that has
  * non-null entry->compressed (i.e. every symlink and non-empty
  * regfile).
- *
- * Frees the entry pointers as it goes.
  */
 static unsigned int write_data(struct entry *entry, char *base, unsigned int offset)
 {
 	do {
 		if (entry->uncompressed) {
-			set_data_offset(entry, base, offset);
-			offset = do_compress(base, offset, entry->name, entry->uncompressed, entry->size);
+                        if(entry->same) {	/* data is identical to the entry recorded in ->same */
+                                set_data_offset(entry, base, entry->same->offset);	/* reuse that entry's data offset; nothing new is compressed */
+                                entry->offset=entry->same->offset;	/* keep ->offset valid in case another entry's ->same points here */
+                        } else {
+                                set_data_offset(entry, base, offset);
+                                entry->offset=offset;	/* remember where this data starts, for duplicates handled later */
+                                offset = do_compress(base, offset, entry->name, entry->uncompressed, entry->size);	/* advances the running offset */
+                        }
 		}
 		else if (entry->child)
 			offset = write_data(entry->child, base, offset);
-
-		/* Free the old before processing the next. */
-		{
-			struct entry *tmp = entry;
-			entry = entry->next;
-			free(tmp->name);
-			free(tmp);
-		}
+                entry=entry->next;	/* entries are no longer freed here: ->same links of later entries still read them */
 	} while (entry);
 	return offset;
 }
@@ -537,7 +562,7 @@
 	root_entry->uid = st.st_uid;
 	root_entry->gid = st.st_gid;
 
-	root_entry->size = parse_directory(argv[1], &root_entry->child, &fslen_ub);
+	root_entry->size = parse_directory(root_entry, argv[1], &root_entry->child, &fslen_ub);
 	if (fslen_ub > MAXFSLEN) {
 		fprintf(stderr,
 			"warning: guestimate of required size (upper bound) is %luMB, but maximum image size is %uMB.  We might die prematurely.\n",
@@ -545,6 +570,11 @@
 			MAXFSLEN >> 20);
 		fslen_ub = MAXFSLEN;
 	}
+
+        /* find duplicate files. TODO: uses the most inefficient algorithm
+           possible. */
+        eliminate_doubles(root_entry,root_entry);
+
 
 	/* TODO: Why do we use a private/anonymous mapping here
            followed by a write below, instead of just a shared mapping


FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)