patch-2.3.20 linux/fs/inode.c


diff -u --recursive --new-file v2.3.19/linux/fs/inode.c linux/fs/inode.c
@@ -10,6 +10,7 @@
 #include <linux/dcache.h>
 #include <linux/init.h>
 #include <linux/quotaops.h>
+#include <linux/slab.h>
 
 /*
  * New inode.c implementation.
@@ -21,6 +22,8 @@
  * Famous last words.
  */
 
+/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */
+
 #define INODE_PARANOIA 1
 /* #define INODE_DEBUG 1 */
 
@@ -28,7 +31,7 @@
  * Inode lookup is no longer as critical as it used to be:
  * most of the lookups are going to be through the dcache.
  */
-#define HASH_BITS	8
+#define HASH_BITS	14
 #define HASH_SIZE	(1UL << HASH_BITS)
 #define HASH_MASK	(HASH_SIZE-1)
 
@@ -36,9 +39,9 @@
  * Each inode can be on two separate lists. One is
  * the hash list of the inode, used for lookups. The
  * other linked list is the "type" list:
- *  "in_use" - valid inode, hashed if i_nlink > 0
- *  "dirty"  - valid inode, hashed if i_nlink > 0, dirty.
- *  "unused" - ready to be re-used. Not hashed.
+ *  "in_use" - valid inode, i_count > 0, i_nlink > 0
+ *  "dirty"  - as "in_use" but also dirty
+ *  "unused" - valid inode, i_count = 0
  *
  * A "dirty" list is maintained for each super block,
  * allowing for low-overhead inode sync() operations.
@@ -61,11 +64,36 @@
  */
 struct {
 	int nr_inodes;
-	int nr_free_inodes;
+	int nr_unused;
 	int dummy[5];
 } inodes_stat = {0, 0,};
 
-int max_inodes;
+static kmem_cache_t * inode_cachep;
+
+#define alloc_inode() \
+	 ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
+#define destroy_inode(inode) kmem_cache_free(inode_cachep, (inode))
+
+/*
+ * These are initializations that only need to be done
+ * once, because the fields are idempotent across use
+ * of the inode, so let the slab be aware of that.
+ */
+static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+	struct inode * inode = (struct inode *) foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR)
+	{
+		memset(inode, 0, sizeof(*inode));
+		init_waitqueue_head(&inode->i_wait);
+		INIT_LIST_HEAD(&inode->i_hash);
+		INIT_LIST_HEAD(&inode->i_dentry);
+		sema_init(&inode->i_sem, 1);
+		spin_lock_init(&inode->i_shared_lock);
+	}
+}
 
 /*
  * Put the inode on the super block's dirty list.
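
/*
 * Illustrative sketch, not part of the patch: the slab pattern the inode
 * cache adopts above, applied to a made-up object.  A once-only
 * constructor sets up the fields that stay valid across alloc/free
 * cycles, so they need no re-initialisation on every allocation.
 * "struct foo" and foo_cachep are hypothetical names; the slab calls are
 * the same ones used by this patch.
 */
struct foo {
	struct list_head list;
	int refcount;
};

static kmem_cache_t * foo_cachep;

static void foo_init_once(void * obj, kmem_cache_t * cachep, unsigned long flags)
{
	struct foo * f = (struct foo *) obj;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		memset(f, 0, sizeof(*f));
		INIT_LIST_HEAD(&f->list);
	}
}

	/* at init time, mirroring inode_init() below: */
	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo), 0,
				       SLAB_HWCACHE_ALIGN, foo_init_once, NULL);

	/* per object: kmem_cache_alloc(foo_cachep, SLAB_KERNEL) to get one,
	   kmem_cache_free(foo_cachep, f) to give it back */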
@@ -118,20 +146,6 @@
 		__wait_on_inode(inode);
 }
 
-/*
- * These are initializations that only need to be done
- * once, because the fields are idempotent across use
- * of the inode..
- */
-static inline void init_once(struct inode * inode)
-{
-	memset(inode, 0, sizeof(*inode));
-	init_waitqueue_head(&inode->i_wait);
-	INIT_LIST_HEAD(&inode->i_hash);
-	INIT_LIST_HEAD(&inode->i_dentry);
-	sema_init(&inode->i_sem, 1);
-	spin_lock_init(&inode->i_shared_lock);
-}
 
 static inline void write_inode(struct inode *inode)
 {
@@ -147,7 +161,8 @@
 		spin_lock(&inode_lock);
 	} else {
 		list_del(&inode->i_list);
-		list_add(&inode->i_list, &inode_in_use);
+		list_add(&inode->i_list,
+			 inode->i_count ? &inode_in_use : &inode_unused);
 		/* Set I_LOCK, reset I_DIRTY */
 		inode->i_state ^= I_DIRTY | I_LOCK;
 		spin_unlock(&inode_lock);
@@ -233,6 +248,8 @@
 {
 	if (inode->i_nrpages)
 		BUG();
+	if (!(inode->i_state & I_FREEING))
+		BUG();
 	wait_on_inode(inode);
 	if (IS_QUOTAINIT(inode))
 		DQUOT_DROP(inode);
@@ -243,35 +260,24 @@
 }
 
 /*
- * Dispose-list gets a local list, so it doesn't need to
- * worry about list corruption. It releases the inode lock
- * while clearing the inodes.
+ * Dispose-list gets a local list with local inodes in it, so it doesn't
+ * need to worry about list corruption and SMP locks.
  */
 static void dispose_list(struct list_head * head)
 {
-	struct list_head *next;
-	int count = 0;
+	struct list_head * inode_entry;
+	struct inode * inode;
 
-	spin_unlock(&inode_lock);
-	next = head->next;
-	for (;;) {
-		struct list_head * tmp = next;
-		struct inode * inode;
+	while ((inode_entry = head->next) != head)
+	{
+		list_del(inode_entry);
 
-		next = next->next;
-		if (tmp == head)
-			break;
-		inode = list_entry(tmp, struct inode, i_list);
+		inode = list_entry(inode_entry, struct inode, i_list);
 		if (inode->i_nrpages)
 			truncate_inode_pages(inode, 0);
 		clear_inode(inode);
-		count++;
+		destroy_inode(inode);
 	}
-
-	/* Add them all to the unused list in one fell swoop */
-	spin_lock(&inode_lock);
-	list_splice(head, &inode_unused);
-	inodes_stat.nr_free_inodes += count;
 }
 
 /*
@@ -280,7 +286,7 @@
 static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
 {
 	struct list_head *next;
-	int busy = 0;
+	int busy = 0, count = 0;
 
 	next = head->next;
 	for (;;) {
@@ -299,10 +305,13 @@
 			list_del(&inode->i_list);
 			list_add(&inode->i_list, dispose);
 			inode->i_state |= I_FREEING;
+			count++;
 			continue;
 		}
 		busy = 1;
 	}
+	/* only unused inodes may be cached with i_count zero */
+	inodes_stat.nr_unused -= count;
 	return busy;
 }
 
@@ -320,10 +329,12 @@
 
 	spin_lock(&inode_lock);
 	busy = invalidate_list(&inode_in_use, sb, &throw_away);
+	busy |= invalidate_list(&inode_unused, sb, &throw_away);
 	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
-	dispose_list(&throw_away);
 	spin_unlock(&inode_lock);
 
+	dispose_list(&throw_away);
+
 	return busy;
 }
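
/*
 * Illustrative sketch, not part of the patch: the locking pattern used
 * above.  Victims are moved onto a caller-local list while inode_lock is
 * held, and the potentially blocking teardown (truncate_inode_pages(),
 * clear_inode(), destroy_inode()) is done only after the spinlock has
 * been dropped, since dispose_list() owns the local list exclusively.
 */
	LIST_HEAD(throw_away);

	spin_lock(&inode_lock);
	/* ... move doomed inodes from the global lists onto throw_away ... */
	spin_unlock(&inode_lock);

	dispose_list(&throw_away);	/* no inode_lock held here */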
 
@@ -339,155 +350,84 @@
  *      dispose_list.
  */
 #define CAN_UNUSE(inode) \
-	(((inode)->i_count | (inode)->i_state | (inode)->i_nrpages) == 0)
+	(((inode)->i_state | (inode)->i_nrpages) == 0)
 #define INODE(entry)	(list_entry(entry, struct inode, i_list))
 
-static int free_inodes(void)
+void prune_icache(int goal)
 {
-	struct list_head list, *entry, *freeable = &list;
-	int found = 0;
+	LIST_HEAD(list);
+	struct list_head *entry, *freeable = &list;
+	int count = 0;
+	struct inode * inode;
 
-	INIT_LIST_HEAD(freeable);
-	entry = inode_in_use.next;
-	while (entry != &inode_in_use) {
+	spin_lock(&inode_lock);
+	/* go simple and safe syncing everything before starting */
+	sync_all_inodes();
+
+	entry = inode_unused.prev;
+	while (entry != &inode_unused)
+	{
 		struct list_head *tmp = entry;
 
-		entry = entry->next;
-		if (!CAN_UNUSE(INODE(tmp)))
+		entry = entry->prev;
+		inode = INODE(tmp);
+		if (!CAN_UNUSE(inode))
 			continue;
+		if (inode->i_count)
+			BUG();
 		list_del(tmp);
-		list_del(&INODE(tmp)->i_hash);
-		INIT_LIST_HEAD(&INODE(tmp)->i_hash);
+		list_del(&inode->i_hash);
+		INIT_LIST_HEAD(&inode->i_hash);
 		list_add(tmp, freeable);
-		list_entry(tmp, struct inode, i_list)->i_state = I_FREEING;
-		found = 1;
+		inode->i_state |= I_FREEING;
+		count++;
+		if (!--goal)
+			break;
 	}
+	inodes_stat.nr_unused -= count;
+	spin_unlock(&inode_lock);
 
-	if (found)
-		dispose_list(freeable);
-
-	return found;
+	dispose_list(freeable);
 }
 
-/*
- * Searches the inodes list for freeable inodes,
- * shrinking the dcache before (and possible after,
- * if we're low)
- */
-static void try_to_free_inodes(int goal)
+int shrink_icache_memory(int priority, int gfp_mask)
 {
-	/*
-	 * First stry to just get rid of unused inodes.
-	 *
-	 * If we can't reach our goal that way, we'll have
-	 * to try to shrink the dcache and sync existing
-	 * inodes..
-	 */
-	free_inodes();
-	goal -= inodes_stat.nr_free_inodes;
-	if (goal > 0) {
-		spin_unlock(&inode_lock);
-		select_dcache(goal, 0);
-		prune_dcache(goal);
-		spin_lock(&inode_lock);
-		sync_all_inodes();
-		free_inodes();
+	if (gfp_mask & __GFP_IO)
+	{
+		int count = 0;
+		
+		if (priority)
+			count = inodes_stat.nr_unused / priority;
+		prune_icache(count);
+		/* FIXME: kmem_cache_shrink here should tell us
+		   the number of pages freed, and it should
+		   work in a __GFP_DMA/__GFP_BIGMEM behaviour
+		   to free only the interesting pages in
+		   function of the needs of the current allocation. */
+		kmem_cache_shrink(inode_cachep);
 	}
-}
 
-/*
- * This is the externally visible routine for
- * inode memory management.
- */
-void free_inode_memory(int goal)
-{
-	spin_lock(&inode_lock);
-	free_inodes();
-	spin_unlock(&inode_lock);
+	return 0;
 }
 
-
-/*
- * This is called with the spinlock held, but releases
- * the lock when freeing or allocating inodes.
- * Look out! This returns with the inode lock held if
- * it got an inode..
- *
- * We do inode allocations two pages at a time to reduce
- * fragmentation.
- */
-#define INODE_PAGE_ORDER	1
-#define INODE_ALLOCATION_SIZE	(PAGE_SIZE << INODE_PAGE_ORDER)
-#define INODES_PER_ALLOCATION	(INODE_ALLOCATION_SIZE/sizeof(struct inode))
-
-static struct inode * grow_inodes(void)
+static inline void __iget(struct inode * inode)
 {
-	struct inode * inode;
-
-	/*
-	 * Check whether to restock the unused list.
-	 */
-	if (inodes_stat.nr_inodes > max_inodes) {
-		struct list_head *tmp;
-		try_to_free_inodes(inodes_stat.nr_inodes >> 2);
-		tmp = inode_unused.next;
-		if (tmp != &inode_unused) {
-			inodes_stat.nr_free_inodes--;
-			list_del(tmp);
-			inode = list_entry(tmp, struct inode, i_list);
-			return inode;
-		}
-	}
-		
-	spin_unlock(&inode_lock);
-	inode = (struct inode *)__get_free_pages(GFP_KERNEL,INODE_PAGE_ORDER);
-	if (inode) {
-		int size;
-		struct inode * tmp;
-
-		size = INODE_ALLOCATION_SIZE - 2*sizeof(struct inode);
-		tmp = inode;
-		spin_lock(&inode_lock);
-		do {
-			tmp++;
-			init_once(tmp);
-			list_add(&tmp->i_list, &inode_unused);
-			size -= sizeof(struct inode);
-		} while (size >= 0);
-		init_once(inode);
-		/*
-		 * Update the inode statistics
-		 */
-		inodes_stat.nr_inodes += INODES_PER_ALLOCATION;
-		inodes_stat.nr_free_inodes += INODES_PER_ALLOCATION - 1;
-		return inode;
-	}
-
-	/*
-	 * If the allocation failed, do an extensive pruning of 
-	 * the dcache and then try again to free some inodes.
-	 */
-	prune_dcache(inodes_stat.nr_inodes >> 2);
-
-	spin_lock(&inode_lock);
-	free_inodes();
+	if (!inode->i_count++)
 	{
-		struct list_head *tmp = inode_unused.next;
-		if (tmp != &inode_unused) {
-			inodes_stat.nr_free_inodes--;
-			list_del(tmp);
-			inode = list_entry(tmp, struct inode, i_list);
-			return inode;
+		if (!(inode->i_state & I_DIRTY))
+		{
+			list_del(&inode->i_list);
+			list_add(&inode->i_list, &inode_in_use);
 		}
+		inodes_stat.nr_unused--;
 	}
-	spin_unlock(&inode_lock);
-
-	printk("grow_inodes: allocation failed\n");
-	return NULL;
 }
 
 /*
  * Called with the inode lock held.
+ * NOTE: we are not increasing the inode-refcount, you must call __iget()
+ * by hand after calling find_inode now! This simplifies iunique and won't
+ * add any additional branch in the common code.
  */
 static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head)
 {
@@ -505,7 +445,6 @@
 			continue;
 		if (inode->i_ino != ino)
 			continue;
-		inode->i_count++;
 		break;
 	}
 	return inode;
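
/*
 * Illustrative sketch, not part of the patch: the new reference rule
 * described in the NOTE above.  find_inode() no longer bumps i_count, so
 * a caller that wants to keep the result must __iget() it while still
 * holding inode_lock, exactly as iget() does later in this patch.
 */
	spin_lock(&inode_lock);
	inode = find_inode(sb, ino, head);
	if (inode)
		__iget(inode);
	spin_unlock(&inode_lock);
	if (inode)
		wait_on_inode(inode);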
@@ -518,7 +457,7 @@
  * i_sb, i_ino, i_count, i_state and the lists have
  * been initialized elsewhere..
  */
-void clean_inode(struct inode *inode)
+static void clean_inode(struct inode *inode)
 {
 	memset(&inode->u, 0, sizeof(inode->u));
 	inode->i_sock = 0;
@@ -528,7 +467,6 @@
 	inode->i_size = 0;
 	inode->i_generation = 0;
 	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
-	sema_init(&inode->i_sem, 1);
 	inode->i_pipe = NULL;
 }
 
@@ -542,15 +480,11 @@
 {
 	static unsigned long last_ino = 0;
 	struct inode * inode;
-	struct list_head * tmp;
 
-	spin_lock(&inode_lock);
-	tmp = inode_unused.next;
-	if (tmp != &inode_unused) {
-		list_del(tmp);
-		inodes_stat.nr_free_inodes--;
-		inode = list_entry(tmp, struct inode, i_list);
-add_new_inode:
+	inode = alloc_inode();
+	if (inode)
+	{
+		spin_lock(&inode_lock);
 		list_add(&inode->i_list, &inode_in_use);
 		inode->i_sb = NULL;
 		inode->i_dev = 0;
@@ -560,22 +494,12 @@
 		inode->i_state = 0;
 		spin_unlock(&inode_lock);
 		clean_inode(inode);
-		return inode;
 	}
-
-	/*
-	 * Warning: if this succeeded, we will now
-	 * return with the inode lock.
-	 */
-	inode = grow_inodes();
-	if (inode)
-		goto add_new_inode;
-
 	return inode;
 }
 
 /*
- * This is called with the inode lock held.. Be careful.
+ * This is called without the inode lock held.. Be careful.
  *
  * We no longer cache the sb_flags in i_flags - see fs.h
  *	-- rmk@arm.uk.linux.org
@@ -583,56 +507,47 @@
 static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head)
 {
 	struct inode * inode;
-	struct list_head * tmp = inode_unused.next;
 
-	if (tmp != &inode_unused) {
-		list_del(tmp);
-		inodes_stat.nr_free_inodes--;
-		inode = list_entry(tmp, struct inode, i_list);
-add_new_inode:
-		list_add(&inode->i_list, &inode_in_use);
-		list_add(&inode->i_hash, head);
-		inode->i_sb = sb;
-		inode->i_dev = sb->s_dev;
-		inode->i_ino = ino;
-		inode->i_flags = 0;
-		inode->i_count = 1;
-		inode->i_state = I_LOCK;
-		spin_unlock(&inode_lock);
+	inode = alloc_inode();
+	if (inode) {
+		struct inode * old;
 
-		clean_inode(inode);
-		sb->s_op->read_inode(inode);
+		spin_lock(&inode_lock);
+		/* We released the lock, so.. */
+		old = find_inode(sb, ino, head);
+		if (!old)
+		{
+			list_add(&inode->i_list, &inode_in_use);
+			list_add(&inode->i_hash, head);
+			inode->i_sb = sb;
+			inode->i_dev = sb->s_dev;
+			inode->i_ino = ino;
+			inode->i_flags = 0;
+			inode->i_count = 1;
+			inode->i_state = I_LOCK;
+			spin_unlock(&inode_lock);
 
-		/*
-		 * This is special!  We do not need the spinlock
-		 * when clearing I_LOCK, because we're guaranteed
-		 * that nobody else tries to do anything about the
-		 * state of the inode when it is locked, as we
-		 * just created it (so there can be no old holders
-		 * that haven't tested I_LOCK).
-		 */
-		inode->i_state &= ~I_LOCK;
-		wake_up(&inode->i_wait);
+			clean_inode(inode);
+			sb->s_op->read_inode(inode);
 
-		return inode;
-	}
+			/*
+			 * This is special!  We do not need the spinlock
+			 * when clearing I_LOCK, because we're guaranteed
+			 * that nobody else tries to do anything about the
+			 * state of the inode when it is locked, as we
+			 * just created it (so there can be no old holders
+			 * that haven't tested I_LOCK).
+			 */
+			inode->i_state &= ~I_LOCK;
+			wake_up(&inode->i_wait);
 
-	/*
-	 * We need to expand. Note that "grow_inodes()" will
-	 * release the spinlock, but will return with the lock 
-	 * held again if the allocation succeeded.
-	 */
-	inode = grow_inodes();
-	if (inode) {
-		/* We released the lock, so.. */
-		struct inode * old = find_inode(sb, ino, head);
-		if (!old)
-			goto add_new_inode;
-		list_add(&inode->i_list, &inode_unused);
-		inodes_stat.nr_free_inodes++;
+			return inode;
+		}
+		__iget(inode);
 		spin_unlock(&inode_lock);
-		wait_on_inode(old);
-		return old;
+		destroy_inode(inode);
+		inode = old;
+		wait_on_inode(inode);
 	}
 	return inode;
 }
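
/*
 * Illustrative sketch, not part of the patch: the shape of the race
 * handling in get_new_inode() above.  The inode is allocated with no
 * locks held (the allocation may block), the hash lookup is repeated
 * under inode_lock, and the freshly allocated inode is thrown away if
 * another CPU managed to insert the same inode in the meantime.
 */
	inode = alloc_inode();			/* may sleep, no locks held */
	spin_lock(&inode_lock);
	old = find_inode(sb, ino, head);	/* re-check under the lock */
	if (!old) {
		/* ... hash "inode", initialise it, unlock, ->read_inode() ... */
		return inode;
	}
	__iget(old);				/* keep the existing inode */
	spin_unlock(&inode_lock);
	destroy_inode(inode);			/* we lost the race */
	wait_on_inode(old);
	return old;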
@@ -660,7 +575,6 @@
 			spin_unlock(&inode_lock);
 			return res;
 		}
-		inode->i_count--; /* compensate find_inode() */
 	} else {
 		counter = max_reserved + 1;
 	}
@@ -671,10 +585,10 @@
 struct inode *igrab(struct inode *inode)
 {
 	spin_lock(&inode_lock);
-	if (inode->i_state & I_FREEING)
-		inode = NULL;
+	if (!(inode->i_state & I_FREEING))
+		__iget(inode);
 	else
-		inode->i_count++;
+		inode = NULL;
 	spin_unlock(&inode_lock);
 	if (inode)
 		wait_on_inode(inode);
@@ -689,14 +603,16 @@
 	spin_lock(&inode_lock);
 	inode = find_inode(sb, ino, head);
 	if (inode) {
+		__iget(inode);
 		spin_unlock(&inode_lock);
 		wait_on_inode(inode);
 		return inode;
 	}
+	spin_unlock(&inode_lock);
+
 	/*
-	 * get_new_inode() will do the right thing, releasing
-	 * the inode lock and re-trying the search in case it
-	 * had to block at any point.
+	 * get_new_inode() will do the right thing, re-trying the search
+	 * in case it had to block at any point.
 	 */
 	return get_new_inode(sb, ino, head);
 }
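
/*
 * Illustrative usage sketch, not part of the patch: a filesystem grabs an
 * in-core inode through iget() and releases it with iput().  After this
 * change an inode whose last reference is dropped is no longer recycled
 * immediately; if it is clean and still linked it simply moves to
 * inode_unused, where prune_icache() can reap it under memory pressure.
 */
	struct inode * inode = iget(sb, ino);
	if (inode) {
		/* ... use the in-core inode ... */
		iput(inode);
	}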
@@ -721,6 +637,7 @@
 {
 	if (inode) {
 		struct super_operations *op = NULL;
+		int destroy = 0;
 
 		if (inode->i_sb && inode->i_sb->s_op)
 			op = inode->i_sb->s_op;
@@ -750,13 +667,17 @@
 				inode->i_state|=I_FREEING;
 				spin_unlock(&inode_lock);
 				clear_inode(inode);
+				destroy = 1;
 				spin_lock(&inode_lock);
-				list_add(&inode->i_list, &inode_unused);
-				inodes_stat.nr_free_inodes++;
 			}
-			else if (!(inode->i_state & I_DIRTY)) {
-				list_del(&inode->i_list);
-				list_add(&inode->i_list, &inode_in_use);
+			else
+			{
+				if (!(inode->i_state & I_DIRTY)) {
+					list_del(&inode->i_list);
+					list_add(&inode->i_list,
+						 &inode_unused);
+				}
+				inodes_stat.nr_unused++;
 			}
 #ifdef INODE_PARANOIA
 if (inode->i_flock)
@@ -778,6 +699,8 @@
 				kdevname(inode->i_dev), inode->i_ino);
 		}
 		spin_unlock(&inode_lock);
+		if (destroy)
+			destroy_inode(inode);
 	}
 }
 
@@ -795,14 +718,11 @@
 }
 
 /*
- * Initialize the hash tables and default
- * value for max inodes
+ * Initialize the hash tables.
  */
-#define MAX_INODE (16384)
-
 void __init inode_init(void)
 {
-	int i, max;
+	int i;
 	struct list_head *head = inode_hashtable;
 
 	i = HASH_SIZE;
@@ -812,11 +732,12 @@
 		i--;
 	} while (i);
 
-	/* Initial guess at reasonable inode number */
-	max = num_physpages >> 1;
-	if (max > MAX_INODE)
-		max = MAX_INODE;
-	max_inodes = max;
+	/* inode slab cache */
+	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
+					 0, SLAB_HWCACHE_ALIGN, init_once,
+					 NULL);
+	if (!inode_cachep)
+		panic("cannot create inode slab cache");
 }
 
 void update_atime (struct inode *inode)
