patch-2.3.1 linux/fs/buffer.c

Next file: linux/fs/coda/cnode.c
Previous file: linux/fs/autofs/waitq.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.3.0/linux/fs/buffer.c linux/fs/buffer.c
@@ -70,7 +70,7 @@
 
 static struct buffer_head * unused_list = NULL;
 static struct buffer_head * reuse_list = NULL;
-static struct wait_queue * buffer_wait = NULL;
+static DECLARE_WAIT_QUEUE_HEAD(buffer_wait);
 
 static int nr_buffers = 0;
 static int nr_buffers_type[NR_LIST] = {0,};
@@ -100,8 +100,7 @@
 				each time we call refill */
 		int nref_dirt; /* Dirty buffer threshold for activating bdflush
 				  when trying to refill buffers. */
-		int interval;  /* Interval (seconds) between spontaneous
-				  bdflush runs */
+		int dummy1;    /* unused */
 		int age_buffer;  /* Time for normal buffer to age before 
 				    we flush it */
 		int age_super;  /* Time for superblock to age before we 
@@ -110,10 +109,10 @@
 		int dummy3;    /* unused */
 	} b_un;
 	unsigned int data[N_PARAM];
-} bdf_prm = {{40, 500, 64, 256, 5, 30*HZ, 5*HZ, 1884, 2}};
+} bdf_prm = {{40, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
 
 /* These are the min and max parameter values that we will allow to be assigned */
-int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  1,   1*HZ,   1*HZ, 1, 1};
+int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   1*HZ,   1*HZ, 1, 1};
 int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 600*HZ, 600*HZ, 2047, 5};
 
 void wakeup_bdflush(int);
@@ -130,10 +129,9 @@
 void __wait_on_buffer(struct buffer_head * bh)
 {
 	struct task_struct *tsk = current;
-	struct wait_queue wait;
+	DECLARE_WAITQUEUE(wait, tsk);
 
 	bh->b_count++;
-	wait.task = tsk;
 	add_wait_queue(&bh->b_wait, &wait);
 repeat:
 	tsk->state = TASK_UNINTERRUPTIBLE;
@@ -929,6 +927,7 @@
 	}
 
 	memset(bh,0,sizeof(*bh));
+	init_waitqueue_head(&bh->b_wait);
 	nr_unused_buffer_heads++;
 	bh->b_next_free = unused_list;
 	unused_list = bh;
@@ -986,6 +985,7 @@
 	 */
 	if((bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER)) != NULL) {
 		memset(bh, 0, sizeof(*bh));
+		init_waitqueue_head(&bh->b_wait);
 		nr_buffer_heads++;
 		return bh;
 	}
@@ -1010,6 +1010,7 @@
 	if(!async &&
 	   (bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL)) != NULL) {
 		memset(bh, 0, sizeof(*bh));
+		init_waitqueue_head(&bh->b_wait);
 		nr_buffer_heads++;
 		return bh;
 	}
@@ -1030,7 +1031,7 @@
 static struct buffer_head * create_buffers(unsigned long page, 
 						unsigned long size, int async)
 {
-	struct wait_queue wait = { current, NULL };
+	DECLARE_WAITQUEUE(wait, current);
 	struct buffer_head *bh, *head;
 	long offset;
 
@@ -1554,14 +1555,15 @@
  * response to dirty buffers.  Once this process is activated, we write back
  * a limited number of buffers to the disks and then go back to sleep again.
  */
-static struct wait_queue * bdflush_done = NULL;
+static DECLARE_WAIT_QUEUE_HEAD(bdflush_wait);
+static DECLARE_WAIT_QUEUE_HEAD(bdflush_done);
 struct task_struct *bdflush_tsk = 0;
 
 void wakeup_bdflush(int wait)
 {
 	if (current == bdflush_tsk)
 		return;
-	wake_up_process(bdflush_tsk);
+	wake_up(&bdflush_wait);
 	if (wait) {
 		run_task_queue(&tq_disk);
 		sleep_on(&bdflush_done);
@@ -1570,107 +1572,82 @@
 
 
 /* 
- * Here we attempt to write back old buffers.
- * To prevent deadlocks for a loop device:
- * 1) Do non-blocking writes to loop (avoids deadlock with running
- *	out of request blocks).
- * 2) But do a blocking write if the only dirty buffers are loop buffers
- *	(otherwise we go into an infinite busy-loop).
- * 3) Quit writing loop blocks if a freelist went low (avoids deadlock
- *	with running out of free buffers for loop's "real" device).
-*/
+ * Here we attempt to write back old buffers.  We also try to flush inodes
+ * and supers, since this function is essentially "update", and
+ * otherwise there would be no way of ensuring that these quantities ever
+ * get written back.  Ideally, we would have a timestamp on the inodes
+ * and superblocks so that we could write back only the old ones as well.
+ */
 
-static inline void sync_old_buffers(void)
+static int sync_old_buffers(void)
 {
 	int i;
-	int ndirty = 0;
-	int wrta_cmd = WRITEA;
-#ifdef DEBUG
-	int ncount = 0, nwritten = 0;
-#endif
+	int ndirty, nwritten;
+	int nlist;
+	int ncount;
 	struct buffer_head * bh, *next;
 
-#ifdef DEBUG
-	bh = lru_list[BUF_CLEAN];
-	if(bh)
-		for(i = nr_buffers_type[BUF_CLEAN]; --i > 0; bh = next) {
-			next = bh->b_next_free;
+	sync_supers(0);
+	sync_inodes(0);
 
-			/* Dirty/locked buffer on clean list?  Refile it */
-			if (buffer_locked(bh) || buffer_dirty(bh)) {
-				ncount++;
-				refile_buffer(bh);
-			}
-		}
+	ncount = 0;
+#ifdef DEBUG
+	for(nlist = 0; nlist < NR_LIST; nlist++)
+#else
+	for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++)
 #endif
+	{
+		ndirty = 0;
+		nwritten = 0;
+	repeat:
 
-	bh = lru_list[BUF_LOCKED];
-	if(bh)
-		for(i = nr_buffers_type[BUF_LOCKED]; --i > 0; bh = next) {
-			next = bh->b_next_free;
-
-			/* Unlocked buffer on locked list?  Refile it */
-			if (!buffer_locked(bh))
-				refile_buffer(bh);
-		}
-
- restart:
-	bh = lru_list[BUF_DIRTY];
-	if(bh) 
-		for (i = nr_buffers_type[BUF_DIRTY];
-		     i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
-		     bh = next) {
-			/* We may have stalled while waiting for
-			   I/O to complete. */
-			if(bh->b_list != BUF_DIRTY)
-				goto restart;
-			next = bh->b_next_free;
-			if(!lru_list[BUF_DIRTY]) {
-				printk("Dirty list empty %d\n", i);
-				break;
-			}
-					  
-			/* Clean buffer on dirty list?  Refile it */
-			if (!buffer_dirty(bh)) {
-				refile_buffer(bh);
-				continue;
-			}
-					  
-			if (buffer_locked(bh))
-				continue;
-			/* Should we write back buffers that are
-			   shared or not??  Currently dirty buffers
-			   are not shared, so it does not matter */
-			next->b_count++;
-			bh->b_count++;
-			ndirty++;
-			bh->b_flushtime = 0;
-			if (MAJOR(bh->b_dev) == LOOP_MAJOR) {
-				ll_rw_block(wrta_cmd,1, &bh);
-				wrta_cmd = WRITEA;
-				if (buffer_dirty(bh))
-					--ndirty;
-			}
-			else
-				ll_rw_block(WRITE, 1, &bh);
-			bh->b_count--;
-			next->b_count--;
-		}
-	/* If we didn't write anything, but there are still
-	 * dirty buffers, then make the next write to a
-	 * loop device to be a blocking write.
-	 * This lets us block--which we _must_ do! */
-	if (ndirty == 0
-	    && nr_buffers_type[BUF_DIRTY] > 0 && wrta_cmd != WRITE) {
-		wrta_cmd = WRITE;
-		goto restart;
+		bh = lru_list[nlist];
+		if(bh) 
+			 for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
+				 /* We may have stalled while waiting for I/O to complete. */
+				 if(bh->b_list != nlist) goto repeat;
+				 next = bh->b_next_free;
+				 if(!lru_list[nlist]) {
+					 printk("Dirty list empty %d\n", i);
+					 break;
+				 }
+				 
+				 /* Clean buffer on dirty list?  Refile it */
+				 if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh)) {
+					 refile_buffer(bh);
+					 continue;
+				 }
+				  
+				  /* Unlocked buffer on locked list?  Refile it */
+				  if (nlist == BUF_LOCKED && !buffer_locked(bh)) {
+					  refile_buffer(bh);
+					  continue;
+				  }
+				 
+				 if (buffer_locked(bh) || !buffer_dirty(bh))
+					  continue;
+				 ndirty++;
+				 if(time_before(jiffies, bh->b_flushtime))
+					continue;
+				 nwritten++;
+				 next->b_count++;
+				 bh->b_count++;
+				 bh->b_flushtime = 0;
+#ifdef DEBUG
+				 if(nlist != BUF_DIRTY) ncount++;
+#endif
+				 ll_rw_block(WRITE, 1, &bh);
+				 bh->b_count--;
+				 next->b_count--;
+			 }
 	}
-
+	run_task_queue(&tq_disk);
 #ifdef DEBUG
 	if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
-	printk("wrote %d/%d buffers...", nwritten, ndirty);
+	printk("Wrote %d/%d buffers\n", nwritten, ndirty);
 #endif
 	run_task_queue(&tq_disk);
+	return 0;
 }
 
 
@@ -1687,12 +1664,10 @@
 	if (!capable(CAP_SYS_ADMIN))
 		goto out;
 
-	if (func == 1)
-		/* Func 1 used to call sync_old_buffers; a user space
-		   daemon would call it periodically.  This is no
-		   longer necessary.  Returning -EPERM here makes the
-		   daemon silently exit.  */
-		goto out;
+	if (func == 1) {
+		 error = sync_old_buffers();
+		 goto out;
+	}
 
 	/* Basically func 1 means read param 1, 2 means write param 1, etc */
 	if (func >= 2) {
@@ -1721,17 +1696,27 @@
 	return error;
 }
 
-/* This is the actual bdflush daemon itself. It used to be started
- * from the syscall above, but now we launch it ourselves internally
- * with kernel_thread(...)  directly after the first thread in
- * init/main.c.  Every so often, or when woken up by another task that
- * needs memory, we call sync_old_buffers to partially clear the dirty list.
- */
+/* This is the actual bdflush daemon itself. It used to be started from
+ * the syscall above, but now we launch it ourselves internally with
+ * kernel_thread(...)  directly after the first thread in init/main.c */
 
+/* To prevent deadlocks for a loop device:
+ * 1) Do non-blocking writes to loop (avoids deadlock with running
+ *	out of request blocks).
+ * 2) But do a blocking write if the only dirty buffers are loop buffers
+ *	(otherwise we go into an infinite busy-loop).
+ * 3) Quit writing loop blocks if a freelist went low (avoids deadlock
+ *	with running out of free buffers for loop's "real" device).
+*/
 int bdflush(void * unused) 
 {
-	long remaining = HZ * bdf_prm.b_un.interval;
-	struct task_struct *tsk = current;
+	int i;
+	int ndirty;
+	int nlist;
+	int ncount;
+	struct buffer_head * bh, *next;
+	int major;
+	int wrta_cmd = WRITEA;	/* non-blocking write for LOOP */
 
 	/*
 	 *	We have a bare-bones task_struct, and really should fill
@@ -1739,12 +1724,10 @@
 	 *	display semi-sane things. Not real crucial though...  
 	 */
 
-	tsk->session = 1;
-	tsk->pgrp = 1;
-	tsk->dumpable = 0;  /* inhibit ptrace() */
-	strcpy(tsk->comm, "kflushd");
-	sigfillset(&tsk->blocked);
-	bdflush_tsk = tsk;
+	current->session = 1;
+	current->pgrp = 1;
+	sprintf(current->comm, "kflushd");
+	bdflush_tsk = current;
 
 	/*
 	 *	As a kernel thread we want to tamper with system buffers
@@ -1754,36 +1737,93 @@
 	lock_kernel();
 		 
 	for (;;) {
-		tsk->state = TASK_INTERRUPTIBLE;
-		remaining = schedule_timeout(remaining);
-
 #ifdef DEBUG
 		printk("bdflush() activated...");
 #endif
-		CHECK_EMERGENCY_SYNC
 
-		if (remaining == 0) {
-			/*
-			 * Also try to flush inodes and supers, since
-			 * otherwise there would be no way of ensuring
-			 * that these quantities ever get written
-			 * back.  Ideally, we would have a timestamp
-			 * on the inodes and superblocks so that we
-			 * could write back only the old ones.
-			 */
-			sync_supers(0);
-			sync_inodes(0);
-			remaining = HZ * bdf_prm.b_un.interval;
-		}
-			
-		/* Keep flushing till there aren't very many dirty buffers */
-		do {
-			sync_old_buffers();
-		} while(nr_buffers_type[BUF_DIRTY] > nr_buffers * bdf_prm.b_un.nfract/100);
+		CHECK_EMERGENCY_SYNC
 
-		wake_up(&bdflush_done);
+		ncount = 0;
 #ifdef DEBUG
+		for(nlist = 0; nlist < NR_LIST; nlist++)
+#else
+		for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++)
+#endif
+		 {
+			 ndirty = 0;
+		 repeat:
+
+			 bh = lru_list[nlist];
+			 if(bh) 
+				  for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
+				       bh = next) {
+					  /* We may have stalled while waiting for I/O to complete. */
+					  if(bh->b_list != nlist) goto repeat;
+					  next = bh->b_next_free;
+					  if(!lru_list[nlist]) {
+						  printk("Dirty list empty %d\n", i);
+						  break;
+					  }
+					  
+					  /* Clean buffer on dirty list?  Refile it */
+					  if (nlist == BUF_DIRTY && !buffer_dirty(bh)) {
+						  refile_buffer(bh);
+						  continue;
+					  }
+					  
+					  /* Unlocked buffer on locked list?  Refile it */
+					  if (nlist == BUF_LOCKED && !buffer_locked(bh)) {
+						  refile_buffer(bh);
+						  continue;
+					  }
+					  
+					  if (buffer_locked(bh) || !buffer_dirty(bh))
+						   continue;
+					  major = MAJOR(bh->b_dev);
+					  /* Should we write back buffers that are shared or not??
+					     Currently dirty buffers are not shared, so it does not matter. */
+					  next->b_count++;
+					  bh->b_count++;
+					  ndirty++;
+					  bh->b_flushtime = 0;
+					  if (major == LOOP_MAJOR) {
+						  ll_rw_block(wrta_cmd,1, &bh);
+						  wrta_cmd = WRITEA;
+						  if (buffer_dirty(bh))
+							  --ndirty;
+					  }
+					  else
+					  ll_rw_block(WRITE, 1, &bh);
+#ifdef DEBUG
+					  if(nlist != BUF_DIRTY) ncount++;
+#endif
+					  bh->b_count--;
+					  next->b_count--;
+				  }
+		 }
+#ifdef DEBUG
+		if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
 		printk("sleeping again.\n");
 #endif
+		/* If we didn't write anything, but there are still
+		 * dirty buffers, then make the next write to a
+		 * loop device a blocking write.
+		 * This lets us block--which we _must_ do! */
+		if (ndirty == 0 && nr_buffers_type[BUF_DIRTY] > 0 && wrta_cmd != WRITE) {
+			wrta_cmd = WRITE;
+			continue;
+		}
+		run_task_queue(&tq_disk);
+		wake_up(&bdflush_done);
+		
+		/* If there are still a lot of dirty buffers around, skip the sleep
+		   and flush some more */
+		if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) {
+			spin_lock_irq(&current->sigmask_lock);
+			flush_signals(current);
+			spin_unlock_irq(&current->sigmask_lock);
+
+			interruptible_sleep_on(&bdflush_wait);
+		}
 	}
 }

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)