patch-2.3.13 linux/fs/buffer.c

diff -u --recursive --new-file v2.3.12/linux/fs/buffer.c linux/fs/buffer.c
@@ -840,7 +840,7 @@
  * pressures on different devices - thus the (currently unused)
  * 'dev' parameter.
  */
-int too_many_dirty_buffers;
+static int too_many_dirty_buffers;
 
 void balance_dirty(kdev_t dev)
 {
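
The comment above describes the intent: processes that dirty lots of
buffers should throttle themselves rather than rely on bdflush waking
up every once in a while. The caller-side pattern (taken from the code
this patch adds further down; bh stands for whichever buffer_head was
just written) looks like this:

    /* only the writer that newly dirtied the buffer pays the cost */
    if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
            __mark_dirty(bh, 0);              /* queue on the dirty list */
            if (too_many_dirty_buffers)       /* cheap global flag ...   */
                    balance_dirty(bh->b_dev); /* ... throttle this device */
    }
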
@@ -1036,13 +1036,6 @@
 	}
 }
 
-static void put_unused_buffer_head(struct buffer_head *bh)
-{
-	spin_lock(&unused_list_lock);
-	__put_unused_buffer_head(bh);
-	spin_unlock(&unused_list_lock);
-}
-
 /*
  * Reserve NR_RESERVED buffer heads for async IO requests to avoid
  * no-buffer-head deadlock.  Return NULL on failure; waiting for
@@ -1148,11 +1141,13 @@
  */
 no_grow:
 	if (head) {
+		spin_lock(&unused_list_lock);
 		do {
 			bh = head;
 			head = head->b_this_page;
-			put_unused_buffer_head(bh);
+			__put_unused_buffer_head(bh);
 		} while (head);
+		spin_unlock(&unused_list_lock);
 
 		/* Wake up any waiters ... */
 		wake_up(&buffer_wait);
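
The no_grow change above is a lock-batching cleanup: instead of taking
unused_list_lock once per buffer head through the now-removed
put_unused_buffer_head() wrapper, the whole per-page list is released
inside a single critical section. Schematically (same names as in the
patch):

    /* before: one lock round-trip per buffer head */
    do {
            bh = head;
            head = head->b_this_page;
            spin_lock(&unused_list_lock);
            __put_unused_buffer_head(bh);
            spin_unlock(&unused_list_lock);
    } while (head);

    /* after: one critical section for the whole list */
    spin_lock(&unused_list_lock);
    do {
            bh = head;
            head = head->b_this_page;
            __put_unused_buffer_head(bh);
    } while (head);
    spin_unlock(&unused_list_lock);
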
@@ -1201,8 +1196,8 @@
 		PAGE_BUG(page);
 	/*
 	 * Allocate async buffer heads pointing to this page, just for I/O.
-	 * They show up in the buffer hash table and are registered in
-	 * page->buffers.
+	 * They don't show up in the buffer hash table, but they *are*
+	 * registered in page->buffers.
 	 */
 	head = create_buffers(page_address(page), size, 1);
 	if (page->buffers)
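
Because these async buffer heads are never hashed, the buffer-cache
hash lookup (get_hash_table() in this era) cannot find them; the only
way to reach them is through the page itself. A minimal traversal
sketch (the same circular b_this_page list that the new write path
below walks):

    struct buffer_head *bh = page->buffers;
    do {
            /* bh exists purely for I/O on this page */
            bh = bh->b_this_page;   /* circular: closes on page->buffers */
    } while (bh != page->buffers);
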
@@ -1528,6 +1523,176 @@
 	return err;
 }
 
+/*
+ * For moronic filesystems that do not allow holes in files.
+ * As a degenerate case we allow offset==PAGE_SIZE, bytes==0.
+ */
+
+int block_write_cont_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
+{
+	struct dentry *dentry = file->f_dentry;
+	struct inode *inode = dentry->d_inode;
+	unsigned long block;
+	int err, partial;
+	unsigned long blocksize, start_block, end_block;
+	unsigned long start_offset, start_bytes, end_bytes;
+	unsigned long bbits, blocks, i, len;
+	struct buffer_head *bh, *head;
+	char * target_buf, *target_data;
+	unsigned long data_offset = offset;
+
+	/* in-page offset of the old end of file, clamped to the range
+	 * [0, data_offset] (offset is unsigned: compare, don't test "< 0") */
+	if (inode->i_size <= page->offset)
+		offset = 0;
+	else {
+		offset = inode->i_size - page->offset;
+		if (offset >= data_offset)
+			offset = data_offset;
+	}
+	bytes += data_offset-offset;
+
+	target_buf = (char *)page_address(page) + offset;
+	target_data = (char *)page_address(page) + data_offset;
+
+	if (!PageLocked(page))
+		BUG();
+
+	blocksize = inode->i_sb->s_blocksize;
+	if (!page->buffers)
+		create_empty_buffers(page, inode, blocksize);
+	head = page->buffers;
+
+	bbits = inode->i_sb->s_blocksize_bits;
+	block = page->offset >> bbits;
+	blocks = PAGE_SIZE >> bbits;
+	start_block = offset >> bbits;
+	end_block = (offset + bytes - 1) >> bbits;
+	start_offset = offset & (blocksize - 1);
+	start_bytes = blocksize - start_offset;
+	if (start_bytes > bytes)
+		start_bytes = bytes;
+	end_bytes = (offset+bytes) & (blocksize - 1);
+	if (end_bytes > bytes)
+		end_bytes = bytes;
+
+	/* all of these are unsigned, so only upper-bound checks make sense */
+	if (offset > PAGE_SIZE)
+		BUG();
+	if (bytes+offset > PAGE_SIZE)
+		BUG();
+	if (start_block > blocks)
+		BUG();
+	if (end_block >= blocks)
+		BUG();
+	/* FIXME: currently we assume page alignment. */
+	if (page->offset & (PAGE_SIZE-1))
+		BUG();
+
+	i = 0;
+	bh = head;
+	partial = 0;
+	do {
+		if (!bh)
+			BUG();
+
+		if ((i < start_block) || (i > end_block)) {
+			if (!buffer_uptodate(bh))
+				partial = 1;
+			goto skip;
+		}
+
+		/*
+		 * If the buffer is not up-to-date, we need to ask the low-level
+		 * FS to do something for us (we used to have assumptions about
+		 * the meaning of b_blocknr etc, that's bad).
+		 *
+		 * If we are not going to fill a block completely (a partial
+		 * first or last block), it must be made up-to-date first, so
+		 * that the part of it we do not overwrite keeps valid data.
+		 */
+		bh->b_end_io = end_buffer_io_sync;
+		if (!buffer_mapped(bh)) {
+			err = inode->i_op->get_block(inode, block, bh, 1);
+			if (err)
+				goto out;
+		}
+
+		if (!buffer_uptodate(bh) && (start_offset || (end_bytes && (i == end_block)))) {
+			if (buffer_new(bh)) {
+				memset(bh->b_data, 0, bh->b_size);
+			} else {
+				ll_rw_block(READ, 1, &bh);
+				wait_on_buffer(bh);
+				err = -EIO;
+				if (!buffer_uptodate(bh))
+					goto out;
+			}
+		}
+
+		len = blocksize;
+		if (start_offset) {
+			len = start_bytes;
+			start_offset = 0;
+		} else if (end_bytes && (i == end_block)) {
+			len = end_bytes;
+			end_bytes = 0;
+		}
+		err = 0;
+		if (target_buf+len<=target_data)
+			memset(target_buf, 0, len);
+		else if (target_buf<target_data) {
+			memset(target_buf, 0, target_data-target_buf);
+			copy_from_user(target_data, buf,
+					len+target_buf-target_data);
+		} else
+			err = copy_from_user(target_buf, buf, len);
+		target_buf += len;
+		buf += len;
+
+		/*
+		 * we dirty buffers only after copying the data into
+		 * the page - this way we can dirty the buffer even if
+		 * the bh is still doing IO.
+		 *
+		 * NOTE! This also does a direct dirty balance check,
+		 * rather than relying on bdflush just waking up every
+		 * once in a while. This is to catch (and slow down)
+		 * the processes that write tons of dirty buffers.
+		 *
+		 * Note how we do NOT want to do this in the full block
+		 * case: full pages are flushed not by the people who
+		 * dirtied them, but by people who need memory. And we
+		 * should not penalize them for somebody else writing
+		 * lots of dirty pages.
+		 */
+		set_bit(BH_Uptodate, &bh->b_state);
+		if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
+			__mark_dirty(bh, 0);
+			if (too_many_dirty_buffers)
+				balance_dirty(bh->b_dev);
+		}
+
+		if (err) {
+			err = -EFAULT;
+			goto out;
+		}
+
+skip:
+		i++;
+		block++;
+		bh = bh->b_this_page;
+	} while (bh != head);
+
+	/*
+	 * If this is a partial write that happened to make all buffers
+	 * uptodate then we can optimize away a bogus readpage() for
+	 * the next read(). Here we 'discover' whether the page went
+	 * uptodate as a result of this (potentially partial) write.
+	 */
+	if (!partial)
+		SetPageUptodate(page);
+	return bytes;
+out:
+	ClearPageUptodate(page);
+	return err;
+}
+
 
 /*
  * IO completion routine for a buffer_head being used for kiobuf IO: we
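
The trickiest part of block_write_cont_page() above is the per-block
copy decision around target_buf and target_data: page bytes in
[offset, data_offset) lie between the old end of file and the start of
the new data and must be zero-filled, while everything from data_offset
onwards comes from user space. A self-contained sketch of that
three-way split (fill_block is a hypothetical helper; it is a
user-space illustration, so copy_from_user() becomes plain memcpy()):

    #include <string.h>

    /*
     * One block of length len starts at target_buf; target_data marks
     * where the user's data begins within the page.  buf points at the
     * next unconsumed user bytes; returns how many of them were used,
     * so the caller can advance buf accordingly.
     */
    static unsigned long fill_block(char *target_buf, char *target_data,
                                    const char *buf, unsigned long len)
    {
            if (target_buf + len <= target_data) {
                    /* block lies entirely in the zero-fill gap */
                    memset(target_buf, 0, len);
                    return 0;
            }
            if (target_buf < target_data) {
                    /* straddles the boundary: zero the gap, copy the rest */
                    unsigned long gap = target_data - target_buf;
                    memset(target_buf, 0, gap);
                    memcpy(target_data, buf, len - gap);
                    return len - gap;
            }
            /* block is entirely user data */
            memcpy(target_buf, buf, len);
            return len;
    }
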
@@ -1858,7 +2023,6 @@
 	blocks = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
 	iblock = page->offset >> inode->i_sb->s_blocksize_bits;
 	page->owner = (void *)-1;
-	head = page->buffers;
 	bh = head;
 	nr = 0;
 
@@ -1917,10 +2081,8 @@
 	if (!(page = __get_free_page(GFP_BUFFER)))
 		return 0;
 	bh = create_buffers(page, size, 0);
-	if (!bh) {
-		free_page(page);
-		return 0;
-	}
+	if (!bh)
+		goto no_buffer_head;
 
 	isize = BUFSIZE_INDEX(size);
 
@@ -1950,6 +2112,10 @@
 	mem_map[MAP_NR(page)].buffers = bh;
 	atomic_add(PAGE_SIZE, &buffermem);
 	return 1;
+
+no_buffer_head:
+	free_page(page);
+	return 0;
 }
 
 /*
@@ -2290,12 +2456,7 @@
 				 */
 				atomic_inc(&bh->b_count);
 				spin_unlock(&lru_list_lock);
-				if (major == LOOP_MAJOR && written > 1) {
-					ll_rw_block(WRITEA, 1, &bh);
-					if (buffer_dirty(bh))
-						--written;
-				} else
-					ll_rw_block(WRITE, 1, &bh);
+				ll_rw_block(WRITE, 1, &bh);
 				atomic_dec(&bh->b_count);
 				goto repeat;
 			}
@@ -2319,3 +2480,12 @@
 		}
 	}
 }
+
+static int __init bdflush_init(void)
+{
+	kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+	return 0;
+}
+
+module_init(bdflush_init)
+
