patch-2.4.11-dontuse linux/mm/filemap.c

Next file: linux/mm/memory.c
Previous file: linux/lib/dec_and_lock.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.4.10/linux/mm/filemap.c linux/mm/filemap.c
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/iobuf.h>
+#include <linux/compiler.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -56,6 +57,7 @@
 #define CLUSTER_PAGES		(1 << page_cluster)
 #define CLUSTER_OFFSET(x)	(((x) >> page_cluster) << page_cluster)
 
+static void FASTCALL(add_page_to_hash_queue(struct page * page, struct page **p));
 static void add_page_to_hash_queue(struct page * page, struct page **p)
 {
 	struct page *next = *p;
@@ -170,11 +172,7 @@
 		page = list_entry(curr, struct page, list);
 		curr = curr->next;
 
-		/* We cannot invalidate something in use.. */
-		if (page_count(page) != 1)
-			continue;
-
-		/* ..or dirty.. */
+		/* We cannot invalidate something that is dirty.. */
 		if (PageDirty(page))
 			continue;
 
@@ -182,10 +180,20 @@
 		if (TryLockPage(page))
 			continue;
 
+		if (page->buffers && !try_to_free_buffers(page, 0))
+			goto unlock;
+
+		if (page_count(page) != 1)
+			goto unlock;
+
 		__lru_cache_del(page);
 		__remove_inode_page(page);
 		UnlockPage(page);
 		page_cache_release(page);
+		continue;
+unlock:
+		UnlockPage(page);
+		continue;
 	}
 
 	spin_unlock(&pagemap_lru_lock);
@@ -616,9 +624,6 @@
 {
 	unsigned long flags;
 
-	if (PageLocked(page))
-		BUG();
-
 	flags = page->flags & ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_dirty | 1 << PG_referenced | 1 << PG_arch_1 | 1 << PG_checked);
 	page->flags = flags | (1 << PG_locked);
 	page_cache_get(page);
@@ -635,7 +640,7 @@
 	spin_unlock(&pagecache_lock);
 }
 
-static int add_to_page_cache_unique(struct page * page,
+int add_to_page_cache_unique(struct page * page,
 	struct address_space *mapping, unsigned long offset,
 	struct page **hash)
 {
@@ -792,11 +797,13 @@
 }
 
 /*
- * Same as the above, but lock the page too, verifying that
- * it's still valid once we own it.
- */
-struct page * __find_lock_page (struct address_space *mapping,
-				unsigned long offset, struct page **hash)
+ * Must be called with the pagecache lock held,
+ * will return with it held (but it may be dropped
+ * during blocking operations..)
+ */
+static struct page * FASTCALL(__find_lock_page_helper(struct address_space *, unsigned long, struct page *));
+static struct page * __find_lock_page_helper(struct address_space *mapping,
+					unsigned long offset, struct page *hash)
 {
 	struct page *page;
 
@@ -805,27 +812,79 @@
 	 * the hash-list needs a held write-lock.
 	 */
 repeat:
-	spin_lock(&pagecache_lock);
-	page = __find_page_nolock(mapping, offset, *hash);
+	page = __find_page_nolock(mapping, offset, hash);
 	if (page) {
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		if (TryLockPage(page)) {
+			spin_unlock(&pagecache_lock);
+			lock_page(page);
+			spin_lock(&pagecache_lock);
 
-		lock_page(page);
+			/* Has the page been re-allocated while we slept? */
+			if (page->mapping != mapping || page->index != offset) {
+				UnlockPage(page);
+				page_cache_release(page);
+				goto repeat;
+			}
+		}
+	}
+	return page;
+}
+
+/*
+ * Same as the above, but lock the page too, verifying that
+ * it's still valid once we own it.
+ */
+struct page * __find_lock_page (struct address_space *mapping,
+				unsigned long offset, struct page **hash)
+{
+	struct page *page;
 
-		/* Is the page still hashed? Ok, good.. */
-		if (page->mapping == mapping && page->index == offset)
-			return page;
+	spin_lock(&pagecache_lock);
+	page = __find_lock_page_helper(mapping, offset, *hash);
+	spin_unlock(&pagecache_lock);
+	return page;
+}
 
-		/* Nope: we raced. Release and try again.. */
-		UnlockPage(page);
-		page_cache_release(page);
-		goto repeat;
-	}
+/*
+ * Same as above, but create the page if required..
+ */
+struct page * find_or_create_page(struct address_space *mapping, unsigned long index, unsigned int gfp_mask)
+{
+	struct page *page;
+	struct page **hash = page_hash(mapping, index);
+
+	spin_lock(&pagecache_lock);
+	page = __find_lock_page_helper(mapping, index, *hash);
 	spin_unlock(&pagecache_lock);
-	return NULL;
+	if (!page) {
+		struct page *newpage = alloc_page(gfp_mask);
+		page = ERR_PTR(-ENOMEM);
+		if (newpage) {
+			spin_lock(&pagecache_lock);
+			page = __find_lock_page_helper(mapping, index, *hash);
+			if (likely(!page)) {
+				page = newpage;
+				__add_to_page_cache(page, mapping, index, hash);
+				newpage = NULL;
+			}
+			spin_unlock(&pagecache_lock);
+			if (unlikely(newpage != NULL))
+				page_cache_release(newpage);
+		}
+	}
+	return page;	
 }
 
+/*
+ * Returns locked page at given index in given cache, creating it if needed.
+ */
+struct page *grab_cache_page(struct address_space *mapping, unsigned long index)
+{
+	return find_or_create_page(mapping, index, mapping->gfp_mask);
+}
+
+
 #if 0
 #define PROFILE_READAHEAD
 #define DEBUG_READAHEAD
@@ -956,30 +1015,6 @@
 	return max_readahead[MAJOR(inode->i_dev)][MINOR(inode->i_dev)];
 }
 
-static inline unsigned long calc_end_index(struct inode * inode)
-{
-	unsigned long end_index;
-
-	if (!S_ISBLK(inode->i_mode))
-		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
-	else
-		end_index = buffered_blk_size(inode->i_rdev) >> (PAGE_CACHE_SHIFT - BLOCK_SIZE_BITS);
-
-	return end_index;
-}
-
-static inline loff_t calc_rsize(struct inode * inode)
-{
-	loff_t rsize;
-
-	if (!S_ISBLK(inode->i_mode))
-		rsize = inode->i_size;
-	else
-		rsize = (loff_t) buffered_blk_size(inode->i_rdev) << BLOCK_SIZE_BITS;
-
-	return rsize;
-}
-
 static void generic_file_readahead(int reada_ok,
 	struct file * filp, struct inode * inode,
 	struct page * page)
@@ -990,7 +1025,7 @@
 	unsigned long raend;
 	int max_readahead = get_max_readahead(inode);
 
-	end_index = calc_end_index(inode);
+	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 
 	raend = filp->f_raend;
 	max_ahead = 0;
@@ -1114,8 +1149,8 @@
  */
 void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
-	struct address_space *mapping = inode->i_mapping;
+	struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
+	struct inode *inode = mapping->host;
 	unsigned long index, offset;
 	struct page *cached_page;
 	int reada_ok;
@@ -1169,13 +1204,13 @@
 		struct page *page, **hash;
 		unsigned long end_index, nr, ret;
 
-		end_index = calc_end_index(inode);
+		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 			
 		if (index > end_index)
 			break;
 		nr = PAGE_CACHE_SIZE;
 		if (index == end_index) {
-			nr = calc_rsize(inode) & ~PAGE_CACHE_MASK;
+			nr = inode->i_size & ~PAGE_CACHE_MASK;
 			if (nr <= offset)
 				break;
 		}
@@ -1207,6 +1242,13 @@
 			flush_dcache_page(page);
 
 		/*
+		 * Mark the page accessed if we read the
+		 * beginning or we just did an lseek.
+		 */
+		if (!offset || !filp->f_reada)
+			mark_page_accessed(page);
+
+		/*
 		 * Ok, we have the page, and it's up-to-date, so
 		 * now we can copy it to user space...
 		 *
@@ -1221,7 +1263,6 @@
 		index += offset >> PAGE_CACHE_SHIFT;
 		offset &= ~PAGE_CACHE_MASK;
 
-		mark_page_accessed(page);
 		page_cache_release(page);
 		if (ret == nr && desc->count)
 			continue;
@@ -1316,92 +1357,6 @@
 	UPDATE_ATIME(inode);
 }
 
-static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
-{
-	ssize_t retval;
-	int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
-	struct kiobuf * iobuf;
-	struct inode * inode = filp->f_dentry->d_inode;
-	struct address_space * mapping = inode->i_mapping;
-
-	new_iobuf = 0;
-	iobuf = filp->f_iobuf;
-	if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
-		/*
-		 * A parallel read/write is using the preallocated iobuf
-		 * so just run slow and allocate a new one.
-		 */
-		retval = alloc_kiovec(1, &iobuf);
-		if (retval)
-			goto out;
-		new_iobuf = 1;
-	}
-
-	if (!S_ISBLK(inode->i_mode)) {
-		blocksize = inode->i_sb->s_blocksize;
-		blocksize_bits = inode->i_sb->s_blocksize_bits;
-	} else {
-		blocksize = BUFFERED_BLOCKSIZE;
-		blocksize_bits = BUFFERED_BLOCKSIZE_BITS;
-	}
-	blocksize_mask = blocksize - 1;
-	chunk_size = KIO_MAX_ATOMIC_IO << 10;
-
-	retval = -EINVAL;
-	if ((offset & blocksize_mask) || (count & blocksize_mask))
-		goto out_free;
-	if (!mapping->a_ops->direct_IO)
-		goto out_free;
-
-	/*
-	 * Flush to disk exlusively the _data_, metadata must remains
-	 * completly asynchronous or performance will go to /dev/null.
-	 */
-	filemap_fdatasync(mapping);
-	retval = fsync_inode_data_buffers(inode);
-	filemap_fdatawait(mapping);
-	if (retval < 0)
-		goto out_free;
-
-	progress = retval = 0;
-	while (count > 0) {
-		iosize = count;
-		if (iosize > chunk_size)
-			iosize = chunk_size;
-
-		retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
-		if (retval)
-			break;
-
-		retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
-
-		if (rw == READ && retval > 0)
-			mark_dirty_kiobuf(iobuf, retval);
-		
-		if (retval >= 0) {
-			count -= retval;
-			buf += retval;
-			progress += retval;
-		}
-
-		unmap_kiobuf(iobuf);
-
-		if (retval != iosize)
-			break;
-	}
-
-	if (progress)
-		retval = progress;
-
- out_free:
-	if (!new_iobuf)
-		clear_bit(0, &filp->f_iobuf_lock);
-	else
-		free_kiovec(1, &iobuf);
- out:	
-	return retval;
-}
-
 int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
 {
 	char *kaddr;
@@ -1435,9 +1390,6 @@
 	if ((ssize_t) count < 0)
 		return -EINVAL;
 
-	if (filp->f_flags & O_DIRECT)
-		goto o_direct;
-
 	retval = -EFAULT;
 	if (access_ok(VERIFY_WRITE, buf, count)) {
 		retval = 0;
@@ -1456,28 +1408,7 @@
 				retval = desc.error;
 		}
 	}
- out:
 	return retval;
-
- o_direct:
-	{
-		loff_t pos = *ppos, size;
-		struct inode * inode = filp->f_dentry->d_inode;
-
-		retval = 0;
-		if (!count)
-			goto out; /* skip atime */
-		size = calc_rsize(inode);
-		if (pos < size) {
-			if (pos + count > size)
-				count = size - pos;
-			retval = generic_file_direct_IO(READ, filp, buf, count, pos);
-			if (retval > 0)
-				*ppos = pos + retval;
-		}
-		UPDATE_ATIME(filp->f_dentry->d_inode);
-		goto out;
-	}
 }
 
 static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
@@ -1662,7 +1593,6 @@
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page, **hash, *old_page;
 	unsigned long size, pgoff;
-	loff_t rsize;
 
 	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
@@ -1671,8 +1601,7 @@
 	 * An external ptracer can access pages that normally aren't
 	 * accessible..
 	 */
-	rsize = calc_rsize(inode);
-	size = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if ((pgoff >= size) && (area->vm_mm == current->mm))
 		return NULL;
 
@@ -2171,14 +2100,13 @@
 	long error = -EBADF;
 	struct file * file;
 	unsigned long size, rlim_rss;
-	loff_t rsize;
 
 	/* Doesn't work if there's no mapped file. */
 	if (!vma->vm_file)
 		return error;
 	file = vma->vm_file;
-	rsize = calc_rsize(file->f_dentry->d_inode);
-	size = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	size = (file->f_dentry->d_inode->i_size + PAGE_CACHE_SIZE - 1) >>
+							PAGE_CACHE_SHIFT;
 
 	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 	if (end > vma->vm_end)
@@ -2616,19 +2544,6 @@
 	return page;
 }
 
-/*
- * Returns locked page at given index in given cache, creating it if needed.
- */
-
-struct page *grab_cache_page(struct address_space *mapping, unsigned long index)
-{
-	struct page *cached_page = NULL;
-	struct page *page = __grab_cache_page(mapping,index,&cached_page);
-	if (cached_page)
-		page_cache_release(cached_page);
-	return page;
-}
-
 inline void remove_suid(struct inode *inode)
 {
 	unsigned int mode;
@@ -2662,8 +2577,8 @@
 ssize_t
 generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
 {
-	struct inode	*inode = file->f_dentry->d_inode; 
-	struct address_space *mapping = inode->i_mapping;
+	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
+	struct inode	*inode = mapping->host;
 	unsigned long	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
 	loff_t		pos;
 	struct page	*page, *cached_page;
@@ -2695,8 +2610,7 @@
 
 	written = 0;
 
-	/* FIXME: this is for backwards compatibility with 2.4 */
-	if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
+	if (file->f_flags & O_APPEND)
 		pos = inode->i_size;
 
 	/*
@@ -2757,17 +2671,15 @@
 			err = -EPERM;
 			goto out;
 		}
-		if (pos >= calc_rsize(inode)) {
-			if (count || pos > calc_rsize(inode)) {
-				/* FIXME: this is for backwards compatibility with 2.4 */
+		if (pos >= inode->i_size) {
+			if (count || pos > inode->i_size) {
 				err = -ENOSPC;
 				goto out;
 			}
-			/* zero-length writes at blkdev end are OK */
 		}
 
-		if (pos + count > calc_rsize(inode))
-			count = calc_rsize(inode) - pos;
+		if (pos + count > inode->i_size)
+			count = inode->i_size - pos;
 	}
 
 	err = 0;
@@ -2778,9 +2690,6 @@
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	mark_inode_dirty_sync(inode);
 
-	if (file->f_flags & O_DIRECT)
-		goto o_direct;
-
 	do {
 		unsigned long index, offset;
 		long page_fault;
@@ -2855,7 +2764,6 @@
 	if ((status >= 0) && (file->f_flags & O_SYNC))
 		status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
 	
-out_status:	
 	err = written ? written : status;
 out:
 
@@ -2864,25 +2772,6 @@
 fail_write:
 	status = -EFAULT;
 	goto unlock;
-
-o_direct:
-	written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
-	if (written > 0) {
-		loff_t end = pos + written;
-		if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
-			inode->i_size = end;
-			mark_inode_dirty(inode);
-		}
-		*ppos = end;
-		invalidate_inode_pages2(mapping);
-	}
-	/*
-	 * Sync the fs metadata but not the minor inode changes and
-	 * of course not the data as we did direct DMA for the IO.
-	 */
-	if (written >= 0 && file->f_flags & O_SYNC)
-		status = generic_osync_inode(inode, OSYNC_METADATA);
-	goto out_status;
 }
 
 void __init page_cache_init(unsigned long mempages)

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)