patch-2.3.7 linux/fs/nfs/dir.c

Next file: linux/fs/nfs/file.c
Previous file: linux/fs/ncpfs/dir.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.3.6/linux/fs/nfs/dir.c linux/fs/nfs/dir.c
@@ -78,13 +78,13 @@
 	nfs_rename,		/* rename */
 	NULL,			/* readlink */
 	NULL,			/* follow_link */
+	NULL,			/* bmap */
 	NULL,			/* readpage */
 	NULL,			/* writepage */
-	NULL,			/* bmap */
+	NULL,			/* flushpage */
 	NULL,			/* truncate */
 	NULL,			/* permission */
 	NULL,			/* smap */
-	NULL,			/* updatepage */
 	nfs_revalidate,		/* revalidate */
 };
 
@@ -118,6 +118,61 @@
 };
 static kmem_cache_t *nfs_cookie_cachep;
 
+/* This whole scheme relies on the fact that dirent cookies
+ * are monotonically increasing.
+ *
+ * Another invariant is that once we have a valid non-zero
+ * EOF marker cached, we also have the complete set of cookie
+ * table entries.
+ *
+ * We return the page offset associated with the page where
+ * cookie must be if it exists at all, however if we can not
+ * figure that out conclusively, we return < 0.
+ */
+static long __nfs_readdir_offset(struct inode *inode, __u32 cookie)
+{
+	struct nfs_cookie_table *p;
+	unsigned long ret = 0;	/* cached cookies seen so far that are <= cookie; this is the value returned */
+
+	for(p = NFS_COOKIES(inode); p != NULL; p = p->next) {	/* walk every chunk of the cookie table */
+		int i;
+
+		for (i = 0; i < COOKIES_PER_CHUNK; i++) {
+			__u32 this_cookie = p->cookies[i];
+
+			/* End of known cookies, EOF is our only hope. */
+			if (!this_cookie)
+				goto check_eof;
+
+			/* Next cookie is larger, must be in previous page. */
+			if (this_cookie > cookie)
+				return ret;
+
+			ret += 1;
+
+			/* Exact cookie match, it must be in this page :-) */
+			if (this_cookie == cookie)
+				return ret;
+		}
+	}
+check_eof:
+	if (NFS_DIREOF(inode) != 0)	/* non-zero EOF marker cached: ret is returned as the answer */
+		return ret;
+
+	return -1L;	/* table ran out and no EOF marker is cached: inconclusive */
+}
+
+static __inline__ long nfs_readdir_offset(struct inode *inode, __u32 cookie)
+{
+	/* Cookie zero is always at page offset zero.   Optimize the
+	 * other common case since most directories fit entirely
+	 * in one page (no cookie table, but an EOF marker is cached).
+	 */
+	if (!cookie || (!NFS_COOKIES(inode) && NFS_DIREOF(inode)))
+		return 0;
+	return __nfs_readdir_offset(inode, cookie);	/* general case: scan the cookie table; may return < 0 */
+}
+
 /* Since a cookie of zero is declared special by the NFS
  * protocol, we easily can tell if a cookie in an existing
  * table chunk is valid or not.
@@ -148,38 +203,7 @@
 	return ret;
 }
 
-/* Now we cache directories properly, by stuffing the dirent
- * data directly in the page cache.
- *
- * Inode invalidation due to refresh etc. takes care of
- * _everything_, no sloppy entry flushing logic, no extraneous
- * copying, network direct to page cache, the way it was meant
- * to be.
- *
- * NOTE: Dirent information verification is done always by the
- *	 page-in of the RPC reply, nowhere else, this simplies
- *	 things substantially.
- */
 #define NFS_NAMELEN_ALIGN(__len) ((((__len)+3)>>2)<<2)
-static u32 find_midpoint(__u32 *p, u32 doff)
-{
-	u32 walk = doff & PAGE_MASK;
-
-	while(*p++ != 0) {
-		__u32 skip;
-
-		p++; /* skip fileid */
-
-		/* Skip len, name, and cookie. */
-		skip = NFS_NAMELEN_ALIGN(*p++);
-		p += (skip >> 2) + 1;
-		walk += skip + (4 * sizeof(__u32));
-		if (walk >= doff)
-			break;
-	}
-	return walk;
-}
-
 static int create_cookie(__u32 cookie, unsigned long off, struct inode *inode)
 {
 	struct nfs_cookie_table **cpp;
@@ -211,48 +235,74 @@
 	return 0;
 }
 
-static struct page *try_to_get_dirent_page(struct file *, unsigned long, int);
+static struct page *try_to_get_dirent_page(struct file *, __u32, int);
 
 /* Recover from a revalidation flush.  The case here is that
  * the inode for the directory got invalidated somehow, and
  * all of our cached information is lost.  In order to get
  * a correct cookie for the current readdir request from the
  * user, we must (re-)fetch older readdir page cache entries.
+ *
+ * Returns < 0 if some error occurs, else it is the page offset
+ * to fetch.
  */
-static int refetch_to_readdir_off(struct file *file, struct inode *inode, u32 off)
+static long refetch_to_readdir_cookie(struct file *file, struct inode *inode)
 {
-	u32 cur_off, goal_off = off & PAGE_MASK;
+	struct page *page;
+	u32 goal_cookie = file->f_pos;
+	long cur_off, ret = -1L;
 
 again:
 	cur_off = 0;
-	while (cur_off < goal_off) {
-		struct page *page;
-
-		page = find_page(inode, cur_off);
+	for (;;) {
+		page = find_get_page(inode, cur_off);
 		if (page) {
-			if (PageLocked(page))
-				__wait_on_page(page);
-			if (!PageUptodate(page))
-				return -1;
+			if (!Page_Uptodate(page))
+				goto out_error;
 		} else {
-			page = try_to_get_dirent_page(file, cur_off, 0);
+			__u32 *cp = find_cookie(inode, cur_off);
+
+			if (!cp)
+				goto out_error;
+
+			page = try_to_get_dirent_page(file, *cp, 0);
 			if (!page) {
 				if (!cur_off)
-					return -1;
+					goto out_error;
 
 				/* Someone touched the dir on us. */
 				goto again;
 			}
-			page_cache_release(page);
 		}
+		page_cache_release(page);
+
+		if ((ret = nfs_readdir_offset(inode, goal_cookie)) >= 0)
+			goto out;
 
-		cur_off += PAGE_SIZE;
+		cur_off += 1;
 	}
+out:
+	return ret;
 
-	return 0;
+out_error:
+	if (page)
+		page_cache_release(page);
+	goto out;
 }
 
-static struct page *try_to_get_dirent_page(struct file *file, unsigned long offset, int refetch_ok)
+/* Now we cache directories properly, by stuffing the dirent
+ * data directly in the page cache.
+ *
+ * Inode invalidation due to refresh etc. takes care of
+ * _everything_, no sloppy entry flushing logic, no extraneous
+ * copying, network direct to page cache, the way it was meant
+ * to be.
+ *
+ * NOTE: Dirent information verification is done always by the
+ *	 page-in of the RPC reply, nowhere else, this simplifies
+ *	 things substantially.
+ */
+static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int refetch_ok)
 {
 	struct nfs_readdirargs rd_args;
 	struct nfs_readdirres rd_res;
@@ -260,6 +310,7 @@
 	struct inode *inode = dentry->d_inode;
 	struct page *page, **hash;
 	unsigned long page_cache;
+	long offset;
 	__u32 *cookiep;
 
 	page = NULL;
@@ -267,27 +318,34 @@
 	if (!page_cache)
 		goto out;
 
-	while ((cookiep = find_cookie(inode, offset)) == NULL) {
+	if ((offset = nfs_readdir_offset(inode, cookie)) < 0) {
 		if (!refetch_ok ||
-		    refetch_to_readdir_off(file, inode, file->f_pos))
+		    (offset = refetch_to_readdir_cookie(file, inode)) < 0) {
+			page_cache_free(page_cache);
 			goto out;
+		}
+	}
+
+	cookiep = find_cookie(inode, offset);
+	if (!cookiep) {
+		/* Gross fatal error. */
+		page_cache_free(page_cache);
+		goto out;
 	}
 
 	hash = page_hash(inode, offset);
-	page = __find_page(inode, offset, *hash);
+repeat:
+	page = __find_lock_page(inode, offset, hash);
 	if (page) {
 		page_cache_free(page_cache);
-		goto out;
+		goto unlock_out;
 	}
 
 	page = page_cache_entry(page_cache);
-	atomic_inc(&page->count);
-	page->flags = ((page->flags &
-			~((1 << PG_uptodate) | (1 << PG_error))) |
-		       ((1 << PG_referenced) | (1 << PG_locked)));
-	page->offset = offset;
-	add_page_to_inode_queue(inode, page);
-	__add_page_to_hash_queue(page, hash);
+	if (add_to_page_cache_unique(page, inode, offset, hash)) {
+		page_cache_release(page);
+		goto repeat;
+	}
 
 	rd_args.fh = NFS_FH(dentry);
 	rd_res.buffer = (char *)page_cache;
@@ -303,48 +361,50 @@
 	} while(rd_res.bufsiz > 0);
 
 	if (rd_res.bufsiz < 0)
-		NFS_DIREOF(inode) =
-			(offset << PAGE_CACHE_SHIFT) + -(rd_res.bufsiz);
+		NFS_DIREOF(inode) = rd_res.cookie;
 	else if (create_cookie(rd_res.cookie, offset, inode))
 		goto error;
 
-	set_bit(PG_uptodate, &page->flags);
+	SetPageUptodate(page);
 unlock_out:
-	clear_bit(PG_locked, &page->flags);
-	wake_up(&page->wait);
+	UnlockPage(page);
 out:
 	return page;
 
 error:
-	set_bit(PG_error, &page->flags);
+	SetPageError(page);
 	goto unlock_out;
 }
 
-static __inline__ u32 nfs_do_filldir(__u32 *p, u32 doff,
+/* Seek up to dirent associated with the passed in cookie,
+ * then fill in dirents found.  Return the last cookie
+ * actually given to the user, to update the file position.
+ */
+static __inline__ u32 nfs_do_filldir(__u32 *p, u32 cookie,
 				     void *dirent, filldir_t filldir)
 {
 	u32 end;
 
-	if (doff & ~PAGE_CACHE_MASK) {
-		doff = find_midpoint(p, doff);
-		p += (doff & ~PAGE_CACHE_MASK) >> 2;
-	}
 	while((end = *p++) != 0) {
-		__u32 fileid = *p++;
-		__u32 len = *p++;
-		__u32 skip = NFS_NAMELEN_ALIGN(len);
-		char *name = (char *) p;
-
-		/* Skip the cookie. */
-		p = ((__u32 *) (name + skip)) + 1;
-		if (filldir(dirent, name, len, doff, fileid) < 0)
-			goto out;
-		doff += (skip + (4 * sizeof(__u32)));
+		__u32 fileid, len, skip, this_cookie;
+		char *name;
+
+		fileid = *p++;
+		len = *p++;
+		name = (char *) p;
+		skip = NFS_NAMELEN_ALIGN(len);
+		p += (skip >> 2);
+		this_cookie = *p++;
+
+		if (this_cookie < cookie)
+			continue;
+
+		cookie = this_cookie;
+		if (filldir(dirent, name, len, cookie, fileid) < 0)
+			break;
 	}
-	if (!*p)
-		doff = PAGE_CACHE_ALIGN(doff);
-out:
-	return doff;
+
+	return cookie;
 }
 
 /* The file offset position is represented in pure bytes, to
@@ -359,7 +419,7 @@
 	struct dentry *dentry = filp->f_dentry;
 	struct inode *inode = dentry->d_inode;
 	struct page *page, **hash;
-	unsigned long offset;
+	long offset;
 	int res;
 
 	res = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
@@ -369,14 +429,14 @@
 	if (NFS_DIREOF(inode) && filp->f_pos >= NFS_DIREOF(inode))
 		return 0;
 
-	offset = filp->f_pos >> PAGE_CACHE_SHIFT;
+	if ((offset = nfs_readdir_offset(inode, filp->f_pos)) < 0)
+		goto no_dirent_page;
+
 	hash = page_hash(inode, offset);
-	page = __find_page(inode, offset, *hash);
+	page = __find_get_page(inode, offset, hash);
 	if (!page)
 		goto no_dirent_page;
-	if (PageLocked(page))
-		goto dirent_locked_wait;
-	if (!PageUptodate(page))
+	if (!Page_Uptodate(page))
 		goto dirent_read_error;
 success:
 	filp->f_pos = nfs_do_filldir((__u32 *) page_address(page),
@@ -385,13 +445,11 @@
 	return 0;
 
 no_dirent_page:
-	page = try_to_get_dirent_page(filp, offset, 1);
+	page = try_to_get_dirent_page(filp, filp->f_pos, 1);
 	if (!page)
 		goto no_page;
 
-dirent_locked_wait:
-	wait_on_page(page);
-	if (PageUptodate(page))
+	if (Page_Uptodate(page))
 		goto success;
 dirent_read_error:
 	page_cache_release(page);
@@ -399,20 +457,39 @@
 	return -EIO;
 }
 
-/* Invalidate directory cookie caches and EOF marker
- * for an inode.
+/* Flush directory cookie and EOF caches for an inode.
+ * So we don't thrash allocating/freeing cookie tables,
+ * we keep the cookies around until the inode is
+ * deleted/reused.
+ */
+__inline__ void nfs_flush_dircache(struct inode *inode)
+{
+	struct nfs_cookie_table *p = NFS_COOKIES(inode);
+
+	while (p != NULL) {	/* visit every chunk; nothing is freed here */
+		int i;
+
+		for(i = 0; i < COOKIES_PER_CHUNK; i++)
+			p->cookies[i] = 0;	/* a zero cookie marks the slot as not valid */
+
+		p = p->next;
+	}
+	NFS_DIREOF(inode) = 0;	/* zero means no EOF marker is cached */
+}
+
+/* Free up directory cache state, this happens when
+ * nfs_delete_inode is called on an NFS directory.
  */
-__inline__ void nfs_invalidate_dircache(struct inode *inode)
+void nfs_free_dircache(struct inode *inode)
 {
 	struct nfs_cookie_table *p = NFS_COOKIES(inode);
 
-	if (p != NULL) {
-		NFS_COOKIES(inode) = NULL;
-		do {	struct nfs_cookie_table *next = p->next;
-			kmem_cache_free(nfs_cookie_cachep, p);
-			p = next;
-		} while (p != NULL);
+	while (p != NULL) {
+		struct nfs_cookie_table *next = p->next;
+		kmem_cache_free(nfs_cookie_cachep, p);
+		p = next;
 	}
+	NFS_COOKIES(inode) = NULL;
 	NFS_DIREOF(inode) = 0;
 }
 
@@ -538,11 +615,11 @@
 	/* Purge readdir caches. */
 	if (dentry->d_parent->d_inode) {
 		invalidate_inode_pages(dentry->d_parent->d_inode);
-		nfs_invalidate_dircache(dentry->d_parent->d_inode);
+		nfs_flush_dircache(dentry->d_parent->d_inode);
 	}
 	if (inode && S_ISDIR(inode->i_mode)) {
 		invalidate_inode_pages(inode);
-		nfs_invalidate_dircache(inode);
+		nfs_flush_dircache(inode);
 	}
 	return 0;
 }
@@ -739,7 +816,7 @@
 	 * Invalidate the dir cache before the operation to avoid a race.
 	 */
 	invalidate_inode_pages(dir);
-	nfs_invalidate_dircache(dir);
+	nfs_flush_dircache(dir);
 	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
 			dentry->d_name.name, &sattr, &fhandle, &fattr);
 	if (!error)
@@ -769,7 +846,7 @@
 	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
 
 	invalidate_inode_pages(dir);
-	nfs_invalidate_dircache(dir);
+	nfs_flush_dircache(dir);
 	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
 				dentry->d_name.name, &sattr, &fhandle, &fattr);
 	if (!error)
@@ -804,7 +881,7 @@
 	 */
 	d_drop(dentry);
 	invalidate_inode_pages(dir);
-	nfs_invalidate_dircache(dir);
+	nfs_flush_dircache(dir);
 	error = nfs_proc_mkdir(NFS_DSERVER(dentry), NFS_FH(dentry->d_parent),
 				dentry->d_name.name, &sattr, &fhandle, &fattr);
 	return error;
@@ -825,7 +902,7 @@
 #endif
 
 	invalidate_inode_pages(dir);
-	nfs_invalidate_dircache(dir);
+	nfs_flush_dircache(dir);
 	error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
 				dentry->d_name.name);
 
@@ -953,7 +1030,7 @@
 	} while(sdentry->d_inode != NULL); /* need negative lookup */
 
 	invalidate_inode_pages(dir);
-	nfs_invalidate_dircache(dir);
+	nfs_flush_dircache(dir);
 	error = nfs_proc_rename(NFS_SERVER(dir),
 				NFS_FH(dentry->d_parent), dentry->d_name.name,
 				NFS_FH(dentry->d_parent), silly);
@@ -1023,7 +1100,7 @@
 		d_delete(dentry);
 	}
 	invalidate_inode_pages(dir);
-	nfs_invalidate_dircache(dir);
+	nfs_flush_dircache(dir);
 	error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
 				dentry->d_name.name);
 	/*
@@ -1090,7 +1167,7 @@
 	 */
 	d_drop(dentry);
 	invalidate_inode_pages(dir);
-	nfs_invalidate_dircache(dir);
+	nfs_flush_dircache(dir);
 	error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
 				dentry->d_name.name, symname, &sattr);
 	if (!error) {
@@ -1121,7 +1198,7 @@
 	 */
 	d_drop(dentry);
 	invalidate_inode_pages(dir);
-	nfs_invalidate_dircache(dir);
+	nfs_flush_dircache(dir);
 	error = nfs_proc_link(NFS_DSERVER(old_dentry), NFS_FH(old_dentry),
 				NFS_FH(dentry->d_parent), dentry->d_name.name);
 	if (!error) {
@@ -1267,9 +1344,9 @@
 	}
 
 	invalidate_inode_pages(new_dir);
-	nfs_invalidate_dircache(new_dir);
+	nfs_flush_dircache(new_dir);
 	invalidate_inode_pages(old_dir);
-	nfs_invalidate_dircache(old_dir);
+	nfs_flush_dircache(old_dir);
 	error = nfs_proc_rename(NFS_DSERVER(old_dentry),
 			NFS_FH(old_dentry->d_parent), old_dentry->d_name.name,
 			NFS_FH(new_dentry->d_parent), new_dentry->d_name.name);

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)