patch-2.3.99-pre3 linux/fs/nfs/write.c
- Lines: 1861
- Date: Wed Mar 22 15:03:26 2000
- Orig file: v2.3.99-pre2/linux/fs/nfs/write.c
- Orig date: Sat Feb 26 22:31:53 2000
diff -u --recursive --new-file v2.3.99-pre2/linux/fs/nfs/write.c linux/fs/nfs/write.c
@@ -46,6 +46,7 @@
* Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
*/
+#include <linux/config.h>
#include <linux/types.h>
#include <linux/malloc.h>
#include <linux/swap.h>
@@ -54,33 +55,126 @@
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_flushd.h>
#include <asm/uaccess.h>
#include <linux/smp_lock.h>
#define NFS_PARANOIA 1
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
-static void nfs_wback_begin(struct rpc_task *task);
-static void nfs_wback_result(struct rpc_task *task);
-static void nfs_cancel_request(struct nfs_wreq *req);
+/*
+ * Spinlock
+ */
+spinlock_t nfs_wreq_lock = SPIN_LOCK_UNLOCKED;
+static unsigned int nfs_nr_requests = 0;
/*
- * Cache parameters
+ * Local structures
+ *
+ * Valid flags for a dirty buffer
*/
-#define NFS_WRITEBACK_DELAY (10 * HZ)
-#define NFS_WRITEBACK_MAX 64
+#define PG_BUSY 0x0001
/*
- * Limit number of delayed writes
+ * This is the struct where the WRITE/COMMIT arguments go.
*/
-static int nr_write_requests = 0;
-static struct rpc_wait_queue write_queue = RPC_INIT_WAITQ("write_chain");
+struct nfs_write_data {
+ struct rpc_task task;
+ struct file *file;
+ struct rpc_cred *cred;
+ struct nfs_writeargs args; /* argument struct */
+ struct nfs_writeres res; /* result struct */
+ struct nfs_fattr fattr;
+ struct nfs_writeverf verf;
+ struct list_head pages; /* Coalesced requests we wish to flush */
+};
+
+struct nfs_page {
+ struct list_head wb_hash, /* Inode */
+ wb_list,
+ *wb_list_head;
+ struct file *wb_file;
+ struct rpc_cred *wb_cred;
+ struct page *wb_page; /* page to write out */
+ wait_queue_head_t wb_wait; /* wait queue */
+ unsigned long wb_timeout; /* when to write/commit */
+ unsigned int wb_offset, /* Offset of write */
+ wb_bytes, /* Length of request */
+ wb_count, /* reference count */
+ wb_flags;
+ struct nfs_writeverf wb_verf; /* Commit cookie */
+};
+
+#define NFS_WBACK_BUSY(req) ((req)->wb_flags & PG_BUSY)
+
+/*
+ * Local function declarations
+ */
+static void nfs_writeback_done(struct rpc_task *);
+#ifdef CONFIG_NFS_V3
+static void nfs_commit_done(struct rpc_task *);
+#endif
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode) (0)
#endif
+static kmem_cache_t *nfs_page_cachep = NULL;
+static kmem_cache_t *nfs_wdata_cachep = NULL;
+
+static __inline__ struct nfs_page *nfs_page_alloc(void)
+{
+ struct nfs_page *p;
+ p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL);
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->wb_hash);
+ INIT_LIST_HEAD(&p->wb_list);
+ init_waitqueue_head(&p->wb_wait);
+ }
+ return p;
+}
+
+static __inline__ void nfs_page_free(struct nfs_page *p)
+{
+ kmem_cache_free(nfs_page_cachep, p);
+}
+
+static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
+{
+ struct nfs_write_data *p;
+ p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NFS);
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ }
+ return p;
+}
+
+static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
+{
+ kmem_cache_free(nfs_wdata_cachep, p);
+}
+
+static void nfs_writedata_release(struct rpc_task *task)
+{
+ struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
+ rpc_release_task(task);
+ nfs_writedata_free(wdata);
+}
+
+/*
+ * This function will be used to simulate weak cache consistency
+ * under NFSv2 when the NFSv3 attribute patch is included.
+ * For the moment, we just call nfs_refresh_inode().
+ */
+static __inline__ int
+nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
+{
+ return nfs_refresh_inode(inode, fattr);
+}
+
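nfs_write_attributes() above is deliberately a stub. As a rough sketch of the weak cache consistency check it anticipates, an NFSv3-style version might compare the server's pre-operation attributes against the cached inode before deciding whether cached pages are still valid. The nfs_wcc_data layout below is hypothetical and not part of this patch:

struct nfs_wcc_data {				/* hypothetical layout */
	struct nfs_fattr	pre_op;		/* attributes before the WRITE */
	struct nfs_fattr	post_op;	/* attributes after the WRITE */
};

static inline int
nfs_write_attributes_wcc(struct inode *inode, struct nfs_wcc_data *wcc)
{
	/* If nothing changed behind our back, cached pages stay valid
	 * and only the attributes need refreshing. */
	int unchanged = wcc->pre_op.mtime.seconds == inode->i_mtime
			&& wcc->pre_op.size == inode->i_size;
	int status = nfs_refresh_inode(inode, &wcc->post_op);

	if (!unchanged)
		invalidate_inode_pages(inode);	/* assumed cache purge */
	return status;
}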
/*
* Write a page synchronously.
* Offset is the data offset within the page.
@@ -161,278 +255,770 @@
}
/*
- * Append a writeback request to a list
+ * Write a page to the server. This was supposed to be used for
+ * NFS swapping only.
+ * FIXME: Using this for mmap is pointless, breaks asynchronous
+ * writebacks, and is extremely slow.
+ */
+int
+nfs_writepage(struct dentry * dentry, struct page *page)
+{
+ struct inode *inode = dentry->d_inode;
+ unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+ unsigned offset = PAGE_CACHE_SIZE;
+ int err;
+
+ /* easy case */
+ if (page->index < end_index)
+ goto do_it;
+ /* things got complicated... */
+ offset = inode->i_size & (PAGE_CACHE_SIZE-1);
+ /* OK, are we completely out? */
+ if (page->index >= end_index+1 || !offset)
+ return -EIO;
+do_it:
+ err = nfs_writepage_sync(dentry, inode, page, 0, offset);
+ if (err == offset) return 0;
+ return err;
+}
+
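To make the end-of-file arithmetic in nfs_writepage() concrete, here is a worked example assuming 4K pages (PAGE_CACHE_SHIFT == 12):

/*
 * i_size == 10000, PAGE_CACHE_SIZE == 4096:
 *
 *	end_index = 10000 >> 12 = 2	(pages 0 and 1 are full)
 *	page->index < 2		write all PAGE_CACHE_SIZE bytes
 *	page->index == 2	offset = 10000 & 4095 = 1808; write
 *				only those first 1808 bytes
 *	page->index >= 3	entirely beyond EOF: return -EIO
 */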
+/*
+ * Check whether the file range we want to write to is locked by
+ * us.
+ */
+static int
+region_locked(struct inode *inode, struct nfs_page *req)
+{
+ struct file_lock *fl;
+ unsigned long rqstart, rqend;
+
+ /* Don't optimize writes if we don't use NLM */
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
+ return 0;
+
+ rqstart = page_offset(req->wb_page) + req->wb_offset;
+ rqend = rqstart + req->wb_bytes;
+ for (fl = inode->i_flock; fl; fl = fl->fl_next) {
+ if (fl->fl_owner == current->files && (fl->fl_flags & FL_POSIX)
+ && fl->fl_type == F_WRLCK
+ && fl->fl_start <= rqstart && rqend <= fl->fl_end) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
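An example of how the containment test in region_locked() plays out:

/*
 * Suppose the caller holds F_WRLCK on bytes 0-65535 and dirties 100
 * bytes at file offset 4096.  Then rqstart == 4096, rqend == 4196,
 * and fl_start (0) <= 4096 while 4196 <= fl_end (65535), so
 * region_locked() returns 1.  nfs_create_request() below then uses
 * the long NFS_WRITEBACK_LOCKDELAY timeout: no other client may
 * legitimately read the locked range before we unlock, so the
 * write-back can safely be postponed.
 */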
+static inline struct nfs_page *
+nfs_inode_wb_entry(struct list_head *head)
+{
+ return list_entry(head, struct nfs_page, wb_hash);
+}
+
+/*
+ * Insert a write request into an inode
*/
static inline void
-append_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
+nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
- dprintk("NFS: append_write_request(%p, %p)\n", q, wreq);
- rpc_append_list(q, wreq);
+ if (!list_empty(&req->wb_hash))
+ return;
+ if (!NFS_WBACK_BUSY(req))
+ printk(KERN_ERR "NFS: unlocked request attempted hashed!\n");
+ inode->u.nfs_i.npages++;
+ list_add(&req->wb_hash, &inode->u.nfs_i.writeback);
+ req->wb_count++;
}
/*
- * Remove a writeback request from a list
+ * Remove a write request from an inode
*/
static inline void
-remove_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
+nfs_inode_remove_request(struct nfs_page *req)
{
- dprintk("NFS: remove_write_request(%p, %p)\n", q, wreq);
- rpc_remove_list(q, wreq);
+ struct inode *inode;
+ spin_lock(&nfs_wreq_lock);
+ if (list_empty(&req->wb_hash)) {
+ spin_unlock(&nfs_wreq_lock);
+ return;
+ }
+ if (!NFS_WBACK_BUSY(req))
+ printk(KERN_ERR "NFS: unlocked request attempted unhashed!\n");
+ inode = req->wb_file->f_dentry->d_inode;
+ list_del(&req->wb_hash);
+ INIT_LIST_HEAD(&req->wb_hash);
+ inode->u.nfs_i.npages--;
+ if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
+ if (!nfs_have_writebacks(inode))
+ inode_remove_flushd(inode);
+ spin_unlock(&nfs_wreq_lock);
+ nfs_release_request(req);
}
/*
- * Find a non-busy write request for a given page to
- * try to combine with.
+ * Find a request
*/
-static inline struct nfs_wreq *
-find_write_request(struct inode *inode, struct page *page)
+static inline struct nfs_page *
+_nfs_find_request(struct inode *inode, struct page *page)
{
- pid_t pid = current->pid;
- struct nfs_wreq *head, *req;
+ struct list_head *head, *next;
- dprintk("NFS: find_write_request(%x/%ld, %p)\n",
- inode->i_dev, inode->i_ino, page);
- if (!(req = head = NFS_WRITEBACK(inode)))
- return NULL;
- do {
- /*
- * We can't combine with canceled requests or
- * requests that have already been started..
- */
- if (req->wb_flags & (NFS_WRITE_CANCELLED | NFS_WRITE_INPROGRESS))
+ head = &inode->u.nfs_i.writeback;
+ next = head->next;
+ while (next != head) {
+ struct nfs_page *req = nfs_inode_wb_entry(next);
+ next = next->next;
+ if (page_index(req->wb_page) != page_index(page))
continue;
+ req->wb_count++;
+ return req;
+ }
+ return NULL;
+}
- if (req->wb_page == page && req->wb_pid == pid)
- return req;
+struct nfs_page *
+nfs_find_request(struct inode *inode, struct page *page)
+{
+ struct nfs_page *req;
- /*
- * Ehh, don't keep too many tasks queued..
- */
- rpc_wake_up_task(&req->wb_task);
+ spin_lock(&nfs_wreq_lock);
+ req = _nfs_find_request(inode, page);
+ spin_unlock(&nfs_wreq_lock);
+ return req;
+}
- } while ((req = WB_NEXT(req)) != head);
- return NULL;
+static inline struct nfs_page *
+nfs_list_entry(struct list_head *head)
+{
+ return list_entry(head, struct nfs_page, wb_list);
}
/*
- * Find and release all failed requests for this inode.
+ * Insert a write request into a sorted list
*/
-int
-nfs_check_failed_request(struct inode * inode)
+static inline void
+nfs_list_add_request(struct nfs_page *req, struct list_head *head)
{
- /* FIXME! */
- return 0;
+ struct list_head *prev;
+
+ if (!list_empty(&req->wb_list)) {
+ printk(KERN_ERR "NFS: Add to list failed!\n");
+ return;
+ }
+ if (list_empty(&req->wb_hash)) {
+ printk(KERN_ERR "NFS: Unhashed request attempted added to a list!\n");
+ return;
+ }
+ if (!NFS_WBACK_BUSY(req))
+ printk(KERN_ERR "NFS: unlocked request attempted added to list!\n");
+ prev = head->prev;
+ while (prev != head) {
+ struct nfs_page *p = nfs_list_entry(prev);
+ if (page_index(p->wb_page) < page_index(req->wb_page))
+ break;
+ prev = prev->prev;
+ }
+ list_add(&req->wb_list, prev);
+ req->wb_list_head = head;
}
/*
- * Try to merge adjacent write requests. This works only for requests
- * issued by the same user.
+ * Remove a write request from a list
*/
-static inline int
-update_write_request(struct nfs_wreq *req, unsigned int first,
- unsigned int bytes)
+static inline void
+nfs_list_remove_request(struct nfs_page *req)
{
- unsigned int rqfirst = req->wb_offset,
- rqlast = rqfirst + req->wb_bytes,
- last = first + bytes;
+ if (list_empty(&req->wb_list))
+ return;
+ if (!NFS_WBACK_BUSY(req))
+ printk(KERN_ERR "NFS: unlocked request attempted removed from list!\n");
+ list_del(&req->wb_list);
+ INIT_LIST_HEAD(&req->wb_list);
+ req->wb_list_head = NULL;
+}
- dprintk("nfs: trying to update write request %p\n", req);
+/*
+ * Add a request to the inode's dirty list.
+ */
+static inline void
+nfs_mark_request_dirty(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
- /* not contiguous? */
- if (rqlast < first || last < rqfirst)
- return 0;
+ spin_lock(&nfs_wreq_lock);
+ if (list_empty(&req->wb_list)) {
+ nfs_list_add_request(req, &inode->u.nfs_i.dirty);
+ inode->u.nfs_i.ndirty++;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ /*
+ * NB: the call to inode_schedule_scan() must lie outside the
+ * spinlock since it can run flushd().
+ */
+ inode_schedule_scan(inode, req->wb_timeout);
+}
- if (first < rqfirst)
- rqfirst = first;
- if (rqlast < last)
- rqlast = last;
+/*
+ * Check if a request is dirty
+ */
+static inline int
+nfs_dirty_request(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
+ return !list_empty(&req->wb_list) && req->wb_list_head == &inode->u.nfs_i.dirty;
+}
- req->wb_offset = rqfirst;
- req->wb_bytes = rqlast - rqfirst;
- req->wb_count++;
+#ifdef CONFIG_NFS_V3
+/*
+ * Add a request to the inode's commit list.
+ */
+static inline void
+nfs_mark_request_commit(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
- return 1;
+ spin_lock(&nfs_wreq_lock);
+ if (list_empty(&req->wb_list)) {
+ nfs_list_add_request(req, &inode->u.nfs_i.commit);
+ inode->u.nfs_i.ncommit++;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ /*
+ * NB: the call to inode_schedule_scan() must lie outside the
+ * spinlock since it can run flushd().
+ */
+ inode_schedule_scan(inode, req->wb_timeout);
}
+#endif
-static kmem_cache_t *nfs_wreq_cachep;
-
-int nfs_init_wreqcache(void)
+/*
+ * Lock the page of an asynchronous request
+ */
+static inline int
+nfs_lock_request(struct nfs_page *req)
{
- nfs_wreq_cachep = kmem_cache_create("nfs_wreq",
- sizeof(struct nfs_wreq),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (nfs_wreq_cachep == NULL)
- return -ENOMEM;
- return 0;
+ if (NFS_WBACK_BUSY(req))
+ return 0;
+ req->wb_count++;
+ req->wb_flags |= PG_BUSY;
+ return 1;
}
static inline void
-free_write_request(struct nfs_wreq * req)
+nfs_unlock_request(struct nfs_page *req)
{
- if (!--req->wb_count)
- kmem_cache_free(nfs_wreq_cachep, req);
+ if (!NFS_WBACK_BUSY(req)) {
+ printk(KERN_ERR "NFS: Invalid unlock attempted\n");
+ return;
+ }
+ req->wb_flags &= ~PG_BUSY;
+ wake_up(&req->wb_wait);
+ nfs_release_request(req);
}
/*
- * Create and initialize a writeback request
+ * Create a write request.
+ * Page must be locked by the caller. This makes sure we never create
+ * two different requests for the same page, and avoids possible deadlock
+ * when we reach the hard limit on the number of dirty pages.
*/
-static inline struct nfs_wreq *
-create_write_request(struct file * file, struct page *page, unsigned int offset, unsigned int bytes)
-{
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
- struct nfs_wreq *wreq;
- struct rpc_task *task;
+static struct nfs_page *
+nfs_create_request(struct inode *inode, struct file *file, struct page *page,
+ unsigned int offset, unsigned int count)
+{
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct nfs_page *req = NULL;
+ long timeout;
- dprintk("NFS: create_write_request(%s/%s, %ld+%d)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- (page->index << PAGE_CACHE_SHIFT) + offset, bytes);
+ /* Deal with hard/soft limits.
+ */
+ do {
+ /* If we're over the soft limit, flush out old requests */
+ if (nfs_nr_requests >= MAX_REQUEST_SOFT)
+ nfs_wb_file(inode, file);
+
+ /* If we're still over the soft limit, wake up some requests */
+ if (nfs_nr_requests >= MAX_REQUEST_SOFT) {
+ dprintk("NFS: hit soft limit (%d requests)\n",
+ nfs_nr_requests);
+ if (!cache->task)
+ nfs_reqlist_init(NFS_SERVER(inode));
+ nfs_wake_flushd();
+ }
+
+ /* If we haven't reached the hard limit yet,
+ * try to allocate the request struct */
+ if (nfs_nr_requests < MAX_REQUEST_HARD) {
+ req = nfs_page_alloc();
+ if (req != NULL)
+ break;
+ }
- /* FIXME: Enforce hard limit on number of concurrent writes? */
- wreq = kmem_cache_alloc(nfs_wreq_cachep, SLAB_KERNEL);
- if (!wreq)
- goto out_fail;
- memset(wreq, 0, sizeof(*wreq));
-
- task = &wreq->wb_task;
- rpc_init_task(task, clnt, nfs_wback_result, RPC_TASK_NFSWRITE);
- task->tk_calldata = wreq;
- task->tk_action = nfs_wback_begin;
-
- rpcauth_lookupcred(task); /* Obtain user creds */
- if (task->tk_status < 0)
- goto out_req;
+ /* We're over the hard limit. Wait for better times */
+ dprintk("NFS: create_request sleeping (total %d pid %d)\n",
+ nfs_nr_requests, current->pid);
+
+ timeout = 1 * HZ;
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_INTR) {
+ interruptible_sleep_on_timeout(&cache->request_wait,
+ timeout);
+ if (signalled())
+ break;
+ } else
+ sleep_on_timeout(&cache->request_wait, timeout);
+
+ dprintk("NFS: create_request waking up (tot %d pid %d)\n",
+ nfs_nr_requests, current->pid);
+ } while (!req);
+ if (!req)
+ return NULL;
- /* Put the task on inode's writeback request list. */
+ /* Initialize the request struct. Initially, we assume a
+ * long write-back delay. This will be adjusted in
+ * nfs_update_request below if the region is not locked. */
+ req->wb_page = page;
+ atomic_inc(&page->count);
+ req->wb_offset = offset;
+ req->wb_bytes = count;
+ /* If the region is locked, adjust the timeout */
+ if (region_locked(inode, req))
+ req->wb_timeout = jiffies + NFS_WRITEBACK_LOCKDELAY;
+ else
+ req->wb_timeout = jiffies + NFS_WRITEBACK_DELAY;
+ req->wb_file = file;
+ req->wb_cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
get_file(file);
- wreq->wb_file = file;
- wreq->wb_pid = current->pid;
- wreq->wb_page = page;
- init_waitqueue_head(&wreq->wb_wait);
- wreq->wb_offset = offset;
- wreq->wb_bytes = bytes;
- wreq->wb_count = 2; /* One for the IO, one for us */
-
- kmap(page);
- append_write_request(&NFS_WRITEBACK(inode), wreq);
+ req->wb_count = 1;
+
+ /* register request's existence */
+ cache->nr_requests++;
+ nfs_nr_requests++;
+ return req;
+}
- if (nr_write_requests++ > NFS_WRITEBACK_MAX*3/4)
- rpc_wake_up_next(&write_queue);
- return wreq;
+/*
+ * Release all resources associated with a write request after it
+ * has been committed to stable storage
+ *
+ * Note: Should always be called with the spinlock held!
+ */
+void
+nfs_release_request(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct page *page = req->wb_page;
+
+ spin_lock(&nfs_wreq_lock);
+ if (--req->wb_count) {
+ spin_unlock(&nfs_wreq_lock);
+ return;
+ }
+ spin_unlock(&nfs_wreq_lock);
-out_req:
- rpc_release_task(task);
- kmem_cache_free(nfs_wreq_cachep, wreq);
-out_fail:
- return NULL;
+ if (!list_empty(&req->wb_list)) {
+ printk(KERN_ERR "NFS: Request released while still on a list!\n");
+ nfs_list_remove_request(req);
+ }
+ if (!list_empty(&req->wb_hash)) {
+ printk(KERN_ERR "NFS: Request released while still hashed!\n");
+ nfs_inode_remove_request(req);
+ }
+ if (NFS_WBACK_BUSY(req))
+ printk(KERN_ERR "NFS: Request released while still locked!\n");
+
+ rpcauth_releasecred(NFS_CLIENT(inode)->cl_auth, req->wb_cred);
+ fput(req->wb_file);
+ page_cache_release(page);
+ nfs_page_free(req);
+ /* wake up anyone waiting to allocate a request */
+ cache->nr_requests--;
+ nfs_nr_requests--;
+ wake_up(&cache->request_wait);
}
/*
- * Schedule a writeback RPC call.
- * If the server is congested, don't add to our backlog of queued
- * requests but call it synchronously.
- * The function returns whether we should wait for the thing or not.
- *
- * FIXME: Here we could walk the inode's lock list to see whether the
- * page we're currently writing to has been write-locked by the caller.
- * If it is, we could schedule an async write request with a long
- * delay in order to avoid writing back the page until the lock is
- * released.
+ * Wait for a request to complete.
+ *
+ * Interruptible by signals only if mounted with intr flag.
*/
-static inline int
-schedule_write_request(struct nfs_wreq *req, int sync)
+static int
+nfs_wait_on_request(struct nfs_page *req)
{
- struct rpc_task *task = &req->wb_task;
- struct file *file = req->wb_file;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ int retval;
- if (NFS_CONGESTED(inode) || nr_write_requests >= NFS_WRITEBACK_MAX)
- sync = 1;
+ if (!NFS_WBACK_BUSY(req))
+ return 0;
+ req->wb_count++;
+ retval = nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req));
+ nfs_release_request(req);
+ return retval;
+}
- if (sync) {
- sigset_t oldmask;
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
- dprintk("NFS: %4d schedule_write_request (sync)\n",
- task->tk_pid);
- /* Page is already locked */
- rpc_clnt_sigmask(clnt, &oldmask);
- rpc_execute(task);
- rpc_clnt_sigunmask(clnt, &oldmask);
- } else {
- dprintk("NFS: %4d schedule_write_request (async)\n",
- task->tk_pid);
- task->tk_flags |= RPC_TASK_ASYNC;
- task->tk_timeout = NFS_WRITEBACK_DELAY;
- rpc_sleep_on(&write_queue, task, NULL, NULL);
+/*
+ * Wait for all outstanding write requests in a byte range to complete.
+ *
+ * Interruptible by signals only if mounted with intr flag.
+ */
+static int
+nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long start, unsigned int count)
+{
+ struct list_head *p, *head;
+ unsigned long idx_start, idx_end;
+ unsigned int pages = 0;
+ int error;
+
+ idx_start = start >> PAGE_CACHE_SHIFT;
+ if (count == 0)
+ idx_end = ~0;
+ else {
+ unsigned long idx_count = (count-1) >> PAGE_CACHE_SHIFT;
+ idx_end = idx_start + idx_count;
}
+ spin_lock(&nfs_wreq_lock);
+ head = &inode->u.nfs_i.writeback;
+ p = head->next;
+ while (p != head) {
+ unsigned long pg_idx;
+ struct nfs_page *req = nfs_inode_wb_entry(p);
+
+ p = p->next;
+
+ if (file && req->wb_file != file)
+ continue;
+
+ pg_idx = page_index(req->wb_page);
+ if (pg_idx < idx_start || pg_idx > idx_end)
+ continue;
- return sync;
+ if (!NFS_WBACK_BUSY(req))
+ continue;
+ req->wb_count++;
+ spin_unlock(&nfs_wreq_lock);
+ error = nfs_wait_on_request(req);
+ nfs_release_request(req);
+ if (error < 0)
+ return error;
+ spin_lock(&nfs_wreq_lock);
+ p = head->next;
+ pages++;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
}
/*
- * Wait for request to complete.
+ * Scan a list for requests whose write-back timeout has expired,
+ * and move them onto the destination list.
*/
static int
-wait_on_write_request(struct nfs_wreq *req)
+nfs_scan_list_timeout(struct list_head *head, struct list_head *dst, struct inode *inode)
{
- struct file *file = req->wb_file;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
- DECLARE_WAITQUEUE(wait, current);
- sigset_t oldmask;
- int retval;
-
- /* Make sure it's started.. */
- if (!WB_INPROGRESS(req))
- rpc_wake_up_task(&req->wb_task);
+ struct list_head *p;
+ struct nfs_page *req;
+ int pages = 0;
+
+ p = head->next;
+ while (p != head) {
+ req = nfs_list_entry(p);
+ p = p->next;
+ if (time_after(req->wb_timeout, jiffies)) {
+ if (time_after(NFS_NEXTSCAN(inode), req->wb_timeout))
+ NFS_NEXTSCAN(inode) = req->wb_timeout;
+ continue;
+ }
+ if (!nfs_lock_request(req))
+ continue;
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, dst);
+ pages++;
+ }
+ return pages;
+}
+
+static int
+nfs_scan_dirty_timeout(struct inode *inode, struct list_head *dst)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list_timeout(&inode->u.nfs_i.dirty, dst, inode);
+ inode->u.nfs_i.ndirty -= pages;
+ if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
+
+#ifdef CONFIG_NFS_V3
+static int
+nfs_scan_commit_timeout(struct inode *inode, struct list_head *dst)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list_timeout(&inode->u.nfs_i.commit, dst, inode);
+ inode->u.nfs_i.ncommit -= pages;
+ if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
+#endif
+
+static int
+nfs_scan_list(struct list_head *src, struct list_head *dst, struct file *file, unsigned long start, unsigned int count)
+{
+ struct list_head *p;
+ struct nfs_page *req;
+ unsigned long idx_start, idx_end;
+ int pages;
+
+ pages = 0;
+ idx_start = start >> PAGE_CACHE_SHIFT;
+ if (count == 0)
+ idx_end = ~0;
+ else
+ idx_end = idx_start + ((count-1) >> PAGE_CACHE_SHIFT);
+ p = src->next;
+ while (p != src) {
+ unsigned long pg_idx;
+
+ req = nfs_list_entry(p);
+ p = p->next;
+
+ if (file && req->wb_file != file)
+ continue;
+
+ pg_idx = page_index(req->wb_page);
+ if (pg_idx < idx_start || pg_idx > idx_end)
+ continue;
+
+ if (!nfs_lock_request(req))
+ continue;
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, dst);
+ pages++;
+ }
+ return pages;
+}
+
+static int
+nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long start, unsigned int count)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list(&inode->u.nfs_i.dirty, dst, file, start, count);
+ inode->u.nfs_i.ndirty -= pages;
+ if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
+
+#ifdef CONFIG_NFS_V3
+static int
+nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long start, unsigned int count)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list(&inode->u.nfs_i.commit, dst, file, start, count);
+ inode->u.nfs_i.ncommit -= pages;
+ if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
+#endif
+
+
+static int
+coalesce_requests(struct list_head *src, struct list_head *dst, unsigned int maxpages)
+{
+ struct nfs_page *req = NULL;
+ unsigned int pages = 0;
+
+ while (!list_empty(src)) {
+ struct nfs_page *prev = req;
+
+ req = nfs_list_entry(src->next);
+ if (prev) {
+ if (req->wb_file != prev->wb_file)
+ break;
+
+ if (page_index(req->wb_page) != page_index(prev->wb_page)+1)
+ break;
+
+ if (req->wb_offset != 0)
+ break;
+ }
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, dst);
+ pages++;
+ if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE)
+ break;
+ if (pages >= maxpages)
+ break;
+ }
+ return pages;
+}
+
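To illustrate how coalesce_requests() walks a sorted dirty list:

/*
 * Example: maxpages == 4 and the source list holds full-page
 * requests for pages 3, 4, 5 and 9 of the same file.  Pages 3, 4
 * and 5 are moved to dst: each is contiguous with its predecessor
 * and starts at offset 0.  Page 9 breaks the run, so the caller
 * sends pages 3-5 as a single WRITE and picks up page 9 on the
 * next pass.  A request that does not extend to the end of its
 * page is still moved, but terminates the run immediately.
 */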
+/*
+ * Try to update any existing write request, or create one if there is none.
+ * In order to match, the request's credentials must match those of
+ * the calling process.
+ *
+ * Note: Should always be called with the Page Lock held!
+ */
+static struct nfs_page *
+nfs_update_request(struct file* file, struct page *page,
+ unsigned long offset, unsigned int bytes)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct nfs_page *req, *new = NULL;
+ unsigned long rqend, end;
+
+ end = offset + bytes;
- rpc_clnt_sigmask(clnt, &oldmask);
- add_wait_queue(&req->wb_wait, &wait);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- retval = 0;
- if (req->wb_flags & NFS_WRITE_COMPLETE)
+ /* Loop over all inode entries and see if we find
+ * a request for the page we wish to update.
+ */
+ spin_lock(&nfs_wreq_lock);
+ req = _nfs_find_request(inode, page);
+ if (req) {
+ if (!nfs_lock_request(req)) {
+ spin_unlock(&nfs_wreq_lock);
+ nfs_wait_on_request(req);
+ nfs_release_request(req);
+ continue;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ if (new)
+ nfs_release_request(new);
break;
- retval = -ERESTARTSYS;
- if (signalled())
+ }
+
+ req = new;
+ if (req) {
+ nfs_lock_request(req);
+ nfs_inode_add_request(inode, req);
+ spin_unlock(&nfs_wreq_lock);
+ nfs_mark_request_dirty(req);
break;
- schedule();
+ }
+ spin_unlock(&nfs_wreq_lock);
+
+ /* Create the request. It's safe to sleep in this call because
+ * we only get here if the page is locked.
+ */
+ new = nfs_create_request(inode, file, page, offset, bytes);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* We have a request for our page. If the creds or the page
+ * addresses don't match, tell the caller to wait on the
+ * conflicting request.
+ */
+ rqend = req->wb_offset + req->wb_bytes;
+ if (req->wb_file != file
+ || req->wb_page != page
+ || !nfs_dirty_request(req)
+ || offset > rqend || end < req->wb_offset) {
+ nfs_unlock_request(req);
+ nfs_release_request(req);
+ return ERR_PTR(-EBUSY);
}
- remove_wait_queue(&req->wb_wait, &wait);
- current->state = TASK_RUNNING;
- rpc_clnt_sigunmask(clnt, &oldmask);
- return retval;
+
+ /* Okay, the request matches. Update the region */
+ if (offset < req->wb_offset) {
+ req->wb_offset = offset;
+ req->wb_bytes = rqend - req->wb_offset;
+ }
+
+ if (end > rqend)
+ req->wb_bytes = end - req->wb_offset;
+
+ nfs_unlock_request(req);
+
+ return req;
}
/*
- * Write a page to the server. This will be used for NFS swapping only
- * (for now), and we currently do this synchronously only.
+ * This is the strategy routine for NFS.
+ * It is called by nfs_updatepage whenever the user wrote up to the end
+ * of a page.
+ *
+ * We always try to submit a set of requests in parallel so that the
+ * server's write code can gather writes. This is mainly for the benefit
+ * of NFSv2.
+ *
+ * We never submit more requests than we think the remote can handle.
+ * For UDP sockets, we make sure we don't exceed the congestion window;
+ * for TCP, we limit the number of requests to 8.
+ *
+ * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
+ * should be sent out in one go. This is for the benefit of NFSv2 servers
+ * that perform write gathering.
+ *
+ * FIXME: Different servers may have different sweet spots.
+ * Record the average congestion window in server struct?
*/
-int
-nfs_writepage(struct dentry * dentry, struct page *page)
+#define NFS_STRATEGY_PAGES 8
+static void
+nfs_strategy(struct file *file)
{
- struct inode *inode = dentry->d_inode;
- unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
- unsigned offset = PAGE_CACHE_SIZE;
- int err;
+ struct inode *inode = file->f_dentry->d_inode;
+ unsigned int dirty, wpages;
- /* easy case */
- if (page->index < end_index)
- goto do_it;
- /* things got complicated... */
- offset = inode->i_size & (PAGE_CACHE_SIZE-1);
- /* OK, are we completely out? */
- if (page->index >= end_index+1 || !offset)
- return -EIO;
-do_it:
- err = nfs_writepage_sync(dentry, inode, page, 0, offset);
- if ( err == offset) return 0;
- return err;
+ dirty = inode->u.nfs_i.ndirty;
+ wpages = NFS_SERVER(inode)->wsize >> PAGE_CACHE_SHIFT;
+#ifdef CONFIG_NFS_V3
+ if (NFS_PROTO(inode)->version == 2) {
+ if (dirty >= NFS_STRATEGY_PAGES * wpages)
+ nfs_flush_file(inode, file, 0, 0, 0);
+ } else {
+ if (dirty >= wpages)
+ nfs_flush_file(inode, file, 0, 0, 0);
+ }
+#else
+ if (dirty >= NFS_STRATEGY_PAGES * wpages)
+ nfs_flush_file(inode, file, 0, 0, 0);
+#endif
+ /*
+ * If we're running out of requests, flush out everything
+ * in order to reduce memory usage...
+ */
+ if (nfs_nr_requests > MAX_REQUEST_SOFT)
+ nfs_wb_file(inode, file);
+}
+
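With the defaults above, and assuming wsize == 32768 and 4K pages, the thresholds work out as follows:

/*
 * wpages = 32768 >> 12 = 8 pages per WRITE.
 *
 *	NFSv2:	flush once ndirty >= NFS_STRATEGY_PAGES * 8 == 64
 *		dirty pages, so the server sees a burst of eight
 *		parallel WRITEs that it can gather.
 *	NFSv3:	flush once ndirty >= 8; unstable WRITEs followed by
 *		COMMIT make server-side write gathering unnecessary.
 */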
+int
+nfs_flush_incompatible(struct file *file, struct page *page)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct nfs_page *req;
+ int status = 0;
+ /*
+ * Look for a request corresponding to this page. If there
+ * is one, and it belongs to another file, we flush it out
+ * before we try to copy anything into the page. Do this
+ * due to the lack of an ACCESS-type call in NFSv2.
+ * Also do the same if we find a request from an existing
+ * dropped page.
+ */
+ req = nfs_find_request(inode,page);
+ if (req) {
+ if (req->wb_file != file || req->wb_page != page)
+ status = nfs_wb_page(inode, page);
+ nfs_release_request(req);
+ }
+ return (status < 0) ? status : 0;
}
/*
@@ -446,27 +1032,13 @@
{
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
- struct nfs_wreq *req;
+ struct nfs_page *req;
int synchronous = file->f_flags & O_SYNC;
- int retval;
+ int status = 0;
- dprintk("NFS: nfs_updatepage(%s/%s %d@%ld)\n",
+ dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- count, (page->index << PAGE_CACHE_SHIFT) +offset);
-
- /*
- * Try to find a corresponding request on the writeback queue.
- * If there is one, we can be sure that this request is not
- * yet being processed, because we hold a lock on the page.
- *
- * If the request was created by us, update it. Otherwise,
- * transfer the page lock and flush out the dirty page now.
- * After returning, generic_file_write will wait on the
- * page and retry the update.
- */
- req = find_write_request(inode, page);
- if (req && req->wb_file == file && update_write_request(req, offset, count))
- goto updated;
+ count, page_offset(page) + offset);
/*
* If wsize is smaller than page size, update and write
@@ -475,241 +1047,542 @@
if (NFS_SERVER(inode)->wsize < PAGE_SIZE)
return nfs_writepage_sync(dentry, inode, page, offset, count);
- /* Create the write request. */
- req = create_write_request(file, page, offset, count);
- if (!req)
- return -ENOBUFS;
-
/*
- * Ok, there's another user of this page with the new request..
- * The IO completion will then free the page and the dentry.
+ * Try to find an NFS request corresponding to this page
+ * and update it.
+ * If the existing request cannot be updated, we must flush
+ * it out now.
*/
- get_page(page);
-
- /* Schedule request */
- synchronous = schedule_write_request(req, synchronous);
+ do {
+ req = nfs_update_request(file, page, offset, count);
+ status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
+ if (status != -EBUSY)
+ break;
+ /* Request could not be updated. Flush it out and try again */
+ status = nfs_wb_page(inode, page);
+ } while (status >= 0);
+ if (status < 0)
+ goto done;
-updated:
- if (req->wb_bytes == PAGE_SIZE)
+ if (req->wb_bytes == PAGE_CACHE_SIZE)
SetPageUptodate(page);
- retval = 0;
+ status = 0;
if (synchronous) {
- int status = wait_on_write_request(req);
- if (status) {
- nfs_cancel_request(req);
- retval = status;
- } else {
- status = req->wb_status;
- if (status < 0)
- retval = status;
- }
+ int error;
- if (retval < 0)
- ClearPageUptodate(page);
+ error = nfs_sync_file(inode, file, page_offset(page) + offset, count, FLUSH_SYNC|FLUSH_STABLE);
+ if (error < 0 || (error = file->f_error) < 0)
+ status = error;
+ file->f_error = 0;
+ } else {
+ /* If we wrote up to the end of the page, call the strategy
+ * routine so it can send out a bunch of requests.
+ */
+ if (req->wb_offset == 0 && req->wb_bytes == PAGE_CACHE_SIZE)
+ nfs_strategy(file);
}
-
- free_write_request(req);
- return retval;
+ nfs_release_request(req);
+done:
+ dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
+ status, inode->i_size);
+ if (status < 0)
+ clear_bit(PG_uptodate, &page->flags);
+ return status;
}
/*
- * Cancel a write request. We always mark it cancelled,
- * but if it's already in progress there's no point in
- * calling rpc_exit, and we don't want to overwrite the
- * tk_status field.
- */
+ * Set up the argument/result storage required for the RPC call.
+ */
static void
-nfs_cancel_request(struct nfs_wreq *req)
+nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data)
{
- req->wb_flags |= NFS_WRITE_CANCELLED;
- if (!WB_INPROGRESS(req)) {
- rpc_exit(&req->wb_task, 0);
- rpc_wake_up_task(&req->wb_task);
+ struct nfs_page *req;
+ struct iovec *iov;
+ unsigned int count;
+
+ /* Set up the RPC argument and reply structs
+ * NB: take care not to mess about with data->commit et al. */
+
+ iov = data->args.iov;
+ count = 0;
+ while (!list_empty(head)) {
+ struct nfs_page *req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &data->pages);
+ iov->iov_base = (void *)(kmap(req->wb_page) + req->wb_offset);
+ iov->iov_len = req->wb_bytes;
+ count += req->wb_bytes;
+ iov++;
+ data->args.nriov++;
}
+ req = nfs_list_entry(data->pages.next);
+ data->file = req->wb_file;
+ data->cred = req->wb_cred;
+ data->args.fh = NFS_FH(req->wb_file->f_dentry);
+ data->args.offset = page_offset(req->wb_page) + req->wb_offset;
+ data->args.count = count;
+ data->res.fattr = &data->fattr;
+ data->res.count = count;
+ data->res.verf = &data->verf;
}
+
/*
- * Cancel all writeback requests, both pending and in progress.
+ * Create an RPC task for the given write request and kick it.
+ * The page must have been locked by the caller.
+ *
+ * It may happen that the page we're passed is not marked dirty.
+ * This is the case if nfs_updatepage detects a conflicting request
+ * that has been written but not committed.
*/
-static void
-nfs_cancel_dirty(struct inode *inode, pid_t pid)
+static int
+nfs_flush_one(struct list_head *head, struct file *file, int how)
{
- struct nfs_wreq *head, *req;
+ struct dentry *dentry = file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ struct nfs_write_data *data;
+ struct rpc_task *task;
+ struct rpc_message msg;
+ int flags,
+ async = !(how & FLUSH_SYNC),
+ stable = (how & FLUSH_STABLE);
+ sigset_t oldset;
+
+
+ data = nfs_writedata_alloc();
+ if (!data)
+ goto out_bad;
+ task = &data->task;
+
+ /* Set the initial flags for the task. */
+ flags = (async) ? RPC_TASK_ASYNC : 0;
+
+ /* Set up the argument struct */
+ nfs_write_rpcsetup(head, data);
+ if (stable) {
+ if (!inode->u.nfs_i.ncommit)
+ data->args.stable = NFS_FILE_SYNC;
+ else
+ data->args.stable = NFS_DATA_SYNC;
+ } else
+ data->args.stable = NFS_UNSTABLE;
+
+ /* Finalize the task. */
+ rpc_init_task(task, clnt, nfs_writeback_done, flags);
+ task->tk_calldata = data;
+
+#ifdef CONFIG_NFS_V3
+ msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_WRITE : NFSPROC_WRITE;
+#else
+ msg.rpc_proc = NFSPROC_WRITE;
+#endif
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ msg.rpc_cred = data->cred;
+
+ dprintk("NFS: %4d initiated write call (req %s/%s count %d nriov %d)\n",
+ task->tk_pid,
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name,
+ data->args.count, data->args.nriov);
+
+ rpc_clnt_sigmask(clnt, &oldset);
+ rpc_call_setup(task, &msg, 0);
+ rpc_execute(task);
+ rpc_clnt_sigunmask(clnt, &oldset);
+ return 0;
+ out_bad:
+ while (!list_empty(head)) {
+ struct nfs_page *req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_mark_request_dirty(req);
+ nfs_unlock_request(req);
+ }
+ return -ENOMEM;
+}
- req = head = NFS_WRITEBACK(inode);
- while (req != NULL) {
- if (pid == 0 || req->wb_pid == pid)
- nfs_cancel_request(req);
- if ((req = WB_NEXT(req)) == head)
+static int
+nfs_flush_list(struct inode *inode, struct list_head *head, int how)
+{
+ LIST_HEAD(one_request);
+ struct nfs_page *req;
+ int error = 0;
+ unsigned int pages = 0,
+ wpages = NFS_SERVER(inode)->wsize >> PAGE_CACHE_SHIFT;
+
+ while (!list_empty(head)) {
+ pages += coalesce_requests(head, &one_request, wpages);
+ req = nfs_list_entry(one_request.next);
+ error = nfs_flush_one(&one_request, req->wb_file, how);
+ if (error < 0)
break;
}
+ if (error >= 0)
+ return pages;
+
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_mark_request_dirty(req);
+ nfs_unlock_request(req);
+ }
+ return error;
}
+
/*
- * If we're waiting on somebody else's request
- * we need to increment the counter during the
- * wait so that the request doesn't disappear
- * from under us during the wait..
+ * This function is called when the WRITE call is complete.
*/
-static int FASTCALL(wait_on_other_req(struct nfs_wreq *));
-static int wait_on_other_req(struct nfs_wreq *req)
+static void
+nfs_writeback_done(struct rpc_task *task)
{
- int retval;
- req->wb_count++;
- retval = wait_on_write_request(req);
- free_write_request(req);
- return retval;
-}
+ struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+ struct nfs_writeargs *argp = &data->args;
+ struct nfs_writeres *resp = &data->res;
+ struct dentry *dentry = data->file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct nfs_page *req;
-/*
- * This writes back a set of requests according to the condition.
- *
- * If this ever gets much more convoluted, use a fn pointer for
- * the condition..
- */
-#define NFS_WB(inode, cond) { int retval = 0 ; \
- do { \
- struct nfs_wreq *req = NFS_WRITEBACK(inode); \
- struct nfs_wreq *head = req; \
- if (!req) break; \
- for (;;) { \
- if (!(req->wb_flags & NFS_WRITE_COMPLETE)) \
- if (cond) break; \
- req = WB_NEXT(req); \
- if (req == head) goto out; \
- } \
- retval = wait_on_other_req(req); \
- } while (!retval); \
-out: return retval; \
+ dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
+ task->tk_pid, task->tk_status);
+
+ /* We can't handle that yet, but we check for it nevertheless. */
+ if (resp->count < argp->count && task->tk_status >= 0) {
+ static unsigned long complain = 0;
+ if (time_before(complain, jiffies)) {
+ printk(KERN_WARNING
+ "NFS: Server wrote less than requested.\n");
+ complain = jiffies + 300 * HZ;
+ }
+ /* Can't do anything about it right now except throw
+ * an error. */
+ task->tk_status = -EIO;
+ }
+#ifdef CONFIG_NFS_V3
+ if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+ /* We tried a write call, but the server did not
+ * commit data to stable storage even though we
+ * requested it.
+ */
+ static unsigned long complain = 0;
+
+ if (time_before(complain, jiffies)) {
+ printk(KERN_NOTICE "NFS: faulty NFSv3 server %s:"
+ " (committed = %d) != (stable = %d)\n",
+ NFS_SERVER(inode)->hostname,
+ resp->verf->committed, argp->stable);
+ complain = jiffies + 300 * HZ;
+ }
+ }
+#endif
+
+ /* Update attributes as result of writeback. */
+ if (task->tk_status >= 0)
+ nfs_write_attributes(inode, resp->fattr);
+
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+
+ kunmap(req->wb_page);
+
+ dprintk("NFS: write (%s/%s %d@%Ld)",
+ req->wb_file->f_dentry->d_parent->d_name.name,
+ req->wb_file->f_dentry->d_name.name,
+ req->wb_bytes,
+ page_offset(req->wb_page) + req->wb_offset);
+
+ if (task->tk_status < 0) {
+ req->wb_file->f_error = task->tk_status;
+ nfs_inode_remove_request(req);
+ dprintk(", error = %d\n", task->tk_status);
+ goto next;
+ }
+
+#ifdef CONFIG_NFS_V3
+ if (resp->verf->committed != NFS_UNSTABLE) {
+ nfs_inode_remove_request(req);
+ dprintk(" OK\n");
+ goto next;
+ }
+ memcpy(&req->wb_verf, resp->verf, sizeof(req->wb_verf));
+ req->wb_timeout = jiffies + NFS_COMMIT_DELAY;
+ nfs_mark_request_commit(req);
+ dprintk(" marked for commit\n");
+#else
+ nfs_inode_remove_request(req);
+#endif
+ next:
+ nfs_unlock_request(req);
+ }
+ nfs_writedata_release(task);
}
-int
-nfs_wb_all(struct inode *inode)
+
+#ifdef CONFIG_NFS_V3
+/*
+ * Set up the argument/result storage required for the RPC call.
+ */
+static void
+nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data)
{
- NFS_WB(inode, 1);
+ struct nfs_page *req;
+ struct dentry *dentry;
+ struct inode *inode;
+ unsigned long start, end, len;
+
+ /* Set up the RPC argument and reply structs
+ * NB: take care not to mess about with data->commit et al. */
+
+ end = 0;
+ start = ~0;
+ req = nfs_list_entry(head->next);
+ data->file = req->wb_file;
+ data->cred = req->wb_cred;
+ dentry = data->file->f_dentry;
+ inode = dentry->d_inode;
+ while (!list_empty(head)) {
+ struct nfs_page *req;
+ unsigned long rqstart, rqend;
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &data->pages);
+ rqstart = page_offset(req->wb_page) + req->wb_offset;
+ rqend = rqstart + req->wb_bytes;
+ if (rqstart < start)
+ start = rqstart;
+ if (rqend > end)
+ end = rqend;
+ }
+ data->args.fh = NFS_FH(dentry);
+ data->args.offset = start;
+ len = end - start;
+ if (end >= inode->i_size || len > (~((u32)0) >> 1))
+ len = 0;
+ data->res.count = data->args.count = (u32)len;
+ data->res.fattr = &data->fattr;
+ data->res.verf = &data->verf;
}
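For illustration, the byte-range computation in nfs_commit_rpcsetup() behaves like this:

/*
 * Two committable requests covering bytes 8192-12287 and
 * 20480-24575 give start == 8192, end == 24576, len == 16384, so
 * the COMMIT spans bytes 8192-24575, hole included.  If end
 * reaches i_size, or len does not fit in the 31 bits a COMMIT
 * count can safely express, count is set to 0, which asks the
 * server to commit through the end of the file.
 */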
/*
- * Write back all requests on one page - we do this before reading it.
+ * Commit dirty pages
*/
-int
-nfs_wb_page(struct inode *inode, struct page *page)
+static int
+nfs_commit_list(struct list_head *head, int how)
{
- NFS_WB(inode, req->wb_page == page);
+ struct rpc_message msg;
+ struct file *file;
+ struct rpc_clnt *clnt;
+ struct nfs_write_data *data;
+ struct rpc_task *task;
+ struct nfs_page *req;
+ int flags,
+ async = !(how & FLUSH_SYNC);
+ sigset_t oldset;
+
+ data = nfs_writedata_alloc();
+
+ if (!data)
+ goto out_bad;
+ task = &data->task;
+
+ flags = (async) ? RPC_TASK_ASYNC : 0;
+
+ /* Set up the argument struct */
+ nfs_commit_rpcsetup(head, data);
+ req = nfs_list_entry(data->pages.next);
+ file = req->wb_file;
+ clnt = NFS_CLIENT(file->f_dentry->d_inode);
+
+ rpc_init_task(task, clnt, nfs_commit_done, flags);
+ task->tk_calldata = data;
+
+ msg.rpc_proc = NFS3PROC_COMMIT;
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ msg.rpc_cred = data->cred;
+
+ dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
+ rpc_clnt_sigmask(clnt, &oldset);
+ rpc_call_setup(task, &msg, 0);
+ rpc_execute(task);
+ rpc_clnt_sigunmask(clnt, &oldset);
+ return 0;
+ out_bad:
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_mark_request_commit(req);
+ nfs_unlock_request(req);
+ }
+ return -ENOMEM;
}
/*
- * Write back all pending writes from one file descriptor..
+ * COMMIT call returned
*/
-int
-nfs_wb_file(struct inode *inode, struct file *file)
+static void
+nfs_commit_done(struct rpc_task *task)
{
- NFS_WB(inode, req->wb_file == file);
+ struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata;
+ struct nfs_writeres *resp = &data->res;
+ struct nfs_page *req;
+ struct dentry *dentry = data->file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+
+ dprintk("NFS: %4d nfs_commit_done (status %d)\n",
+ task->tk_pid, task->tk_status);
+
+ nfs_refresh_inode(inode, resp->fattr);
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+
+ dprintk("NFS: commit (%s/%s %d@%ld)",
+ req->wb_file->f_dentry->d_parent->d_name.name,
+ req->wb_file->f_dentry->d_name.name,
+ req->wb_bytes,
+ page_offset(req->wb_page) + req->wb_offset);
+ if (task->tk_status < 0) {
+ req->wb_file->f_error = task->tk_status;
+ nfs_inode_remove_request(req);
+ dprintk(", error = %d\n", task->tk_status);
+ goto next;
+ }
+
+ /* Okay, COMMIT succeeded, apparently. Check the verifier
+ * returned by the server against all stored verfs. */
+ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
+ /* We have a match */
+ nfs_inode_remove_request(req);
+ dprintk(" OK\n");
+ goto next;
+ }
+ /* We have a mismatch. Write the page again */
+ dprintk(" mismatch\n");
+ nfs_mark_request_dirty(req);
+ next:
+ nfs_unlock_request(req);
+ }
+ nfs_writedata_release(task);
}
+#endif
-void
-nfs_inval(struct inode *inode)
+int nfs_flush_file(struct inode *inode, struct file *file, unsigned long start,
+ unsigned int count, int how)
{
- nfs_cancel_dirty(inode,0);
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
+
+ pages = nfs_scan_dirty(inode, &head, file, start, count);
+ if (pages)
+ error = nfs_flush_list(inode, &head, how);
+ if (error < 0)
+ return error;
+ return pages;
}
-/*
- * The following procedures make up the writeback finite state machinery:
- *
- * 1. Try to lock the page if not yet locked by us,
- * set up the RPC call info, and pass to the call FSM.
- */
-static void
-nfs_wback_begin(struct rpc_task *task)
+int nfs_flush_timeout(struct inode *inode, int how)
{
- struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata;
- struct page *page = req->wb_page;
- struct file *file = req->wb_file;
- struct dentry *dentry = file->f_dentry;
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
- dprintk("NFS: %4d nfs_wback_begin (%s/%s, status=%d flags=%x)\n",
- task->tk_pid, dentry->d_parent->d_name.name,
- dentry->d_name.name, task->tk_status, req->wb_flags);
+ pages = nfs_scan_dirty_timeout(inode, &head);
+ if (pages)
+ error = nfs_flush_list(inode, &head, how);
+ if (error < 0)
+ return error;
+ return pages;
+}
- task->tk_status = 0;
+#ifdef CONFIG_NFS_V3
+int nfs_commit_file(struct inode *inode, struct file *file, unsigned long start,
+ unsigned int count, int how)
+{
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
- /* Setup the task struct for a writeback call */
- req->wb_flags |= NFS_WRITE_INPROGRESS;
- req->wb_args.fh = NFS_FH(dentry);
- req->wb_args.offset = (page->index << PAGE_CACHE_SHIFT) + req->wb_offset;
- req->wb_args.count = req->wb_bytes;
- req->wb_args.buffer = (void *) (page_address(page) + req->wb_offset);
+ pages = nfs_scan_commit(inode, &head, file, start, count);
+ if (pages)
+ error = nfs_commit_list(&head, how);
+ if (error < 0)
+ return error;
+ return pages;
+}
- rpc_call_setup(task, NFSPROC_WRITE, &req->wb_args, &req->wb_fattr, 0);
+int nfs_commit_timeout(struct inode *inode, int how)
+{
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
- return;
+ pages = nfs_scan_commit_timeout(inode, &head);
+ if (pages) {
+ pages += nfs_scan_commit(inode, &head, NULL, 0, 0);
+ error = nfs_commit_list(&head, how);
+ }
+ if (error < 0)
+ return error;
+ return pages;
}
+#endif
-/*
- * 2. Collect the result
- */
-static void
-nfs_wback_result(struct rpc_task *task)
+int nfs_sync_file(struct inode *inode, struct file *file, unsigned long start,
+ unsigned int count, int how)
{
- struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata;
- struct file *file = req->wb_file;
- struct page *page = req->wb_page;
- int status = task->tk_status;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
+ int error,
+ wait;
- dprintk("NFS: %4d nfs_wback_result (%s/%s, status=%d, flags=%x)\n",
- task->tk_pid, dentry->d_parent->d_name.name,
- dentry->d_name.name, status, req->wb_flags);
-
- /* Set the WRITE_COMPLETE flag, but leave WRITE_INPROGRESS set */
- req->wb_flags |= NFS_WRITE_COMPLETE;
- req->wb_status = status;
-
- if (status < 0) {
- req->wb_flags |= NFS_WRITE_INVALIDATE;
- file->f_error = status;
- } else if (!WB_CANCELLED(req)) {
- struct nfs_fattr *fattr = &req->wb_fattr;
- /* Update attributes as result of writeback.
- * Beware: when UDP replies arrive out of order, we
- * may end up overwriting a previous, bigger file size.
- *
- * When the file size shrinks we cancel all pending
- * writebacks.
- */
- if (fattr->mtime.seconds >= inode->i_mtime) {
- if (fattr->size < inode->i_size)
- fattr->size = inode->i_size;
-
- /* possible Solaris 2.5 server bug workaround */
- if (inode->i_ino == fattr->fileid) {
- /*
- * We expect these values to change, and
- * don't want to invalidate the caches.
- */
- inode->i_size = fattr->size;
- inode->i_mtime = fattr->mtime.seconds;
- nfs_refresh_inode(inode, fattr);
- }
- else
- printk("nfs_wback_result: inode %ld, got %u?\n",
- inode->i_ino, fattr->fileid);
- }
- }
+ wait = how & FLUSH_WAIT;
+ how &= ~FLUSH_WAIT;
- rpc_release_task(task);
+ if (!inode && file)
+ inode = file->f_dentry->d_inode;
- if (WB_INVALIDATE(req))
- ClearPageUptodate(page);
+ do {
+ error = 0;
+ if (wait)
+ error = nfs_wait_on_requests(inode, file, start, count);
+ if (error == 0)
+ error = nfs_flush_file(inode, file, start, count, how);
+#ifdef CONFIG_NFS_V3
+ if (error == 0)
+ error = nfs_commit_file(inode, file, start, count, how);
+#endif
+ } while (error > 0);
+ return error;
+}
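The return-value convention is what drives the loop in nfs_sync_file():

/*
 * nfs_wait_on_requests(), nfs_flush_file() and (on NFSv3)
 * nfs_commit_file() each return the number of requests they acted
 * on, or a negative error.  A pass stops at the first non-zero
 * result, and the do/while repeats as long as progress is made;
 * nfs_sync_file() returns 0 only after a pass in which nothing was
 * left to wait for, flush, or commit.
 */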
- kunmap(page);
- __free_page(page);
- remove_write_request(&NFS_WRITEBACK(inode), req);
- nr_write_requests--;
- fput(req->wb_file);
+int nfs_init_nfspagecache(void)
+{
+ nfs_page_cachep = kmem_cache_create("nfs_page",
+ sizeof(struct nfs_page),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (nfs_page_cachep == NULL)
+ return -ENOMEM;
- wake_up(&req->wb_wait);
- free_write_request(req);
+ nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
+ sizeof(struct nfs_write_data),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (nfs_wdata_cachep == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void nfs_destroy_nfspagecache(void)
+{
+ if (kmem_cache_destroy(nfs_page_cachep))
+ printk(KERN_INFO "nfs_page: not all structures were freed\n");
+ if (kmem_cache_destroy(nfs_wdata_cachep))
+ printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
}
+