/* FreeBSD/Linux Kernel Cross Reference: sys/fs/nfs/write.c */
1 /*
2 * linux/fs/nfs/write.c
3 *
4 * Writing file data over NFS.
5 *
6 * We do it like this: When a (user) process wishes to write data to an
7 * NFS file, a write request is allocated that contains the RPC task data
8 * plus some info on the page to be written, and added to the inode's
9 * write chain. If the process writes past the end of the page, an async
10 * RPC call to write the page is scheduled immediately; otherwise, the call
11 * is delayed for a few seconds.
12 *
13 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
14 *
15 * Write requests are kept on the inode's writeback list. Each entry in
16 * that list references the page (portion) to be written. When the
17 * cache timeout has expired, the RPC task is woken up, and tries to
18 * lock the page. As soon as it manages to do so, the request is moved
19 * from the writeback list to the writelock list.
20 *
21 * Note: we must make sure never to confuse the inode passed in the
22 * write_page request with the one in page->inode. As far as I understand
23 * it, these are different when doing a swap-out.
24 *
25 * To understand everything that goes on here and in the NFS read code,
26 * one should be aware that a page is locked in exactly one of the following
27 * cases:
28 *
29 * - A write request is in progress.
30 * - A user process is in generic_file_write/nfs_update_page
31 * - A user process is in generic_file_read
32 *
33 * Also note that because of the way pages are invalidated in
34 * nfs_revalidate_inode, the following assertions hold:
35 *
36 * - If a page is dirty, there will be no read requests (a page will
37 * not be re-read unless invalidated by nfs_revalidate_inode).
38 * - If the page is not uptodate, there will be no pending write
39 * requests, and no process will be in nfs_update_page.
40 *
41 * FIXME: Interaction with the vmscan routines is not optimal yet.
42 * Either vmscan must be made nfs-savvy, or we need a different page
43 * reclaim concept that supports something like FS-independent
44 * buffer_heads with a b_ops-> field.
45 *
46 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
47 */
48
49 #include <linux/config.h>
50 #include <linux/types.h>
51 #include <linux/slab.h>
52 #include <linux/swap.h>
53 #include <linux/pagemap.h>
54 #include <linux/file.h>
55
56 #include <linux/sunrpc/clnt.h>
57 #include <linux/nfs_fs.h>
58 #include <linux/nfs_mount.h>
59 #include <linux/nfs_flushd.h>
60 #include <linux/nfs_page.h>
61 #include <asm/uaccess.h>
62 #include <linux/smp_lock.h>
63
64 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
65
/*
 * Local structures
 *
 * This is the struct where the WRITE/COMMIT arguments go.
 * One of these is allocated per outgoing RPC call and freed by
 * nfs_writedata_release() when the task completes.
 */
struct nfs_write_data {
    struct rpc_task task;           /* controlling RPC task */
    struct inode *inode;            /* inode being written back */
    struct rpc_cred *cred;          /* credential used for the call */
    struct nfs_writeargs args;      /* argument struct */
    struct nfs_writeres res;        /* result struct */
    struct nfs_fattr fattr;         /* attributes returned by the server */
    struct nfs_writeverf verf;      /* write verifier (NFSv3 COMMIT check) */
    struct list_head pages;         /* Coalesced requests we wish to flush */
    struct page *pagevec[NFS_WRITE_MAXIOV]; /* backing store for args.pages */
};
82
/*
 * Local function declarations
 */
static struct nfs_page * nfs_update_request(struct file*, struct inode *,
                                            struct page *,
                                            unsigned int, unsigned int);
static void nfs_strategy(struct inode *inode);
static void nfs_writeback_done(struct rpc_task *);
#ifdef CONFIG_NFS_V3
static void nfs_commit_done(struct rpc_task *);
#endif

/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode) (0)
#endif

/* Slab cache backing nfs_writedata_alloc()/nfs_writedata_free() */
static kmem_cache_t *nfs_wdata_cachep;
101
102 static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
103 {
104 struct nfs_write_data *p;
105 p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NOFS);
106 if (p) {
107 memset(p, 0, sizeof(*p));
108 INIT_LIST_HEAD(&p->pages);
109 p->args.pages = p->pagevec;
110 }
111 return p;
112 }
113
/* Return a write descriptor to the slab cache. */
static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
{
    kmem_cache_free(nfs_wdata_cachep, p);
}
118
119 static void nfs_writedata_release(struct rpc_task *task)
120 {
121 struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
122 nfs_writedata_free(wdata);
123 }
124
125 /*
126 * Write a page synchronously.
127 * Offset is the data offset within the page.
128 */
129 static int
130 nfs_writepage_sync(struct file *file, struct inode *inode, struct page *page,
131 unsigned int offset, unsigned int count)
132 {
133 struct rpc_cred *cred = NULL;
134 loff_t base;
135 unsigned int wsize = NFS_SERVER(inode)->wsize;
136 int result, refresh = 0, written = 0, flags;
137 u8 *buffer;
138 struct nfs_fattr fattr;
139 struct nfs_writeverf verf;
140
141
142 if (file)
143 cred = get_rpccred(nfs_file_cred(file));
144 if (!cred)
145 cred = get_rpccred(NFS_I(inode)->mm_cred);
146
147 dprintk("NFS: nfs_writepage_sync(%x/%Ld %d@%Ld)\n",
148 inode->i_dev, (long long)NFS_FILEID(inode),
149 count, (long long)(page_offset(page) + offset));
150
151 base = page_offset(page) + offset;
152
153 flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC;
154
155 do {
156 if (count < wsize && !IS_SWAPFILE(inode))
157 wsize = count;
158
159 result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags,
160 offset, wsize, page, &verf);
161 nfs_write_attributes(inode, &fattr);
162
163 if (result < 0) {
164 /* Must mark the page invalid after I/O error */
165 ClearPageUptodate(page);
166 goto io_error;
167 }
168 if (result != wsize)
169 printk("NFS: short write, wsize=%u, result=%d\n",
170 wsize, result);
171 refresh = 1;
172 buffer += wsize;
173 base += wsize;
174 offset += wsize;
175 written += wsize;
176 count -= wsize;
177 /*
178 * If we've extended the file, update the inode
179 * now so we don't invalidate the cache.
180 */
181 if (base > inode->i_size)
182 inode->i_size = base;
183 } while (count);
184
185 if (PageError(page))
186 ClearPageError(page);
187
188 io_error:
189 if (cred)
190 put_rpccred(cred);
191
192 return written? written : result;
193 }
194
/*
 * Queue an asynchronous write for the given page region.
 * Returns 0 on success or a negative errno from nfs_update_request().
 */
static int
nfs_writepage_async(struct file *file, struct inode *inode, struct page *page,
                    unsigned int offset, unsigned int count)
{
    struct nfs_page *req;
    loff_t end;
    int status;

    /* Find or create a write request covering this region; the
     * request comes back locked. */
    req = nfs_update_request(file, inode, page, offset, count);
    status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
    if (status < 0)
        goto out;
    /* No open-file credential: use the one cached at mmap time */
    if (!req->wb_cred)
        req->wb_cred = get_rpccred(NFS_I(inode)->mm_cred);
    nfs_unlock_request(req);
    /* Maybe kick off a flush if enough dirty requests accumulated */
    nfs_strategy(inode);
    /* Extend i_size if this write goes past the current EOF */
    end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
    if (inode->i_size < end)
        inode->i_size = end;

 out:
    return status;
}
218
/*
 * Write an mmapped page to the server.
 *
 * Called with the page locked; the page is unlocked before return.
 * Handles the partial page at EOF and pages entirely beyond EOF.
 */
int
nfs_writepage(struct page *page)
{
    struct inode *inode = page->mapping->host;
    unsigned long end_index;
    unsigned offset = PAGE_CACHE_SIZE;
    int err;

    end_index = inode->i_size >> PAGE_CACHE_SHIFT;

    /* Ensure we've flushed out any previous writes */
    nfs_wb_page(inode,page);

    /* easy case: page lies fully inside the file */
    if (page->index < end_index)
        goto do_it;
    /* things got complicated... page straddles or passes EOF;
     * compute the number of valid bytes in it */
    offset = inode->i_size & (PAGE_CACHE_SIZE-1);

    /* OK, are we completely out? */
    err = -EIO;
    if (page->index >= end_index+1 || !offset)
        goto out;
do_it:
    lock_kernel();
    /* Async path only when wsize covers a whole page and the inode
     * is not sync-mounted; otherwise write synchronously. */
    if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode)) {
        err = nfs_writepage_async(NULL, inode, page, 0, offset);
        if (err >= 0)
            err = 0;
    } else {
        err = nfs_writepage_sync(NULL, inode, page, 0, offset);
        /* A full write of 'offset' bytes counts as success */
        if (err == offset)
            err = 0;
    }
    unlock_kernel();
out:
    UnlockPage(page);
    return err;
}
261
262 /*
263 * Check whether the file range we want to write to is locked by
264 * us.
265 */
266 static int
267 region_locked(struct inode *inode, struct nfs_page *req)
268 {
269 struct file_lock *fl;
270 loff_t rqstart, rqend;
271
272 /* Don't optimize writes if we don't use NLM */
273 if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
274 return 0;
275
276 rqstart = page_offset(req->wb_page) + req->wb_offset;
277 rqend = rqstart + req->wb_bytes;
278 for (fl = inode->i_flock; fl; fl = fl->fl_next) {
279 if (fl->fl_owner == current->files && (fl->fl_flags & FL_POSIX)
280 && fl->fl_type == F_WRLCK
281 && fl->fl_start <= rqstart && rqend <= fl->fl_end) {
282 return 1;
283 }
284 }
285
286 return 0;
287 }
288
/*
 * Insert a write request into an inode
 * Note: we sort the list in order to be able to optimize nfs_find_request()
 * & co. for the 'write append' case. For 2.5 we may want to consider
 * some form of hashing so as to perform well on random writes.
 *
 * NOTE(review): callers (see nfs_update_request) hold nfs_wreq_lock
 * across this call and have the request locked.
 */
static inline void
nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
    struct list_head *pos, *head;
    unsigned long pg_idx = page_index(req->wb_page);

    /* Already hashed onto an inode: nothing to do */
    if (!list_empty(&req->wb_hash))
        return;
    if (!NFS_WBACK_BUSY(req))
        printk(KERN_ERR "NFS: unlocked request attempted hashed!\n");
    head = &inode->u.nfs_i.writeback;
    /* First request pins the inode until the list drains */
    if (list_empty(head))
        igrab(inode);
    /* Scan backwards for the insertion point that keeps the list
     * sorted by page index (append is the common case). */
    list_for_each_prev(pos, head) {
        struct nfs_page *entry = nfs_inode_wb_entry(pos);
        if (page_index(entry->wb_page) < pg_idx)
            break;
    }
    inode->u.nfs_i.npages++;
    list_add(&req->wb_hash, pos);
    req->wb_count++;    /* reference held by the writeback list */
}
317
/*
 * Remove a write request from an inode.
 * Drops the inode reference taken by igrab() when the last request
 * goes away, then releases the request itself.
 */
static inline void
nfs_inode_remove_request(struct nfs_page *req)
{
    struct inode *inode;
    spin_lock(&nfs_wreq_lock);
    /* Not on any inode's writeback list: nothing to do */
    if (list_empty(&req->wb_hash)) {
        spin_unlock(&nfs_wreq_lock);
        return;
    }
    if (!NFS_WBACK_BUSY(req))
        printk(KERN_ERR "NFS: unlocked request attempted unhashed!\n");
    inode = req->wb_inode;
    list_del(&req->wb_hash);
    INIT_LIST_HEAD(&req->wb_hash);
    inode->u.nfs_i.npages--;
    /* npages must hit zero exactly when the list empties */
    if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback))
        printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
    if (list_empty(&inode->u.nfs_i.writeback)) {
        spin_unlock(&nfs_wreq_lock);
        /* iput() outside the spinlock: drops igrab() reference */
        iput(inode);
    } else
        spin_unlock(&nfs_wreq_lock);
    nfs_clear_request(req);
    nfs_release_request(req);
}
346
/*
 * Find a request
 *
 * Scans the inode's sorted writeback list backwards for a request on
 * the given page. Returns the request with an extra reference held,
 * or NULL. Caller must hold nfs_wreq_lock.
 */
static inline struct nfs_page *
_nfs_find_request(struct inode *inode, struct page *page)
{
    struct list_head *head, *pos;
    unsigned long pg_idx = page_index(page);

    head = &inode->u.nfs_i.writeback;
    list_for_each_prev(pos, head) {
        struct nfs_page *req = nfs_inode_wb_entry(pos);
        unsigned long found_idx = page_index(req->wb_page);

        /* List is sorted ascending and we walk it backwards, so
         * once found_idx drops below pg_idx we can stop. */
        if (pg_idx < found_idx)
            continue;
        if (pg_idx != found_idx)
            break;
        req->wb_count++;    /* reference for the caller */
        return req;
    }
    return NULL;
}
370
371 static struct nfs_page *
372 nfs_find_request(struct inode *inode, struct page *page)
373 {
374 struct nfs_page *req;
375
376 spin_lock(&nfs_wreq_lock);
377 req = _nfs_find_request(inode, page);
378 spin_unlock(&nfs_wreq_lock);
379 return req;
380 }
381
/*
 * Add a request to the inode's dirty list.
 * Also moves it onto the server-wide dirty LRU so the flush daemon
 * can find it by timeout, and marks the inode dirty.
 */
static inline void
nfs_mark_request_dirty(struct nfs_page *req)
{
    struct inode *inode = req->wb_inode;

    spin_lock(&nfs_wreq_lock);
    nfs_list_add_request(req, &inode->u.nfs_i.dirty);
    inode->u.nfs_i.ndirty++;
    /* Re-home the request on the dirty LRU */
    __nfs_del_lru(req);
    __nfs_add_lru(&NFS_SERVER(inode)->lru_dirty, req);
    spin_unlock(&nfs_wreq_lock);
    mark_inode_dirty(inode);
}
398
399 /*
400 * Check if a request is dirty
401 */
402 static inline int
403 nfs_dirty_request(struct nfs_page *req)
404 {
405 struct inode *inode = req->wb_inode;
406 return !list_empty(&req->wb_list) && req->wb_list_head == &inode->u.nfs_i.dirty;
407 }
408
#ifdef CONFIG_NFS_V3
/*
 * Add a request to the inode's commit list.
 * Only exists for NFSv3, where unstable WRITEs must later be
 * confirmed with a COMMIT call.
 */
static inline void
nfs_mark_request_commit(struct nfs_page *req)
{
    struct inode *inode = req->wb_inode;

    spin_lock(&nfs_wreq_lock);
    nfs_list_add_request(req, &inode->u.nfs_i.commit);
    inode->u.nfs_i.ncommit++;
    /* Re-home the request on the commit LRU */
    __nfs_del_lru(req);
    __nfs_add_lru(&NFS_SERVER(inode)->lru_commit, req);
    spin_unlock(&nfs_wreq_lock);
    mark_inode_dirty(inode);
}
#endif
427
/*
 * Wait for a request to complete.
 *
 * Interruptible by signals only if mounted with intr flag.
 * Returns the number of requests waited on, or a negative errno.
 * If @npages is 0, waits on everything from @idx_start to the end.
 */
static int
nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_start, unsigned int npages)
{
    struct list_head *p, *head;
    unsigned long idx_end;
    unsigned int res = 0;
    int error;

    if (npages == 0)
        idx_end = ~0;
    else
        idx_end = idx_start + npages - 1;

    head = &inode->u.nfs_i.writeback;
 restart:
    spin_lock(&nfs_wreq_lock);
    list_for_each_prev(p, head) {
        unsigned long pg_idx;
        struct nfs_page *req = nfs_inode_wb_entry(p);

        /* Optionally restrict to requests from one file */
        if (file && req->wb_file != file)
            continue;

        /* List is sorted by page index; walking backwards means
         * we can stop once we pass below idx_start. */
        pg_idx = page_index(req->wb_page);
        if (pg_idx < idx_start)
            break;
        if (pg_idx > idx_end)
            continue;

        if (!NFS_WBACK_BUSY(req))
            continue;
        /* Take a reference so the request survives the wait,
         * drop the lock, wait, then rescan from the top since
         * the list may have changed under us. */
        req->wb_count++;
        spin_unlock(&nfs_wreq_lock);
        error = nfs_wait_on_request(req);
        nfs_release_request(req);
        if (error < 0)
            return error;
        res++;
        goto restart;
    }
    spin_unlock(&nfs_wreq_lock);
    return res;
}
476
477 /**
478 * nfs_scan_lru_dirty_timeout - Scan LRU list for timed out dirty requests
479 * @server: NFS superblock data
480 * @dst: destination list
481 *
482 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
483 * The elements are checked to ensure that they form a contiguous set
484 * of pages, and that they originated from the same file.
485 */
486 int
487 nfs_scan_lru_dirty_timeout(struct nfs_server *server, struct list_head *dst)
488 {
489 struct inode *inode;
490 int npages;
491
492 npages = nfs_scan_lru_timeout(&server->lru_dirty, dst, server->wpages);
493 if (npages) {
494 inode = nfs_list_entry(dst->next)->wb_inode;
495 inode->u.nfs_i.ndirty -= npages;
496 }
497 return npages;
498 }
499
500 /**
501 * nfs_scan_lru_dirty - Scan LRU list for dirty requests
502 * @server: NFS superblock data
503 * @dst: destination list
504 *
505 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
506 * The elements are checked to ensure that they form a contiguous set
507 * of pages, and that they originated from the same file.
508 */
509 int
510 nfs_scan_lru_dirty(struct nfs_server *server, struct list_head *dst)
511 {
512 struct inode *inode;
513 int npages;
514
515 npages = nfs_scan_lru(&server->lru_dirty, dst, server->wpages);
516 if (npages) {
517 inode = nfs_list_entry(dst->next)->wb_inode;
518 inode->u.nfs_i.ndirty -= npages;
519 }
520 return npages;
521 }
522
/*
 * nfs_scan_dirty - Scan an inode for dirty requests
 * @inode: NFS inode to scan
 * @dst: destination list
 * @file: if set, ensure we match requests from this file
 * @idx_start: lower bound of page->index to scan.
 * @npages: idx_start + npages sets the upper bound to scan.
 *
 * Moves requests from the inode's dirty page list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
static int
nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages)
{
    int res;
    res = nfs_scan_list(&inode->u.nfs_i.dirty, dst, file, idx_start, npages);
    inode->u.nfs_i.ndirty -= res;
    /* ndirty must hit zero exactly when the dirty list empties */
    if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty))
        printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
    return res;
}
544
545 #ifdef CONFIG_NFS_V3
546 /**
547 * nfs_scan_lru_commit_timeout - Scan LRU list for timed out commit requests
548 * @server: NFS superblock data
549 * @dst: destination list
550 *
551 * Finds the first a timed out request in the NFS commit LRU list and moves it
552 * to the list dst. If such an element is found, we move all other commit
553 * requests that apply to the same inode.
554 * The assumption is that doing everything in a single commit-to-disk is
555 * the cheaper alternative.
556 */
557 int
558 nfs_scan_lru_commit_timeout(struct nfs_server *server, struct list_head *dst)
559 {
560 struct inode *inode;
561 int npages;
562
563 npages = nfs_scan_lru_timeout(&server->lru_commit, dst, 1);
564 if (npages) {
565 inode = nfs_list_entry(dst->next)->wb_inode;
566 npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, NULL, 0, 0);
567 inode->u.nfs_i.ncommit -= npages;
568 }
569 return npages;
570 }
571
572
/**
 * nfs_scan_lru_commit - Scan LRU list for commit requests
 * @server: NFS superblock data
 * @dst: destination list
 *
 * Finds the first request in the NFS commit LRU list and moves it
 * to the list dst. If such an element is found, we move all other commit
 * requests that apply to the same inode.
 * The assumption is that doing everything in a single commit-to-disk is
 * the cheaper alternative.
 */
int
nfs_scan_lru_commit(struct nfs_server *server, struct list_head *dst)
{
    struct inode *inode;
    int npages;

    npages = nfs_scan_lru(&server->lru_commit, dst, 1);
    if (npages) {
        inode = nfs_list_entry(dst->next)->wb_inode;
        /* Gather every other pending commit request for this inode */
        npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, NULL, 0, 0);
        inode->u.nfs_i.ncommit -= npages;
    }
    return npages;
}
598
/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: destination list
 * @file: if set, ensure we collect requests from this file only.
 * @idx_start: lower bound of page->index to scan.
 * @npages: idx_start + npages sets the upper bound to scan.
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
static int
nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages)
{
    int res;
    res = nfs_scan_list(&inode->u.nfs_i.commit, dst, file, idx_start, npages);
    inode->u.nfs_i.ncommit -= res;
    /* ncommit must hit zero exactly when the commit list empties */
    if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit))
        printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
    return res;
}
#endif
621
622
/*
 * Try to update any existing write request, or create one if there is none.
 * In order to match, the request's credentials must match those of
 * the calling process.
 *
 * Note: Should always be called with the Page Lock held!
 *
 * Returns the locked request on success, or ERR_PTR(-errno) — in
 * particular ERR_PTR(-EBUSY) when an existing request conflicts and
 * must be flushed first.
 */
static struct nfs_page *
nfs_update_request(struct file* file, struct inode *inode, struct page *page,
                   unsigned int offset, unsigned int bytes)
{
    struct nfs_page *req, *new = NULL;
    unsigned long rqend, end;

    end = offset + bytes;

    for (;;) {
        /* Loop over all inode entries and see if we find
         * A request for the page we wish to update
         */
        spin_lock(&nfs_wreq_lock);
        req = _nfs_find_request(inode, page);
        if (req) {
            if (!nfs_lock_request_dontget(req)) {
                int error;
                /* Request is busy: drop the spinlock, wait
                 * for it to complete, then retry the search. */
                spin_unlock(&nfs_wreq_lock);
                error = nfs_wait_on_request(req);
                nfs_release_request(req);
                if (error < 0)
                    return ERR_PTR(error);
                continue;
            }
            spin_unlock(&nfs_wreq_lock);
            /* A racing iteration allocated 'new' but an existing
             * request turned up: discard the spare. */
            if (new)
                nfs_release_request(new);
            break;
        }

        if (new) {
            /* Second pass: no existing request, install ours */
            nfs_lock_request_dontget(new);
            nfs_inode_add_request(inode, new);
            spin_unlock(&nfs_wreq_lock);
            nfs_mark_request_dirty(new);
            return new;
        }
        spin_unlock(&nfs_wreq_lock);

        /* Allocate a new request outside the spinlock, then loop
         * to re-check for races. */
        new = nfs_create_request(nfs_file_cred(file), inode, page, offset, bytes);
        if (IS_ERR(new))
            return new;
        if (file) {
            new->wb_file = file;
            get_file(file);
        }
        /* If the region is locked, adjust the timeout */
        if (region_locked(inode, new))
            new->wb_timeout = jiffies + NFS_WRITEBACK_LOCKDELAY;
        else
            new->wb_timeout = jiffies + NFS_WRITEBACK_DELAY;
    }

    /* We have a request for our page.
     * If the creds don't match, or the
     * page addresses don't match,
     * tell the caller to wait on the conflicting
     * request.
     */
    rqend = req->wb_offset + req->wb_bytes;
    if (req->wb_file != file
        || req->wb_page != page
        || !nfs_dirty_request(req)
        || offset > rqend || end < req->wb_offset) {
        nfs_unlock_request(req);
        return ERR_PTR(-EBUSY);
    }

    /* Okay, the request matches. Update the region */
    if (offset < req->wb_offset) {
        req->wb_offset = offset;
        req->wb_bytes = rqend - req->wb_offset;
    }

    if (end > rqend)
        req->wb_bytes = end - req->wb_offset;

    return req;
}
710
/*
 * This is the strategy routine for NFS.
 * It is called by nfs_updatepage whenever the user wrote up to the end
 * of a page.
 *
 * We always try to submit a set of requests in parallel so that the
 * server's write code can gather writes. This is mainly for the benefit
 * of NFSv2.
 *
 * We never submit more requests than we think the remote can handle.
 * For UDP sockets, we make sure we don't exceed the congestion window;
 * for TCP, we limit the number of requests to 8.
 *
 * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
 * should be sent out in one go. This is for the benefit of NFSv2 servers
 * that perform write gathering.
 *
 * FIXME: Different servers may have different sweet spots.
 * Record the average congestion window in server struct?
 */
#define NFS_STRATEGY_PAGES 8
static void
nfs_strategy(struct inode *inode)
{
    unsigned int dirty, wpages;

    dirty = inode->u.nfs_i.ndirty;
    wpages = NFS_SERVER(inode)->wpages;
#ifdef CONFIG_NFS_V3
    /* v2: wait for a full write-gathering window of pages;
     * v3: flush as soon as a server-sized batch is available. */
    if (NFS_PROTO(inode)->version == 2) {
        if (dirty >= NFS_STRATEGY_PAGES * wpages)
            nfs_flush_file(inode, NULL, 0, 0, 0);
    } else if (dirty >= wpages)
        nfs_flush_file(inode, NULL, 0, 0, 0);
#else
    if (dirty >= NFS_STRATEGY_PAGES * wpages)
        nfs_flush_file(inode, NULL, 0, 0, 0);
#endif
}
750
/*
 * Flush out a pending write request that is incompatible with the
 * caller's file/credential before new data is copied into the page.
 * Returns 0 or a negative errno from nfs_wb_page().
 */
int
nfs_flush_incompatible(struct file *file, struct page *page)
{
    struct rpc_cred *cred = nfs_file_cred(file);
    struct inode *inode = page->mapping->host;
    struct nfs_page *req;
    int status = 0;
    /*
     * Look for a request corresponding to this page. If there
     * is one, and it belongs to another file, we flush it out
     * before we try to copy anything into the page. Do this
     * due to the lack of an ACCESS-type call in NFSv2.
     * Also do the same if we find a request from an existing
     * dropped page.
     */
    req = nfs_find_request(inode,page);
    if (req) {
        if (req->wb_file != file || req->wb_cred != cred || req->wb_page != page)
            status = nfs_wb_page(inode, page);
        /* Drop the reference taken by nfs_find_request() */
        nfs_release_request(req);
    }
    return (status < 0) ? status : 0;
}
774
/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 *
 * Returns 0 on success or a negative errno.
 */
int
nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count)
{
    struct dentry *dentry = file->f_dentry;
    struct inode *inode = page->mapping->host;
    struct nfs_page *req;
    loff_t end;
    int status = 0;

    dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
        dentry->d_parent->d_name.name, dentry->d_name.name,
        count, (long long)(page_offset(page) +offset));

    /*
     * If wsize is smaller than page size, update and write
     * page synchronously.
     */
    if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE || IS_SYNC(inode)) {
        status = nfs_writepage_sync(file, inode, page, offset, count);
        if (status > 0) {
            /* A full-page write leaves the page uptodate */
            if (offset == 0 && status == PAGE_CACHE_SIZE)
                SetPageUptodate(page);
            return 0;
        }
        return status;
    }

    /*
     * Try to find an NFS request corresponding to this page
     * and update it.
     * If the existing request cannot be updated, we must flush
     * it out now.
     */
    do {
        req = nfs_update_request(file, inode, page, offset, count);
        status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
        if (status != -EBUSY)
            break;
        /* Request could not be updated. Flush it out and try again */
        status = nfs_wb_page(inode, page);
    } while (status >= 0);
    if (status < 0)
        goto done;

    status = 0;
    /* Extend i_size if this write goes past the current EOF */
    end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
    if (inode->i_size < end)
        inode->i_size = end;

    /* If we wrote past the end of the page.
     * Call the strategy routine so it can send out a bunch
     * of requests.
     */
    if (req->wb_offset == 0 && req->wb_bytes == PAGE_CACHE_SIZE) {
        SetPageUptodate(page);
        nfs_unlock_request(req);
        nfs_strategy(inode);
    } else
        nfs_unlock_request(req);
 done:
    dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
        status, (long long)inode->i_size);
    if (status < 0)
        ClearPageUptodate(page);
    return status;
}
847
848 /*
849 * Set up the argument/result storage required for the RPC call.
850 */
851 static void
852 nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data)
853 {
854 struct nfs_page *req;
855 struct page **pages;
856 unsigned int count;
857
858 /* Set up the RPC argument and reply structs
859 * NB: take care not to mess about with data->commit et al. */
860
861 pages = data->args.pages;
862 count = 0;
863 while (!list_empty(head)) {
864 struct nfs_page *req = nfs_list_entry(head->next);
865 nfs_list_remove_request(req);
866 nfs_list_add_request(req, &data->pages);
867 *pages++ = req->wb_page;
868 count += req->wb_bytes;
869 }
870 req = nfs_list_entry(data->pages.next);
871 data->inode = req->wb_inode;
872 data->cred = req->wb_cred;
873 data->args.fh = NFS_FH(req->wb_inode);
874 data->args.offset = page_offset(req->wb_page) + req->wb_offset;
875 data->args.pgbase = req->wb_offset;
876 data->args.count = count;
877 data->res.fattr = &data->fattr;
878 data->res.count = count;
879 data->res.verf = &data->verf;
880 }
881
882
/*
 * Create an RPC task for the given write request and kick it.
 * The page must have been locked by the caller.
 *
 * It may happen that the page we're passed is not marked dirty.
 * This is the case if nfs_updatepage detects a conflicting request
 * that has been written but not committed.
 *
 * Returns 0 on success; on allocation failure the requests are put
 * back on the dirty list and -ENOMEM is returned.
 */
static int
nfs_flush_one(struct list_head *head, struct inode *inode, int how)
{
    struct rpc_clnt *clnt = NFS_CLIENT(inode);
    struct nfs_write_data *data;
    struct rpc_task *task;
    struct rpc_message msg;
    int flags,
        nfsvers = NFS_PROTO(inode)->version,
        async = !(how & FLUSH_SYNC),
        stable = (how & FLUSH_STABLE);
    sigset_t oldset;


    data = nfs_writedata_alloc();
    if (!data)
        goto out_bad;
    task = &data->task;

    /* Set the initial flags for the task. */
    flags = (async) ? RPC_TASK_ASYNC : 0;

    /* Set up the argument struct */
    nfs_write_rpcsetup(head, data);
    /* Pre-v3 has no unstable writes; for v3 with FLUSH_STABLE,
     * only ask for DATA_SYNC when commit traffic is already
     * pending — FILE_SYNC otherwise avoids a later COMMIT. */
    if (nfsvers < 3)
        data->args.stable = NFS_FILE_SYNC;
    else if (stable) {
        if (!inode->u.nfs_i.ncommit)
            data->args.stable = NFS_FILE_SYNC;
        else
            data->args.stable = NFS_DATA_SYNC;
    } else
        data->args.stable = NFS_UNSTABLE;

    /* Finalize the task. */
    rpc_init_task(task, clnt, nfs_writeback_done, flags);
    task->tk_calldata = data;
    /* Release requests */
    task->tk_release = nfs_writedata_release;

#ifdef CONFIG_NFS_V3
    msg.rpc_proc = (nfsvers == 3) ? NFS3PROC_WRITE : NFSPROC_WRITE;
#else
    msg.rpc_proc = NFSPROC_WRITE;
#endif
    msg.rpc_argp = &data->args;
    msg.rpc_resp = &data->res;
    msg.rpc_cred = data->cred;

    dprintk("NFS: %4d initiated write call (req %x/%Ld count %u)\n",
        task->tk_pid,
        inode->i_dev,
        (long long)NFS_FILEID(inode),
        data->args.count);

    /* Mask signals per the client's policy while launching the call */
    rpc_clnt_sigmask(clnt, &oldset);
    rpc_call_setup(task, &msg, 0);
    lock_kernel();
    rpc_execute(task);
    unlock_kernel();
    rpc_clnt_sigunmask(clnt, &oldset);
    return 0;
 out_bad:
    /* Allocation failed: requeue the requests for a later retry */
    while (!list_empty(head)) {
        struct nfs_page *req = nfs_list_entry(head->next);
        nfs_list_remove_request(req);
        nfs_mark_request_dirty(req);
        nfs_unlock_request(req);
    }
    return -ENOMEM;
}
962
/*
 * Flush a list of write requests, batching up to @wpages contiguous
 * requests per WRITE call. Returns the number of pages flushed, or a
 * negative errno; on error, leftover requests are requeued as dirty.
 */
int
nfs_flush_list(struct list_head *head, int wpages, int how)
{
    LIST_HEAD(one_request);
    struct nfs_page *req;
    int error = 0;
    unsigned int pages = 0;

    /* Peel off one coalesced batch at a time and send it */
    while (!list_empty(head)) {
        pages += nfs_coalesce_requests(head, &one_request, wpages);
        req = nfs_list_entry(one_request.next);
        error = nfs_flush_one(&one_request, req->wb_inode, how);
        if (error < 0)
            break;
    }
    if (error >= 0)
        return pages;

    /* Error: put whatever is left back on the dirty list */
    while (!list_empty(head)) {
        req = nfs_list_entry(head->next);
        nfs_list_remove_request(req);
        nfs_mark_request_dirty(req);
        nfs_unlock_request(req);
    }
    return error;
}
989
990
/*
 * This function is called when the WRITE call is complete.
 *
 * Handles short writes and stable-storage downgrades, merges returned
 * attributes, then disposes of each request: drop it on success (or
 * stable write), or queue it for COMMIT (NFSv3 unstable writes).
 */
static void
nfs_writeback_done(struct rpc_task *task)
{
    struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
    struct nfs_writeargs *argp = &data->args;
    struct nfs_writeres *resp = &data->res;
    struct inode *inode = data->inode;
    struct nfs_page *req;
    struct page *page;

    dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
        task->tk_pid, task->tk_status);

    /* Server asked us to retry later (NFSv3 JUKEBOX handling) */
    if (nfs_async_handle_jukebox(task))
        return;

    /* We can't handle that yet but we check for it nevertheless */
    if (resp->count < argp->count && task->tk_status >= 0) {
        static unsigned long complain;
        /* Rate-limit the warning to one per 300 seconds */
        if (time_before(complain, jiffies)) {
            printk(KERN_WARNING
                   "NFS: Server wrote less than requested.\n");
            complain = jiffies + 300 * HZ;
        }
        /* Can't do anything about it right now except throw
         * an error. */
        task->tk_status = -EIO;
    }
#ifdef CONFIG_NFS_V3
    if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
        /* We tried a write call, but the server did not
         * commit data to stable storage even though we
         * requested it.
         * Note: There is a known bug in Tru64 < 5.0 in which
         * the server reports NFS_DATA_SYNC, but performs
         * NFS_FILE_SYNC. We therefore implement this checking
         * as a dprintk() in order to avoid filling syslog.
         */
        static unsigned long complain;

        if (time_before(complain, jiffies)) {
            dprintk("NFS: faulty NFSv3 server %s:"
                " (committed = %d) != (stable = %d)\n",
                NFS_SERVER(inode)->hostname,
                resp->verf->committed, argp->stable);
            complain = jiffies + 300 * HZ;
        }
    }
#endif

    /*
     * Update attributes as result of writeback.
     * FIXME: There is an inherent race with invalidate_inode_pages and
     * writebacks since the page->count is kept > 1 for as long
     * as the page has a write request pending.
     */
    nfs_write_attributes(inode, resp->fattr);
    /* Dispose of every request that was part of this WRITE call */
    while (!list_empty(&data->pages)) {
        req = nfs_list_entry(data->pages.next);
        nfs_list_remove_request(req);
        page = req->wb_page;

        dprintk("NFS: write (%x/%Ld %d@%Ld)",
            req->wb_inode->i_dev,
            (long long)NFS_FILEID(req->wb_inode),
            req->wb_bytes,
            (long long)(page_offset(page) + req->wb_offset));

        if (task->tk_status < 0) {
            ClearPageUptodate(page);
            SetPageError(page);
            /* Record the error on the file so a later fsync or
             * close can report it */
            if (req->wb_file)
                req->wb_file->f_error = task->tk_status;
            nfs_inode_remove_request(req);
            dprintk(", error = %d\n", task->tk_status);
            goto next;
        }

#ifdef CONFIG_NFS_V3
        /* Stable write needs no COMMIT: we are done. Otherwise
         * save the verifier and queue the request for commit. */
        if (argp->stable != NFS_UNSTABLE || resp->verf->committed == NFS_FILE_SYNC) {
            nfs_inode_remove_request(req);
            dprintk(" OK\n");
            goto next;
        }
        memcpy(&req->wb_verf, resp->verf, sizeof(req->wb_verf));
        req->wb_timeout = jiffies + NFS_COMMIT_DELAY;
        nfs_mark_request_commit(req);
        dprintk(" marked for commit\n");
#else
        nfs_inode_remove_request(req);
#endif
    next:
        nfs_unlock_request(req);
    }
}
1089
1090
1091 #ifdef CONFIG_NFS_V3
/*
 * Set up the argument/result storage required for the COMMIT RPC call.
 * Moves every request from @head onto data->pages (leaving @head empty)
 * and computes the single byte range that covers them all.
 */
static void
nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data)
{
	struct nfs_page		*first, *last;
	struct inode		*inode;
	loff_t			start, end, len;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	list_splice(head, &data->pages);
	INIT_LIST_HEAD(head);
	first = nfs_list_entry(data->pages.next);
	last = nfs_list_entry(data->pages.prev);
	inode = first->wb_inode;

	/*
	 * Determine the offset range of requests in the COMMIT call.
	 * We rely on the fact that data->pages is an ordered list...
	 */
	start = page_offset(first->wb_page) + first->wb_offset;
	end = page_offset(last->wb_page) + (last->wb_offset + last->wb_bytes);
	len = end - start;
	/* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call
	 * (per RFC 1813, a count of 0 means "commit to end of file"). */
	if (end >= inode->i_size || len < 0 || len > (~((u32)0) >> 1))
		len = 0;

	data->inode = inode;
	data->cred = first->wb_cred;
	data->args.fh = NFS_FH(inode);
	data->args.offset = start;
	data->res.count = data->args.count = (u32)len;
	data->res.fattr = &data->fattr;
	data->res.verf = &data->verf;
}
1130
/*
 * Commit dirty pages: issue one NFSv3 COMMIT call covering all the
 * requests on @head.  The call is asynchronous unless FLUSH_SYNC is
 * set in @how.  On allocation failure the requests are requeued on
 * the commit list so they will be retried later.
 * Returns 0 on successful dispatch, -ENOMEM otherwise.
 */
int
nfs_commit_list(struct list_head *head, int how)
{
	struct rpc_message	msg;
	struct rpc_clnt		*clnt;
	struct nfs_write_data	*data;
	struct rpc_task		*task;
	struct nfs_page		*req;
	int			flags,
				async = !(how & FLUSH_SYNC);
	sigset_t		oldset;

	data = nfs_writedata_alloc();

	if (!data)
		goto out_bad;
	task = &data->task;

	flags = (async) ? RPC_TASK_ASYNC : 0;

	/* Set up the argument struct */
	nfs_commit_rpcsetup(head, data);
	/* The requests now hang off data->pages; the first one supplies
	 * the inode, and hence the RPC client, they all share. */
	req = nfs_list_entry(data->pages.next);
	clnt = NFS_CLIENT(req->wb_inode);

	rpc_init_task(task, clnt, nfs_commit_done, flags);
	task->tk_calldata = data;
	/* Release requests */
	task->tk_release = nfs_writedata_release;

	msg.rpc_proc = NFS3PROC_COMMIT;
	msg.rpc_argp = &data->args;
	msg.rpc_resp = &data->res;
	msg.rpc_cred = data->cred;

	dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
	/* Mask signals for the duration of the RPC dispatch */
	rpc_clnt_sigmask(clnt, &oldset);
	rpc_call_setup(task, &msg, 0);
	lock_kernel();
	rpc_execute(task);
	unlock_kernel();
	rpc_clnt_sigunmask(clnt, &oldset);
	return 0;
 out_bad:
	/* Allocation failed: put each request back on the commit list
	 * and unlock it so it can be picked up by a later scan. */
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req);
		nfs_unlock_request(req);
	}
	return -ENOMEM;
}
1186
/*
 * COMMIT call returned.  For each request, compare the write verifier
 * saved at WRITE time against the one this COMMIT returned: a match
 * means the data reached stable storage and the request can be
 * dropped; a mismatch means the server lost the unstable data (e.g.
 * it rebooted), so the page is marked dirty to be written again.
 */
static void
nfs_commit_done(struct rpc_task *task)
{
	struct nfs_write_data	*data = (struct nfs_write_data *)task->tk_calldata;
	struct nfs_writeres	*resp = &data->res;
	struct nfs_page		*req;
	struct inode		*inode = data->inode;

	dprintk("NFS: %4d nfs_commit_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/* Server said "try again later" (jukebox error): bail out, the
	 * task has been rescheduled for us. */
	if (nfs_async_handle_jukebox(task))
		return;

	nfs_write_attributes(inode, resp->fattr);
	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);

		dprintk("NFS: commit (%x/%Ld %d@%Ld)",
			req->wb_inode->i_dev,
			(long long)NFS_FILEID(req->wb_inode),
			req->wb_bytes,
			(long long)(page_offset(req->wb_page) + req->wb_offset));
		if (task->tk_status < 0) {
			/* Commit failed: record the error on the file (if
			 * any) and give up on this request. */
			if (req->wb_file)
				req->wb_file->f_error = task->tk_status;
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", task->tk_status);
			goto next;
		}

		/* Okay, COMMIT succeeded, apparently. Check the verifier
		 * returned by the server against all stored verfs. */
		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
			/* We have a match */
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* We have a mismatch. Write the page again */
		dprintk(" mismatch\n");
		nfs_mark_request_dirty(req);
	next:
		nfs_unlock_request(req);
	}
}
1237 #endif
1238
1239 int nfs_flush_file(struct inode *inode, struct file *file, unsigned long idx_start,
1240 unsigned int npages, int how)
1241 {
1242 LIST_HEAD(head);
1243 int res,
1244 error = 0;
1245
1246 spin_lock(&nfs_wreq_lock);
1247 res = nfs_scan_dirty(inode, &head, file, idx_start, npages);
1248 spin_unlock(&nfs_wreq_lock);
1249 if (res)
1250 error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
1251 if (error < 0)
1252 return error;
1253 return res;
1254 }
1255
1256 #ifdef CONFIG_NFS_V3
1257 int nfs_commit_file(struct inode *inode, struct file *file, unsigned long idx_start,
1258 unsigned int npages, int how)
1259 {
1260 LIST_HEAD(head);
1261 int res,
1262 error = 0;
1263
1264 spin_lock(&nfs_wreq_lock);
1265 res = nfs_scan_commit(inode, &head, file, idx_start, npages);
1266 spin_unlock(&nfs_wreq_lock);
1267 if (res)
1268 error = nfs_commit_list(&head, how);
1269 if (error < 0)
1270 return error;
1271 return res;
1272 }
1273 #endif
1274
1275 int nfs_sync_file(struct inode *inode, struct file *file, unsigned long idx_start,
1276 unsigned int npages, int how)
1277 {
1278 int error,
1279 wait;
1280
1281 wait = how & FLUSH_WAIT;
1282 how &= ~FLUSH_WAIT;
1283
1284 if (!inode && file)
1285 inode = file->f_dentry->d_inode;
1286
1287 do {
1288 error = 0;
1289 if (wait)
1290 error = nfs_wait_on_requests(inode, file, idx_start, npages);
1291 if (error == 0)
1292 error = nfs_flush_file(inode, file, idx_start, npages, how);
1293 #ifdef CONFIG_NFS_V3
1294 if (error == 0)
1295 error = nfs_commit_file(inode, file, idx_start, npages, how);
1296 #endif
1297 } while (error > 0);
1298 return error;
1299 }
1300
1301 int nfs_init_writepagecache(void)
1302 {
1303 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1304 sizeof(struct nfs_write_data),
1305 0, SLAB_HWCACHE_ALIGN,
1306 NULL, NULL);
1307 if (nfs_wdata_cachep == NULL)
1308 return -ENOMEM;
1309
1310 return 0;
1311 }
1312
1313 void nfs_destroy_writepagecache(void)
1314 {
1315 if (kmem_cache_destroy(nfs_wdata_cachep))
1316 printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
1317 }
1318
Cache object: 9f2855e80a1c213fc96c7f8e2d0eea70
|