1 /*-
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 /*
39 * vnode op calls for Sun NFS version 2 and 3
40 */
41
42 #include "opt_inet.h"
43
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/systm.h>
47 #include <sys/resourcevar.h>
48 #include <sys/proc.h>
49 #include <sys/mount.h>
50 #include <sys/bio.h>
51 #include <sys/buf.h>
52 #include <sys/malloc.h>
53 #include <sys/mbuf.h>
54 #include <sys/namei.h>
55 #include <sys/socket.h>
56 #include <sys/vnode.h>
57 #include <sys/dirent.h>
58 #include <sys/fcntl.h>
59 #include <sys/lockf.h>
60 #include <sys/stat.h>
61 #include <sys/sysctl.h>
62 #include <sys/signalvar.h>
63
64 #include <vm/vm.h>
65 #include <vm/vm_object.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_object.h>
68
69 #include <fs/fifofs/fifo.h>
70
71 #include <rpc/rpcclnt.h>
72
73 #include <nfs/rpcv2.h>
74 #include <nfs/nfsproto.h>
75 #include <nfsclient/nfs.h>
76 #include <nfsclient/nfsnode.h>
77 #include <nfsclient/nfsmount.h>
78 #include <nfsclient/nfs_lock.h>
79 #include <nfs/xdr_subs.h>
80 #include <nfsclient/nfsm_subs.h>
81
82 #include <net/if.h>
83 #include <netinet/in.h>
84 #include <netinet/in_var.h>
85
86 /* Defs */
87 #define TRUE 1
88 #define FALSE 0
89
90 /*
91 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
92 * calls are not in getblk() and brelse() so that they would not be necessary
93 * here.
94 */
95 #ifndef B_VMIO
96 #define vfs_busy_pages(bp, f)
97 #endif
98
99 static vop_read_t nfsfifo_read;
100 static vop_write_t nfsfifo_write;
101 static vop_close_t nfsfifo_close;
102 static int nfs_flush(struct vnode *, int, struct thread *,
103 int);
104 static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
105 struct thread *);
106 static vop_lookup_t nfs_lookup;
107 static vop_create_t nfs_create;
108 static vop_mknod_t nfs_mknod;
109 static vop_open_t nfs_open;
110 static vop_close_t nfs_close;
111 static vop_access_t nfs_access;
112 static vop_getattr_t nfs_getattr;
113 static vop_setattr_t nfs_setattr;
114 static vop_read_t nfs_read;
115 static vop_fsync_t nfs_fsync;
116 static vop_remove_t nfs_remove;
117 static vop_link_t nfs_link;
118 static vop_rename_t nfs_rename;
119 static vop_mkdir_t nfs_mkdir;
120 static vop_rmdir_t nfs_rmdir;
121 static vop_symlink_t nfs_symlink;
122 static vop_readdir_t nfs_readdir;
123 static vop_strategy_t nfs_strategy;
124 static int nfs_lookitup(struct vnode *, const char *, int,
125 struct ucred *, struct thread *, struct nfsnode **);
126 static int nfs_sillyrename(struct vnode *, struct vnode *,
127 struct componentname *);
128 static vop_access_t nfsspec_access;
129 static vop_readlink_t nfs_readlink;
130 static vop_print_t nfs_print;
131 static vop_advlock_t nfs_advlock;
132 static vop_advlockasync_t nfs_advlockasync;
133
134 /*
135 * Global vfs data structures for nfs
136 */
137 struct vop_vector nfs_vnodeops = {
138 .vop_default = &default_vnodeops,
139 .vop_access = nfs_access,
140 .vop_advlock = nfs_advlock,
141 .vop_advlockasync = nfs_advlockasync,
142 .vop_close = nfs_close,
143 .vop_create = nfs_create,
144 .vop_fsync = nfs_fsync,
145 .vop_getattr = nfs_getattr,
146 .vop_getpages = nfs_getpages,
147 .vop_putpages = nfs_putpages,
148 .vop_inactive = nfs_inactive,
149 .vop_lease = VOP_NULL,
150 .vop_link = nfs_link,
151 .vop_lookup = nfs_lookup,
152 .vop_mkdir = nfs_mkdir,
153 .vop_mknod = nfs_mknod,
154 .vop_open = nfs_open,
155 .vop_print = nfs_print,
156 .vop_read = nfs_read,
157 .vop_readdir = nfs_readdir,
158 .vop_readlink = nfs_readlink,
159 .vop_reclaim = nfs_reclaim,
160 .vop_remove = nfs_remove,
161 .vop_rename = nfs_rename,
162 .vop_rmdir = nfs_rmdir,
163 .vop_setattr = nfs_setattr,
164 .vop_strategy = nfs_strategy,
165 .vop_symlink = nfs_symlink,
166 .vop_write = nfs_write,
167 };
168
169 struct vop_vector nfs_fifoops = {
170 .vop_default = &fifo_specops,
171 .vop_access = nfsspec_access,
172 .vop_close = nfsfifo_close,
173 .vop_fsync = nfs_fsync,
174 .vop_getattr = nfs_getattr,
175 .vop_inactive = nfs_inactive,
176 .vop_print = nfs_print,
177 .vop_read = nfsfifo_read,
178 .vop_reclaim = nfs_reclaim,
179 .vop_setattr = nfs_setattr,
180 .vop_write = nfsfifo_write,
181 };
182
183 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
184 struct componentname *cnp, struct vattr *vap);
185 static int nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
186 struct ucred *cred, struct thread *td);
187 static int nfs_renamerpc(struct vnode *fdvp, const char *fnameptr,
188 int fnamelen, struct vnode *tdvp,
189 const char *tnameptr, int tnamelen,
190 struct ucred *cred, struct thread *td);
191 static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
192 struct sillyrename *sp);
193
194 /*
195 * Global variables
196 */
197 struct mtx nfs_iod_mtx;
198 struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
199 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
200 int nfs_numasync = 0;
201 vop_advlock_t *nfs_advlock_p = nfs_dolock;
202 vop_reclaim_t *nfs_reclaim_p = NULL;
203 #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1))
204
205 SYSCTL_DECL(_vfs_nfs);
206
207 static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
208 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
209 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
210
211 static int nfs_prime_access_cache = 1;
212 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
213 &nfs_prime_access_cache, 0,
214 "Prime NFS ACCESS cache when fetching attributes");
215
216 static int nfsv3_commit_on_close = 0;
217 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
218 &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
219
220 static int nfs_clean_pages_on_close = 1;
221 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
222 &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
223
224 int nfs_directio_enable = 0;
225 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
226 &nfs_directio_enable, 0, "Enable NFS directio");
227
228 /*
229 * This sysctl allows other processes to mmap a file that has been opened
230 * O_DIRECT by a process. In general, having processes mmap the file while
231 * Direct IO is in progress can lead to Data Inconsistencies. But, we allow
232 * this by default to prevent DoS attacks - to prevent a malicious user from
233 * opening up files O_DIRECT preventing other users from mmap'ing these
234 * files. "Protected" environments where stricter consistency guarantees are
235 * required can disable this knob. The process that opened the file O_DIRECT
236 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
237 * meaningful.
238 */
239 int nfs_directio_allow_mmap = 1;
240 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
241 &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
242
243 #if 0
244 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
245 &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
246
247 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
248 &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
249 #endif
250
251 #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \
252 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \
253 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
254
255 /*
256 * SMP Locking Note :
257 * The list of locks after the description of the lock is the ordering
258 * of other locks acquired with the lock held.
259 * np->n_mtx : Protects the fields in the nfsnode.
260 VM Object Lock
261 VI_MTX (acquired indirectly)
262 * nmp->nm_mtx : Protects the fields in the nfsmount.
263 rep->r_mtx
264 * nfs_iod_mtx : Global lock, protects shared nfsiod state.
265 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
266 nmp->nm_mtx
267 rep->r_mtx
268 * rep->r_mtx : Protects the fields in an nfsreq.
269 */
270
/*
 * Perform an NFSv3 ACCESS RPC over the wire for the access bits in
 * 'wmode' and cache the server-granted mode bits in the nfsnode
 * (np->n_mode / n_modeuid / n_modestamp) for use by nfs_access().
 * Returns 0 on success or an NFS error code; the nfsm_* macros may
 * set 'error' and jump to the nfsmout label on failure.
 */
static int
nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
    struct ucred *cred)
{
	const int v3 = 1;	/* ACCESS only exists in NFSv3 */
	u_int32_t *tl;
	int error = 0, attrflag;

	struct mbuf *mreq, *mrep, *md, *mb;
	caddr_t bpos, dpos;
	u_int32_t rmode;
	struct nfsnode *np = VTONFS(vp);

	nfsstats.rpccnt[NFSPROC_ACCESS]++;
	/* Request layout: file handle followed by the access mask. */
	mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(wmode);
	nfsm_request(vp, NFSPROC_ACCESS, td, cred);
	nfsm_postop_attr(vp, attrflag);
	if (!error) {
		/* Pull the granted-mode word out of the reply. */
		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
		rmode = fxdr_unsigned(u_int32_t, *tl);
		/* Cache the result under the nfsnode mutex. */
		mtx_lock(&np->n_mtx);
		np->n_mode = rmode;
		np->n_modeuid = cred->cr_uid;
		np->n_modestamp = time_second;
		mtx_unlock(&np->n_mtx);
	}
	m_freem(mrep);
nfsmout:
	return (error);
}
306
307 /*
308 * nfs access vnode op.
309 * For nfs version 2, just return ok. File accesses may fail later.
310 * For nfs version 3, use the access rpc to check accessibility. If file modes
311 * are changed on the server, accesses might still fail later.
312 */
static int
nfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error = 0;
	u_int32_t mode, wmode;
	int v3 = NFS_ISV3(vp);
	struct nfsnode *np = VTONFS(vp);

	/*
	 * Disallow write attempts on filesystems mounted read-only;
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}
	/*
	 * For nfs v3, check to see if we have done this recently, and if
	 * so return our cached result instead of making an ACCESS call.
	 * If not, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about.
	 */
	if (v3) {
		/* Translate VREAD/VWRITE/VEXEC into NFSv3 ACCESS bits. */
		if (ap->a_mode & VREAD)
			mode = NFSV3ACCESS_READ;
		else
			mode = 0;
		if (vp->v_type != VDIR) {
			if (ap->a_mode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
			if (ap->a_mode & VEXEC)
				mode |= NFSV3ACCESS_EXECUTE;
		} else {
			/* Directories: write implies delete, exec means lookup. */
			if (ap->a_mode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
					 NFSV3ACCESS_DELETE);
			if (ap->a_mode & VEXEC)
				mode |= NFSV3ACCESS_LOOKUP;
		}
		/* XXX safety belt, only make blanket request if caching */
		if (nfsaccess_cache_timeout > 0) {
			/* Ask for every bit so the cached answer covers
			 * future requests, not just this one. */
			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
		} else {
			wmode = mode;
		}

		/*
		 * Does our cached result allow us to give a definite yes to
		 * this request?
		 */
		mtx_lock(&np->n_mtx);
		if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
		    (ap->a_cred->cr_uid == np->n_modeuid) &&
		    ((np->n_mode & mode) == mode)) {
			nfsstats.accesscache_hits++;
		} else {
			/*
			 * Either a no, or a don't know. Go to the wire.
			 */
			nfsstats.accesscache_misses++;
			mtx_unlock(&np->n_mtx);
			error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred);
			mtx_lock(&np->n_mtx);
			if (!error) {
				/* Server answered; deny if any requested
				 * bit was not granted. */
				if ((np->n_mode & mode) != mode) {
					error = EACCES;
				}
			}
		}
		mtx_unlock(&np->n_mtx);
		return (error);
	} else {
		/* NFSv2: emulate locally from cached attributes. */
		if ((error = nfsspec_access(ap)) != 0) {
			return (error);
		}
		/*
		 * Attempt to prevent a mapped root from accessing a file
		 * which it shouldn't. We try to read a byte from the file
		 * if the user is root and the file is not zero length.
		 * After calling nfsspec_access, we should have the correct
		 * file size cached.
		 */
		mtx_lock(&np->n_mtx);
		if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
		    && VTONFS(vp)->n_size > 0) {
			struct iovec aiov;
			struct uio auio;
			char buf[1];

			mtx_unlock(&np->n_mtx);
			/* Build a one-byte kernel-space read request. */
			aiov.iov_base = buf;
			aiov.iov_len = 1;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = 0;
			auio.uio_resid = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = ap->a_td;

			if (vp->v_type == VREG)
				error = nfs_readrpc(vp, &auio, ap->a_cred);
			else if (vp->v_type == VDIR) {
				/* Directories need a full block buffer. */
				char* bp;
				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
				aiov.iov_base = bp;
				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
				error = nfs_readdirrpc(vp, &auio, ap->a_cred);
				free(bp, M_TEMP);
			} else if (vp->v_type == VLNK)
				error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
			else
				error = EACCES;
		} else
			mtx_unlock(&np->n_mtx);
		return (error);
	}
}
443
444 int nfs_otw_getattr_avoid = 0;
445
446 /*
447 * nfs open vnode op
448 * Check to see if the type is ok
449 * and that deletion is not in progress.
450 * For paged in text files, you will need to flush the page cache
451 * if consistency is lost.
452 */
453 /* ARGSUSED */
static int
nfs_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	int error;
	int fmode = ap->a_mode;

	/* Only regular files, directories and symlinks may be opened. */
	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
		return (EOPNOTSUPP);

	/*
	 * Get a valid lease. If cached data is stale, flush it.
	 */
	mtx_lock(&np->n_mtx);
	if (np->n_flag & NMODIFIED) {
		/* Locally modified: push dirty buffers, then refetch
		 * attributes and remember the server's mtime. */
		mtx_unlock(&np->n_mtx);
		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
		if (error == EINTR || error == EIO)
			return (error);
		/* NOTE(review): n_attrstamp cleared without n_mtx held
		 * here — confirm this ordering is intentional. */
		np->n_attrstamp = 0;
		if (vp->v_type == VDIR)
			np->n_direofoffset = 0;
		error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		mtx_lock(&np->n_mtx);
		np->n_mtime = vattr.va_mtime;
		mtx_unlock(&np->n_mtx);
	} else {
		struct thread *td = curthread;

		/* Force an attribute refetch unless the cached attrs were
		 * loaded by this same thread during this same syscall. */
		if (np->n_ac_ts_syscalls != td->td_syscalls ||
		    np->n_ac_ts_tid != td->td_tid ||
		    td->td_proc == NULL ||
		    np->n_ac_ts_pid != td->td_proc->p_pid) {
			np->n_attrstamp = 0;
		}
		mtx_unlock(&np->n_mtx);
		error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		mtx_lock(&np->n_mtx);
		if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
			/* File changed on the server: invalidate our
			 * cached data for close-to-open consistency. */
			if (vp->v_type == VDIR)
				np->n_direofoffset = 0;
			mtx_unlock(&np->n_mtx);
			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error == EINTR || error == EIO) {
				return (error);
			}
			mtx_lock(&np->n_mtx);
			np->n_mtime = vattr.va_mtime;
		}
		mtx_unlock(&np->n_mtx);
	}
	/*
	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
	 */
	if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
		if (np->n_directio_opens == 0) {
			/* First O_DIRECT opener: flush and mark uncached. */
			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error)
				return (error);
			mtx_lock(&np->n_mtx);
			np->n_flag |= NNONCACHE;
			mtx_unlock(&np->n_mtx);
		}
		np->n_directio_opens++;
	}
	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
	return (0);
}
528
529 /*
530 * nfs close vnode op
531 * What an NFS client should do upon close after writing is a debatable issue.
532 * Most NFS clients push delayed writes to the server upon close, basically for
533 * two reasons:
534 * 1 - So that any write errors may be reported back to the client process
535 * doing the close system call. By far the two most likely errors are
536 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
537 * 2 - To put a worst case upper bound on cache inconsistency between
538 * multiple clients for the file.
539 * There is also a consistency problem for Version 2 of the protocol w.r.t.
540 * not being able to tell if other clients are writing a file concurrently,
541 * since there is no way of knowing if the changed modify time in the reply
542 * is only due to the write for this client.
543 * (NFS Version 3 provides weak cache consistency data in the reply that
544 * should be sufficient to detect and handle this case.)
545 *
546 * The current code does the following:
547 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
548 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
549 * or commit them (this satisfies 1 and 2 except for the
550 * case where the server crashes after this close but
551 * before the commit RPC, which is felt to be "good
552 * enough". Changing the last argument to nfs_flush() to
553 * a 1 would force a commit operation, if it is felt a
554 * commit is necessary now.
555 */
556 /* ARGSUSED */
static int
nfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	int fmode = ap->a_fflag;

	if (vp->v_type == VREG) {
		/*
		 * Examine and clean dirty pages, regardless of NMODIFIED.
		 * This closes a major hole in close-to-open consistency.
		 * We want to push out all dirty pages (and buffers) on
		 * close, regardless of whether they were dirtied by
		 * mmap'ed writes or via write().
		 */
		if (nfs_clean_pages_on_close && vp->v_object) {
			VM_OBJECT_LOCK(vp->v_object);
			vm_object_page_clean(vp->v_object, 0, 0, 0);
			VM_OBJECT_UNLOCK(vp->v_object);
		}
		mtx_lock(&np->n_mtx);
		if (np->n_flag & NMODIFIED) {
			mtx_unlock(&np->n_mtx);
			if (NFS_ISV3(vp)) {
				/*
				 * Under NFSv3 we have dirty buffers to dispose of. We
				 * must flush them to the NFS server. We have the option
				 * of waiting all the way through the commit rpc or just
				 * waiting for the initial write. The default is to only
				 * wait through the initial write so the data is in the
				 * server's cache, which is roughly similar to the state
				 * a standard disk subsystem leaves the file in on close().
				 *
				 * We cannot clear the NMODIFIED bit in np->n_flag due to
				 * potential races with other processes, and certainly
				 * cannot clear it if we don't commit.
				 */
				int cm = nfsv3_commit_on_close ? 1 : 0;
				error = nfs_flush(vp, MNT_WAIT, ap->a_td, cm);
				/* np->n_flag &= ~NMODIFIED; */
			} else
				/* NFSv2: flush and invalidate everything. */
				error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			mtx_lock(&np->n_mtx);
		}
		/* Report any deferred asynchronous write error to the
		 * closing process, then clear it. */
		if (np->n_flag & NWRITEERR) {
			np->n_flag &= ~NWRITEERR;
			error = np->n_error;
		}
		mtx_unlock(&np->n_mtx);
	}
	if (nfs_directio_enable)
		KASSERT((np->n_directio_asyncwr == 0),
			("nfs_close: dirty unflushed (%d) directio buffers\n",
			 np->n_directio_asyncwr));
	if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
		/* Last O_DIRECT closer re-enables caching for the file. */
		mtx_lock(&np->n_mtx);
		KASSERT((np->n_directio_opens > 0),
			("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
		np->n_directio_opens--;
		if (np->n_directio_opens == 0)
			np->n_flag &= ~NNONCACHE;
		mtx_unlock(&np->n_mtx);
	}
	return (error);
}
623
624 /*
625 * nfs getattr call from vfs.
626 */
static int
nfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr *vap = ap->a_vap;
	struct vattr vattr;
	caddr_t bpos, dpos;
	int error = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(vp);

	/*
	 * Update local times for special files.
	 */
	mtx_lock(&np->n_mtx);
	if (np->n_flag & (NACC | NUPD))
		np->n_flag |= NCHG;
	mtx_unlock(&np->n_mtx);
	/*
	 * First look in the cache.
	 */
	if (nfs_getattrcache(vp, &vattr) == 0)
		goto nfsmout;
	if (v3 && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) {
		/* Prime the ACCESS cache instead: the ACCESS reply carries
		 * post-op attributes, so a second cache lookup may now hit. */
		nfsstats.accesscache_misses++;
		nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_td, ap->a_cred);
		if (nfs_getattrcache(vp, &vattr) == 0)
			goto nfsmout;
	}
	/* Cache miss: do a GETATTR RPC. */
	nfsstats.rpccnt[NFSPROC_GETATTR]++;
	mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_request(vp, NFSPROC_GETATTR, ap->a_td, ap->a_cred);
	if (!error) {
		nfsm_loadattr(vp, &vattr);
	}
	m_freem(mrep);
nfsmout:
	/*
	 * Copy the result out field by field.  NOTE(review): on an RPC
	 * error 'vattr' may be uninitialized here; callers must ignore
	 * *vap when a non-zero error is returned.
	 */
	vap->va_type = vattr.va_type;
	vap->va_mode = vattr.va_mode;
	vap->va_nlink = vattr.va_nlink;
	vap->va_uid = vattr.va_uid;
	vap->va_gid = vattr.va_gid;
	vap->va_fsid = vattr.va_fsid;
	vap->va_fileid = vattr.va_fileid;
	vap->va_size = vattr.va_size;
	vap->va_blocksize = vattr.va_blocksize;
	vap->va_atime = vattr.va_atime;
	vap->va_mtime = vattr.va_mtime;
	vap->va_ctime = vattr.va_ctime;
	vap->va_gen = vattr.va_gen;
	vap->va_flags = vattr.va_flags;
	vap->va_rdev = vattr.va_rdev;
	vap->va_bytes = vattr.va_bytes;
	vap->va_filerev = vattr.va_filerev;

	return (error);
}
688
689 /*
690 * nfs setattr call.
691 */
static int
nfs_setattr(struct vop_setattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr *vap = ap->a_vap;
	int error = 0;
	u_quad_t tsize;

#ifndef nolint
	tsize = (u_quad_t)0;
#endif

	/*
	 * Setting of flags and marking of atimes are not supported.
	 */
	if (vap->va_flags != VNOVAL || (vap->va_vaflags & VA_MARK_ATIME))
		return (EOPNOTSUPP);

	/*
	 * Disallow write attempts if the filesystem is mounted read-only.
	 * (The va_flags test below is always false at this point because
	 * of the EOPNOTSUPP check above; kept for safety.)
	 */
	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
		error = EROFS;
		goto out;
	}
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			/* Size changes are meaningless for special files;
			 * return early if size was the only request. */
			if (vap->va_mtime.tv_sec == VNOVAL &&
			    vap->va_atime.tv_sec == VNOVAL &&
			    vap->va_mode == (mode_t)VNOVAL &&
			    vap->va_uid == (uid_t)VNOVAL &&
			    vap->va_gid == (gid_t)VNOVAL)
				return (0);
			vap->va_size = VNOVAL;
			break;
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			/*
			 * We run vnode_pager_setsize() early (why?),
			 * we must set np->n_size now to avoid vinvalbuf
			 * V_SAVE races that might setsize a lower
			 * value.
			 */
			mtx_lock(&np->n_mtx);
			/* Remember the old size for rollback on error. */
			tsize = np->n_size;
			mtx_unlock(&np->n_mtx);
			error = nfs_meta_setsize(vp, ap->a_cred,
			    ap->a_td, vap->va_size);
			mtx_lock(&np->n_mtx);
 			if (np->n_flag & NMODIFIED) {
			    tsize = np->n_size;
			    mtx_unlock(&np->n_mtx);
 			    if (vap->va_size == 0)
 				error = nfs_vinvalbuf(vp, 0, ap->a_td, 1);
 			    else
 				error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 			    if (error) {
				vnode_pager_setsize(vp, tsize);
				goto out;
			    }
 			} else
			    mtx_unlock(&np->n_mtx);
			/*
			 * np->n_size has already been set to vap->va_size
			 * in nfs_meta_setsize(). We must set it again since
			 * nfs_loadattrcache() could be called through
			 * nfs_meta_setsize() and could modify np->n_size.
			 */
			mtx_lock(&np->n_mtx);
			np->n_vattr.va_size = np->n_size = vap->va_size;
			mtx_unlock(&np->n_mtx);
  		};
  	} else {
		/* Not a size change: flush dirty data first so cached
		 * writes are not clobbered by a time/mode update. */
		mtx_lock(&np->n_mtx);
		if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
		    (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
			mtx_unlock(&np->n_mtx);
 			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) != 0 &&
			    (error == EINTR || error == EIO))
				return error;
		} else
			mtx_unlock(&np->n_mtx);
	}
	error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_td);
	if (error && vap->va_size != VNOVAL) {
		/* The truncate RPC failed: roll local size back. */
		mtx_lock(&np->n_mtx);
		np->n_size = np->n_vattr.va_size = tsize;
		vnode_pager_setsize(vp, tsize);
		mtx_unlock(&np->n_mtx);
	}
out:
	return (error);
}
800
801 /*
802 * Do an nfs setattr rpc.
803 */
/*
 * Do an nfs setattr rpc.
 * Marshals 'vap' into an NFSv3 sattr3 (or NFSv2 sattr) structure and
 * sends a SETATTR request.  Fields the caller left at VNOVAL are
 * encoded as "don't change".  The nfsm_* macros may jump to nfsmout
 * with 'error' set.
 */
static int
nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    struct thread *td)
{
	struct nfsv2_sattr *sp;
	struct nfsnode *np = VTONFS(vp);
	caddr_t bpos, dpos;
	u_int32_t *tl;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(vp);

	nfsstats.rpccnt[NFSPROC_SETATTR]++;
	mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	if (v3) {
		/* v3: structured attribute list, no guard ctime check. */
		nfsm_v3attrbuild(vap, TRUE);
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
		*tl = nfs_false;
	} else {
		/* v2: fixed sattr record; -1 means "don't change". */
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		if (vap->va_mode == (mode_t)VNOVAL)
			sp->sa_mode = nfs_xdrneg1;
		else
			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
		if (vap->va_uid == (uid_t)VNOVAL)
			sp->sa_uid = nfs_xdrneg1;
		else
			sp->sa_uid = txdr_unsigned(vap->va_uid);
		if (vap->va_gid == (gid_t)VNOVAL)
			sp->sa_gid = nfs_xdrneg1;
		else
			sp->sa_gid = txdr_unsigned(vap->va_gid);
		sp->sa_size = txdr_unsigned(vap->va_size);
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(vp, NFSPROC_SETATTR, td, cred);
	if (v3) {
		/* Mode may have changed: invalidate the ACCESS cache. */
		np->n_modestamp = 0;
		nfsm_wcc_data(vp, wccflag);
	} else
		nfsm_loadattr(vp, NULL);
	m_freem(mrep);
nfsmout:
	return (error);
}
853
854 /*
855 * nfs lookup call, one step at a time...
856 * First look in cache
857 * If not found, unlock the directory nfsnode and do the rpc
858 */
static int
nfs_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	int flags = cnp->cn_flags;
	struct vnode *newvp;
	struct nfsmount *nmp;
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep, *md, *mb;
	long len;
	nfsfh_t *fhp;
	struct nfsnode *np;
	int error = 0, attrflag, fhsize;
	int v3 = NFS_ISV3(dvp);
	struct thread *td = cnp->cn_thread;

	*vpp = NULLVP;
	/* Refuse delete/rename lookups on a read-only mount. */
	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	if (dvp->v_type != VDIR)
		return (ENOTDIR);
	nmp = VFSTONFS(dvp->v_mount);
	np = VTONFS(dvp);
	/* Caller needs search permission in the directory. */
	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
		*vpp = NULLVP;
		return (error);
	}
	/* cache_lookup: -1 = hit, 0 = miss, ENOENT = negative entry. */
	error = cache_lookup(dvp, vpp, cnp);
	if (error > 0 && error != ENOENT)
		return (error);
	if (error == -1) {
		struct vattr vattr;

		newvp = *vpp;
		/* Validate the cache hit: ctime must match what we
		 * recorded when the entry was cached. */
		if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, td)
		 && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
			nfsstats.lookupcache_hits++;
			if (cnp->cn_nameiop != LOOKUP &&
			    (flags & ISLASTCN))
				cnp->cn_flags |= SAVENAME;
			return (0);
		}
		/* Stale entry: purge and fall through to the RPC. */
		cache_purge(newvp);
		if (dvp != newvp)
			vput(newvp);
		else
			vrele(newvp);
		*vpp = NULLVP;
	}
	error = 0;
	newvp = NULLVP;
	nfsstats.lookupcache_misses++;
	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
	len = cnp->cn_namelen;
	/* Build and send the LOOKUP RPC: dir handle + name. */
	mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
	    NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred);
	if (error) {
		if (v3) {
			/* v3 failures still carry dir post-op attrs. */
			nfsm_postop_attr(dvp, attrflag);
			m_freem(mrep);
		}
		goto nfsmout;
	}
	nfsm_getfh(fhp, fhsize, v3);

	/*
	 * Handle RENAME case...
	 */
	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
		/* Renaming onto the directory itself is not allowed. */
		if (NFS_CMPFH(np, fhp, fhsize)) {
			m_freem(mrep);
			return (EISDIR);
		}
		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
		if (error) {
			m_freem(mrep);
			return (error);
		}
		newvp = NFSTOV(np);
		if (v3) {
			nfsm_postop_attr(newvp, attrflag);
			nfsm_postop_attr(dvp, attrflag);
		} else
			nfsm_loadattr(newvp, NULL);
		*vpp = newvp;
		m_freem(mrep);
		cnp->cn_flags |= SAVENAME;
		return (0);
	}

	if (flags & ISDOTDOT) {
		/* Unlock the child dir before locking the parent to
		 * avoid a lock-order reversal on "..". */
		VOP_UNLOCK(dvp, 0, td);
		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
		if (error)
			return (error);
		newvp = NFSTOV(np);
	} else if (NFS_CMPFH(np, fhp, fhsize)) {
		/* Lookup of "." — just take another reference on dvp. */
		VREF(dvp);
		newvp = dvp;
	} else {
		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
		if (error) {
			m_freem(mrep);
			return (error);
		}
		newvp = NFSTOV(np);
	}
	if (v3) {
		nfsm_postop_attr(newvp, attrflag);
		nfsm_postop_attr(dvp, attrflag);
	} else
		nfsm_loadattr(newvp, NULL);
	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
		cnp->cn_flags |= SAVENAME;
	if ((cnp->cn_flags & MAKEENTRY) &&
	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
		/* Record the ctime so later cache hits can be validated. */
		np->n_ctime = np->n_vattr.va_ctime.tv_sec;
		cache_enter(dvp, newvp, cnp);
	}
	*vpp = newvp;
	m_freem(mrep);
nfsmout:
	if (error) {
		if (newvp != NULLVP) {
			vput(newvp);
			*vpp = NULLVP;
		}
		/* ENOENT on the final component of a create/rename is
		 * expected: report EJUSTRETURN so namei proceeds. */
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN) && error == ENOENT) {
			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
				error = EROFS;
			else
				error = EJUSTRETURN;
		}
		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
			cnp->cn_flags |= SAVENAME;
	}
	return (error);
}
1007
1008 /*
1009 * nfs read call.
1010 * Just call nfs_bioread() to do the work.
1011 */
1012 static int
1013 nfs_read(struct vop_read_args *ap)
1014 {
1015 struct vnode *vp = ap->a_vp;
1016
1017 switch (vp->v_type) {
1018 case VREG:
1019 return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
1020 case VDIR:
1021 return (EISDIR);
1022 default:
1023 return (EOPNOTSUPP);
1024 }
1025 }
1026
1027 /*
1028 * nfs readlink call
1029 */
1030 static int
1031 nfs_readlink(struct vop_readlink_args *ap)
1032 {
1033 struct vnode *vp = ap->a_vp;
1034
1035 if (vp->v_type != VLNK)
1036 return (EINVAL);
1037 return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
1038 }
1039
1040 /*
1041 * Do a readlink rpc.
1042 * Called by nfs_doio() from below the buffer cache.
1043 */
int
nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	caddr_t bpos, dpos;
	int error = 0, len, attrflag;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(vp);

	nfsstats.rpccnt[NFSPROC_READLINK]++;
	/* Build the request: the only argument is the symlink's file handle. */
	mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	/* nfsm_request() sends the RPC; on failure it jumps to nfsmout. */
	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred);
	if (v3)
		nfsm_postop_attr(vp, attrflag);
	if (!error) {
		/* Reply carries the link text as a counted string. */
		nfsm_strsiz(len, NFS_MAXPATHLEN);
		if (len == NFS_MAXPATHLEN) {
			struct nfsnode *np = VTONFS(vp);
			mtx_lock(&np->n_mtx);
			/*
			 * A reply of exactly NFS_MAXPATHLEN may be padded by
			 * the server; prefer the cached size when it is a
			 * plausible (smaller, non-zero) length.
			 */
			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
				len = np->n_size;
			mtx_unlock(&np->n_mtx);
		}
		/* Copy the link text out of the mbuf chain into the caller's uio. */
		nfsm_mtouio(uiop, len);
	}
	m_freem(mrep);
nfsmout:
	return (error);
}
1075
1076 /*
1077 * nfs read rpc call
1078 * Ditto above
1079 */
int
nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	u_int32_t *tl;
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct nfsmount *nmp;
	int error = 0, len, retlen, tsiz, eof, attrflag;
	int v3 = NFS_ISV3(vp);
	int rsize;

#ifndef nolint
	eof = 0;
#endif
	nmp = VFSTONFS(vp->v_mount);
	tsiz = uiop->uio_resid;
	mtx_lock(&nmp->nm_mtx);
	/* Refuse reads that would extend past the server's maximum file size. */
	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
		mtx_unlock(&nmp->nm_mtx);
		return (EFBIG);
	}
	/* Snapshot the mount's read size under the mount mutex. */
	rsize = nmp->nm_rsize;
	mtx_unlock(&nmp->nm_mtx);
	/* Issue READ RPCs of at most rsize bytes until done, EOF, or error. */
	while (tsiz > 0) {
		nfsstats.rpccnt[NFSPROC_READ]++;
		len = (tsiz > rsize) ? rsize : tsiz;
		mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, v3);
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3);
		if (v3) {
			/* v3: 64-bit offset followed by 32-bit count. */
			txdr_hyper(uiop->uio_offset, tl);
			*(tl + 2) = txdr_unsigned(len);
		} else {
			/* v2: 32-bit offset, count, and an unused totalcount. */
			*tl++ = txdr_unsigned(uiop->uio_offset);
			*tl++ = txdr_unsigned(len);
			*tl = 0;
		}
		nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred);
		if (v3) {
			nfsm_postop_attr(vp, attrflag);
			if (error) {
				m_freem(mrep);
				goto nfsmout;
			}
			/* Reply carries count and the server's EOF flag. */
			tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
			eof = fxdr_unsigned(int, *(tl + 1));
		} else {
			nfsm_loadattr(vp, NULL);
		}
		/* Copy the returned data into the caller's uio. */
		nfsm_strsiz(retlen, rsize);
		nfsm_mtouio(uiop, retlen);
		m_freem(mrep);
		tsiz -= retlen;
		if (v3) {
			/* v3 reports EOF explicitly; a zero-length reply also ends it. */
			if (eof || retlen == 0) {
				tsiz = 0;
			}
		} else if (retlen < len) {
			/* v2: a short read implies EOF. */
			tsiz = 0;
		}
	}
nfsmout:
	return (error);
}
1146
1147 /*
1148 * nfs write call
1149 */
int
nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
    int *iomode, int *must_commit)
{
	u_int32_t *tl;
	int32_t backup;
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
	int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
	int wsize;

	/*
	 * NOTE(review): this guard looks inverted — the sanity panic is
	 * compiled only when DIAGNOSTIC is NOT defined.  Presumably
	 * "#ifdef DIAGNOSTIC" was intended; confirm against upstream.
	 */
#ifndef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1)
		panic("nfs: writerpc iovcnt > 1");
#endif
	*must_commit = 0;
	tsiz = uiop->uio_resid;
	mtx_lock(&nmp->nm_mtx);
	/* Refuse writes that would extend past the server's maximum file size. */
	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
		mtx_unlock(&nmp->nm_mtx);
		return (EFBIG);
	}
	/* Snapshot the mount's write size under the mount mutex. */
	wsize = nmp->nm_wsize;
	mtx_unlock(&nmp->nm_mtx);
	/* Issue WRITE RPCs of at most wsize bytes until done or error. */
	while (tsiz > 0) {
		nfsstats.rpccnt[NFSPROC_WRITE]++;
		len = (tsiz > wsize) ? wsize : tsiz;
		mreq = nfsm_reqhead(vp, NFSPROC_WRITE,
			NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, v3);
		if (v3) {
			/* v3 args: 64-bit offset, count, stable-how, count. */
			tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
			txdr_hyper(uiop->uio_offset, tl);
			tl += 2;
			*tl++ = txdr_unsigned(len);
			*tl++ = txdr_unsigned(*iomode);
			*tl = txdr_unsigned(len);
		} else {
			u_int32_t x;

			tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
			/* Set both "begin" and "current" to non-garbage. */
			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
			*tl++ = x;	/* "begin offset" */
			*tl++ = x;	/* "current offset" */
			x = txdr_unsigned(len);
			*tl++ = x;	/* total to this offset */
			*tl = x;	/* size of this write */
		}
		/* Append the data itself and fire the RPC. */
		nfsm_uiotom(uiop, len);
		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred);
		if (v3) {
			wccflag = NFSV3_WCCCHK;
			nfsm_wcc_data(vp, wccflag);
			if (!error) {
				tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED
					+ NFSX_V3WRITEVERF);
				rlen = fxdr_unsigned(int, *tl++);
				if (rlen == 0) {
					/* Server accepted nothing: treat as I/O error. */
					error = NFSERR_IO;
					m_freem(mrep);
					break;
				} else if (rlen < len) {
					/*
					 * Short write: back the uio up so the
					 * unwritten tail is retried on the
					 * next iteration.
					 */
					backup = len - rlen;
					uiop->uio_iov->iov_base =
					    (char *)uiop->uio_iov->iov_base -
					    backup;
					uiop->uio_iov->iov_len += backup;
					uiop->uio_offset -= backup;
					uiop->uio_resid += backup;
					len = rlen;
				}
				commit = fxdr_unsigned(int, *tl++);

				/*
				 * Return the lowest committment level
				 * obtained by any of the RPCs.
				 */
				if (committed == NFSV3WRITE_FILESYNC)
					committed = commit;
				else if (committed == NFSV3WRITE_DATASYNC &&
					commit == NFSV3WRITE_UNSTABLE)
					committed = commit;
				mtx_lock(&nmp->nm_mtx);
				/*
				 * Record the server's write verifier; if it
				 * changed, the server rebooted and the caller
				 * must recommit previously unstable writes.
				 */
				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
					NFSX_V3WRITEVERF);
				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
				} else if (bcmp((caddr_t)tl,
				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
				    *must_commit = 1;
				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
					NFSX_V3WRITEVERF);
				}
				mtx_unlock(&nmp->nm_mtx);
			}
		} else {
			nfsm_loadattr(vp, NULL);
		}
		if (wccflag) {
			/* Remember the post-write mtime to keep attr cache honest. */
			mtx_lock(&(VTONFS(vp))->n_mtx);
			VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
			mtx_unlock(&(VTONFS(vp))->n_mtx);
		}
		m_freem(mrep);
		if (error)
			break;
		tsiz -= len;
	}
nfsmout:
	/* Async mounts pretend everything was FILESYNC committed. */
	if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC)
		committed = NFSV3WRITE_FILESYNC;
	*iomode = committed;
	if (error)
		uiop->uio_resid = tsiz;
	return (error);
}
1271
1272 /*
1273 * nfs mknod rpc
1274 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1275 * mode set to specify the file type and the size field for rdev.
1276 */
static int
nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
    struct vattr *vap)
{
	struct nfsv2_sattr *sp;
	u_int32_t *tl;
	struct vnode *newvp = NULL;
	struct nfsnode *np = NULL;
	struct vattr vattr;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	u_int32_t rdev;
	int v3 = NFS_ISV3(dvp);

	/* For v2, the device number is smuggled through the size field. */
	if (vap->va_type == VCHR || vap->va_type == VBLK)
		rdev = txdr_unsigned(vap->va_rdev);
	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
		rdev = nfs_xdrneg1;
	else {
		return (EOPNOTSUPP);
	}
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
		return (error);
	}
	nfsstats.rpccnt[NFSPROC_MKNOD]++;
	/* Note: the stray second '+' below is a harmless unary plus. */
	mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
		+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	if (v3) {
		/* v3 MKNOD: explicit type, attributes, and (for devices) major/minor. */
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
		*tl++ = vtonfsv3_type(vap->va_type);
		nfsm_v3attrbuild(vap, FALSE);
		if (vap->va_type == VCHR || vap->va_type == VBLK) {
			tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(umajor(vap->va_rdev));
			*tl = txdr_unsigned(uminor(vap->va_rdev));
		}
	} else {
		/* v2 kludge: CREATE with IFMT bits in the mode, rdev in sa_size. */
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = rdev;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred);
	if (!error) {
		nfsm_mtofh(dvp, newvp, v3, gotvp);
		if (!gotvp) {
			if (newvp) {
				vput(newvp);
				newvp = NULL;
			}
			/* Reply lacked a file handle; look the new node up by name. */
			error = nfs_lookitup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
			if (!error)
				newvp = NFSTOV(np);
		}
	}
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	if (error) {
		if (newvp)
			vput(newvp);
	} else {
		if (cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, newvp, cnp);
		*vpp = newvp;
	}
	/* The directory changed; invalidate its cached attributes if needed. */
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	return (error);
}
1360
1361 /*
1362 * nfs mknod vop
1363 * just call nfs_mknodrpc() to do the work.
1364 */
1365 /* ARGSUSED */
1366 static int
1367 nfs_mknod(struct vop_mknod_args *ap)
1368 {
1369 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
1370 }
1371
/*
 * Monotonic counter used to build the verifier for NFSv3 exclusive
 * CREATE requests (paired with a local interface address when one is
 * available).  Unlocked: a race merely weakens uniqueness slightly.
 */
static u_long create_verf;
1373 /*
1374 * nfs file create call
1375 */
static int
nfs_create(struct vop_create_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	u_int32_t *tl;
	struct nfsnode *np = NULL;
	struct vnode *newvp = NULL;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vattr vattr;
	int v3 = NFS_ISV3(dvp);

	/*
	 * Oops, not for me..
	 */
	if (vap->va_type == VSOCK)
		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));

	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
		return (error);
	}
	if (vap->va_vaflags & VA_EXCLUSIVE)
		fmode |= O_EXCL;
again:
	nfsstats.rpccnt[NFSPROC_CREATE]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	if (v3) {
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
		if (fmode & O_EXCL) {
			/*
			 * Exclusive create: send a verifier built from a
			 * local IP address (when available) and a counter.
			 */
			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
			tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF);
#ifdef INET
			if (!TAILQ_EMPTY(&in_ifaddrhead))
				*tl++ = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr.s_addr;
			else
#endif
				*tl++ = create_verf;
			*tl = ++create_verf;
		} else {
			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
			nfsm_v3attrbuild(vap, FALSE);
		}
	} else {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = 0;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred);
	if (!error) {
		nfsm_mtofh(dvp, newvp, v3, gotvp);
		if (!gotvp) {
			if (newvp) {
				vput(newvp);
				newvp = NULL;
			}
			/* No handle in the reply; look the new file up by name. */
			error = nfs_lookitup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
			if (!error)
				newvp = NFSTOV(np);
		}
	}
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	if (error) {
		/* Server doesn't support exclusive create: retry unchecked. */
		if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
			fmode &= ~O_EXCL;
			goto again;
		}
		if (newvp)
			vput(newvp);
	} else if (v3 && (fmode & O_EXCL)) {
		/*
		 * We are normally called with only a partially initialized
		 * VAP.  Since the NFSv3 spec says that server may use the
		 * file attributes to store the verifier, the spec requires
		 * us to do a SETATTR RPC. FreeBSD servers store the verifier
		 * in atime, but we can't really assume that all servers will
		 * so we ensure that our SETATTR sets both atime and mtime.
		 */
		if (vap->va_mtime.tv_sec == VNOVAL)
			vfs_timestamp(&vap->va_mtime);
		if (vap->va_atime.tv_sec == VNOVAL)
			vap->va_atime = vap->va_mtime;
		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_thread);
		if (error)
			vput(newvp);
	}
	if (!error) {
		if (cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, newvp, cnp);
		*ap->a_vpp = newvp;
	}
	/* The directory changed; invalidate its cached attributes if needed. */
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	return (error);
}
1490
1491 /*
1492 * nfs file remove call
1493 * To try and make nfs semantics closer to ufs semantics, a file that has
1494 * other processes using the vnode is renamed instead of removed and then
1495 * removed later on the last close.
1496 * - If v_usecount > 1
1497 * If a rename is not already in the works
1498 * call nfs_sillyrename() to set it up
1499 * else
1500 * do the remove rpc
1501 */
static int
nfs_remove(struct vop_remove_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct vattr vattr;

	/*
	 * NOTE(review): this guard looks inverted — the sanity panics are
	 * compiled only when DIAGNOSTIC is NOT defined; presumably
	 * "#ifdef DIAGNOSTIC" was intended — confirm against upstream.
	 */
#ifndef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("nfs_remove: no name");
	if (vrefcnt(vp) < 1)
		panic("nfs_remove: bad v_usecount");
#endif
	if (vp->v_type == VDIR)
		error = EPERM;
	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
	    VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_thread) == 0 &&
	    vattr.va_nlink > 1)) {
		/*
		 * Really remove: either nobody else holds the vnode, or it
		 * was already sillyrenamed and extra links remain.
		 *
		 * Purge the name cache so that the chance of a lookup for
		 * the name succeeding while the remove is in progress is
		 * minimized. Without node locking it can still happen, such
		 * that an I/O op returns ESTALE, but since you get this if
		 * another host removes the file..
		 */
		cache_purge(vp);
		/*
		 * throw away biocache buffers, mainly to avoid
		 * unnecessary delayed writes later.
		 */
		error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1);
		/* Do the rpc */
		if (error != EINTR && error != EIO)
			error = nfs_removerpc(dvp, cnp->cn_nameptr,
				cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
		/*
		 * Kludge City: If the first reply to the remove rpc is lost..
		 *   the reply to the retransmitted request will be ENOENT
		 *   since the file was in fact removed
		 *   Therefore, we cheat and return success.
		 */
		if (error == ENOENT)
			error = 0;
	} else if (!np->n_sillyrename)
		/* File still in use elsewhere: rename it; remove on last close. */
		error = nfs_sillyrename(dvp, vp, cnp);
	/* Attributes are stale either way. */
	np->n_attrstamp = 0;
	return (error);
}
1553
1554 /*
1555 * nfs file remove rpc called from nfs_inactive
1556 */
1557 int
1558 nfs_removeit(struct sillyrename *sp)
1559 {
1560 /*
1561 * Make sure that the directory vnode is still valid.
1562 * XXX we should lock sp->s_dvp here.
1563 */
1564 if (sp->s_dvp->v_type == VBAD)
1565 return (0);
1566 return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
1567 NULL));
1568 }
1569
1570 /*
1571 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1572 */
static int
nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
    struct ucred *cred, struct thread *td)
{
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(dvp);

	nfsstats.rpccnt[NFSPROC_REMOVE]++;
	/* Request: directory file handle plus the entry name. */
	mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE,
		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_REMOVE, td, cred);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	/* The directory changed; invalidate its cached attributes if needed. */
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	return (error);
}
1601
1602 /*
1603 * nfs file rename call
1604 */
static int
nfs_rename(struct vop_rename_args *ap)
{
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	int error;

	/*
	 * NOTE(review): this guard looks inverted — the sanity panic is
	 * compiled only when DIAGNOSTIC is NOT defined; presumably
	 * "#ifdef DIAGNOSTIC" was intended — confirm against upstream.
	 */
#ifndef DIAGNOSTIC
	if ((tcnp->cn_flags & HASBUF) == 0 ||
	    (fcnp->cn_flags & HASBUF) == 0)
		panic("nfs_rename: no name");
#endif
	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	if (fvp == tvp) {
		nfs_printf("nfs_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto out;
	}
	if ((error = vn_lock(fvp, LK_EXCLUSIVE, fcnp->cn_thread)) != 0)
		goto out;

	/*
	 * We have to flush B_DELWRI data prior to renaming
	 * the file. If we don't, the delayed-write buffers
	 * can be flushed out later after the file has gone stale
	 * under NFSV3.  NFSV2 does not have this problem because
	 * ( as far as I can tell ) it flushes dirty buffers more
	 * often.
	 *
	 * Skip the rename operation if the fsync fails, this can happen
	 * due to the server's volume being full, when we pushed out data
	 * that was written back to our cache earlier. Not checking for
	 * this condition can result in potential (silent) data loss.
	 */
	error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
	VOP_UNLOCK(fvp, 0, fcnp->cn_thread);
	if (!error && tvp)
		error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
	if (error)
		goto out;

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 * XXX Can't sillyrename a directory.
	 */
	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
	    tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
		vput(tvp);
		tvp = NULL;
	}

	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
		tcnp->cn_thread);

	/* Directory renames invalidate cached paths under both parents. */
	if (fvp->v_type == VDIR) {
		if (tvp != NULL && tvp->v_type == VDIR)
			cache_purge(tdvp);
		cache_purge(fdvp);
	}

out:
	/* Release references per the VOP_RENAME contract. */
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}
1693
1694 /*
1695 * nfs file rename rpc called from nfs_remove() above
1696 */
1697 static int
1698 nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
1699 struct sillyrename *sp)
1700 {
1701
1702 return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp,
1703 sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread));
1704 }
1705
1706 /*
1707 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
1708 */
static int
nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
    struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred,
    struct thread *td)
{
	caddr_t bpos, dpos;
	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(fdvp);

	nfsstats.rpccnt[NFSPROC_RENAME]++;
	/* Request: (from-dir fh, from-name) followed by (to-dir fh, to-name). */
	mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME,
		(NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
		nfsm_rndup(tnamelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(fdvp, v3);
	nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
	nfsm_fhtom(tdvp, v3);
	nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
	nfsm_request(fdvp, NFSPROC_RENAME, td, cred);
	if (v3) {
		nfsm_wcc_data(fdvp, fwccflag);
		nfsm_wcc_data(tdvp, twccflag);
	}
	m_freem(mrep);
nfsmout:
	/* Both directories changed. */
	mtx_lock(&(VTONFS(fdvp))->n_mtx);
	VTONFS(fdvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(fdvp))->n_mtx);
	mtx_lock(&(VTONFS(tdvp))->n_mtx);
	VTONFS(tdvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(tdvp))->n_mtx);
	/*
	 * NOTE(review): unlike the sibling RPC helpers, these n_attrstamp
	 * stores happen outside the node mutexes — confirm intentional.
	 */
	if (!fwccflag)
		VTONFS(fdvp)->n_attrstamp = 0;
	if (!twccflag)
		VTONFS(tdvp)->n_attrstamp = 0;
	return (error);
}
1748
1749 /*
1750 * nfs hard link create call
1751 */
static int
nfs_link(struct vop_link_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3;

	/* Hard links cannot cross mounts. */
	if (vp->v_mount != tdvp->v_mount) {
		return (EXDEV);
	}

	/*
	 * Push all writes to the server, so that the attribute cache
	 * doesn't get "out of sync" with the server.
	 * XXX There should be a better way!
	 */
	VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);

	v3 = NFS_ISV3(vp);
	nfsstats.rpccnt[NFSPROC_LINK]++;
	/* Request: source fh, target-directory fh, and the new name. */
	mreq = nfsm_reqhead(vp, NFSPROC_LINK,
		NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_fhtom(tdvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred);
	if (v3) {
		nfsm_postop_attr(vp, attrflag);
		nfsm_wcc_data(tdvp, wccflag);
	}
	m_freem(mrep);
nfsmout:
	/* The target directory changed; invalidate stale cached attributes. */
	mtx_lock(&(VTONFS(tdvp))->n_mtx);
	VTONFS(tdvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(tdvp))->n_mtx);
	if (!attrflag)
		VTONFS(vp)->n_attrstamp = 0;
	if (!wccflag)
		VTONFS(tdvp)->n_attrstamp = 0;
	return (error);
}
1799
1800 /*
1801 * nfs symbolic link create call
1802 */
static int
nfs_symlink(struct vop_symlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	caddr_t bpos, dpos;
	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vnode *newvp = NULL;
	int v3 = NFS_ISV3(dvp);

	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
	slen = strlen(ap->a_target);
	mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
	    nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	/* v3 sends attributes before the link text; v2 sends them after. */
	if (v3) {
		nfsm_v3attrbuild(vap, FALSE);
	}
	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
	if (!v3) {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = nfs_xdrneg1;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}

	/*
	 * Issue the NFS request and get the rpc response.
	 *
	 * Only NFSv3 responses returning an error of 0 actually return
	 * a file handle that can be converted into newvp without having
	 * to do an extra lookup rpc.
	 */
	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred);
	if (v3) {
		if (error == 0)
			nfsm_mtofh(dvp, newvp, v3, gotvp);
		nfsm_wcc_data(dvp, wccflag);
	}

	/*
	 * out code jumps -> here, mrep is also freed.
	 */

	m_freem(mrep);
nfsmout:

	/*
	 * If we do not have an error and we could not extract the newvp from
	 * the response due to the request being NFSv2, we have to do a
	 * lookup in order to obtain a newvp to return.
	 */
	if (error == 0 && newvp == NULL) {
		struct nfsnode *np = NULL;

		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, cnp->cn_thread, &np);
		if (!error)
			newvp = NFSTOV(np);
	}
	if (error) {
		if (newvp)
			vput(newvp);
	} else {
		*ap->a_vpp = newvp;
	}
	/* The directory changed; invalidate its cached attributes if needed. */
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	return (error);
}
1885
1886 /*
1887 * nfs make dir call
1888 */
static int
nfs_mkdir(struct vop_mkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	int len;
	struct nfsnode *np = NULL;
	struct vnode *newvp = NULL;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	int gotvp = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vattr vattr;
	int v3 = NFS_ISV3(dvp);

	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
		return (error);
	}
	len = cnp->cn_namelen;
	nfsstats.rpccnt[NFSPROC_MKDIR]++;
	/* Request: parent fh, new directory name, and initial attributes. */
	mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR,
	    NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
	if (v3) {
		nfsm_v3attrbuild(vap, FALSE);
	} else {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = nfs_xdrneg1;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred);
	if (!error)
		nfsm_mtofh(dvp, newvp, v3, gotvp);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	/* The parent directory changed; invalidate stale cached attributes. */
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	/*
	 * No handle in the reply (v2, or a retransmission hid it):
	 * look up the new directory by name.  EEXIST on a non-directory
	 * means the name was taken by something else.
	 */
	if (error == 0 && newvp == NULL) {
		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
			cnp->cn_thread, &np);
		if (!error) {
			newvp = NFSTOV(np);
			if (newvp->v_type != VDIR)
				error = EEXIST;
		}
	}
	if (error) {
		if (newvp)
			vput(newvp);
	} else
		*ap->a_vpp = newvp;
	return (error);
}
1956
1957 /*
1958 * nfs remove directory call
1959 */
static int
nfs_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(dvp);

	/* Cannot remove "." — parent and target must differ. */
	if (dvp == vp)
		return (EINVAL);
	nfsstats.rpccnt[NFSPROC_RMDIR]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR,
		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	/* The parent changed; invalidate stale cached attributes. */
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	/* Drop name-cache entries for both the parent and the removed dir. */
	cache_purge(dvp);
	cache_purge(vp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}
1999
2000 /*
2001 * nfs readdir call
2002 */
static int
nfs_readdir(struct vop_readdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct uio *uio = ap->a_uio;
	int tresid, error = 0;
	struct vattr vattr;

	if (vp->v_type != VDIR)
		return(EPERM);

	/*
	 * First, check for hit on the EOF offset cache
	 */
	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
	    (np->n_flag & NMODIFIED) == 0) {
		if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0) {
			mtx_lock(&np->n_mtx);
			/*
			 * Cached EOF is only trusted while the directory's
			 * mtime is unchanged on the server.
			 */
			if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
				mtx_unlock(&np->n_mtx);
				nfsstats.direofcache_hits++;
				goto out;
			} else
				mtx_unlock(&np->n_mtx);
		}
	}

	/*
	 * Call nfs_bioread() to do the real work.
	 */
	tresid = uio->uio_resid;
	error = nfs_bioread(vp, uio, 0, ap->a_cred);

	/* Nothing transferred means the EOF cache missed at-or-past EOF. */
	if (!error && uio->uio_resid == tresid) {
		nfsstats.direofcache_misses++;
	}
out:
	return (error);
}
2043
2044 /*
2045 * Readdir rpc call.
2046 * Called from below the buffer cache by nfs_doio().
2047 */
2048 int
2049 nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
2050 {
2051 int len, left;
2052 struct dirent *dp = NULL;
2053 u_int32_t *tl;
2054 caddr_t cp;
2055 nfsuint64 *cookiep;
2056 caddr_t bpos, dpos;
2057 struct mbuf *mreq, *mrep, *md, *mb;
2058 nfsuint64 cookie;
2059 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2060 struct nfsnode *dnp = VTONFS(vp);
2061 u_quad_t fileno;
2062 int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2063 int attrflag;
2064 int v3 = NFS_ISV3(vp);
2065
2066 #ifndef DIAGNOSTIC
2067 if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2068 (uiop->uio_resid & (DIRBLKSIZ - 1)))
2069 panic("nfs readdirrpc bad uio");
2070 #endif
2071
2072 /*
2073 * If there is no cookie, assume directory was stale.
2074 */
2075 nfs_dircookie_lock(dnp);
2076 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2077 if (cookiep) {
2078 cookie = *cookiep;
2079 nfs_dircookie_unlock(dnp);
2080 } else {
2081 nfs_dircookie_unlock(dnp);
2082 return (NFSERR_BAD_COOKIE);
2083 }
2084
2085 /*
2086 * Loop around doing readdir rpc's of size nm_readdirsize
2087 * truncated to a multiple of DIRBLKSIZ.
2088 * The stopping criteria is EOF or buffer full.
2089 */
2090 while (more_dirs && bigenough) {
2091 nfsstats.rpccnt[NFSPROC_READDIR]++;
2092 mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
2093 NFSX_READDIR(v3));
2094 mb = mreq;
2095 bpos = mtod(mb, caddr_t);
2096 nfsm_fhtom(vp, v3);
2097 if (v3) {
2098 tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
2099 *tl++ = cookie.nfsuquad[0];
2100 *tl++ = cookie.nfsuquad[1];
2101 mtx_lock(&dnp->n_mtx);
2102 *tl++ = dnp->n_cookieverf.nfsuquad[0];
2103 *tl++ = dnp->n_cookieverf.nfsuquad[1];
2104 mtx_unlock(&dnp->n_mtx);
2105 } else {
2106 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
2107 *tl++ = cookie.nfsuquad[0];
2108 }
2109 *tl = txdr_unsigned(nmp->nm_readdirsize);
2110 nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred);
2111 if (v3) {
2112 nfsm_postop_attr(vp, attrflag);
2113 if (!error) {
2114 tl = nfsm_dissect(u_int32_t *,
2115 2 * NFSX_UNSIGNED);
2116 mtx_lock(&dnp->n_mtx);
2117 dnp->n_cookieverf.nfsuquad[0] = *tl++;
2118 dnp->n_cookieverf.nfsuquad[1] = *tl;
2119 mtx_unlock(&dnp->n_mtx);
2120 } else {
2121 m_freem(mrep);
2122 goto nfsmout;
2123 }
2124 }
2125 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2126 more_dirs = fxdr_unsigned(int, *tl);
2127
2128 /* loop thru the dir entries, doctoring them to 4bsd form */
2129 while (more_dirs && bigenough) {
2130 if (v3) {
2131 tl = nfsm_dissect(u_int32_t *,
2132 3 * NFSX_UNSIGNED);
2133 fileno = fxdr_hyper(tl);
2134 len = fxdr_unsigned(int, *(tl + 2));
2135 } else {
2136 tl = nfsm_dissect(u_int32_t *,
2137 2 * NFSX_UNSIGNED);
2138 fileno = fxdr_unsigned(u_quad_t, *tl++);
2139 len = fxdr_unsigned(int, *tl);
2140 }
2141 if (len <= 0 || len > NFS_MAXNAMLEN) {
2142 error = EBADRPC;
2143 m_freem(mrep);
2144 goto nfsmout;
2145 }
2146 tlen = nfsm_rndup(len);
2147 if (tlen == len)
2148 tlen += 4; /* To ensure null termination */
2149 left = DIRBLKSIZ - blksiz;
2150 if ((tlen + DIRHDSIZ) > left) {
2151 dp->d_reclen += left;
2152 uiop->uio_iov->iov_base =
2153 (char *)uiop->uio_iov->iov_base + left;
2154 uiop->uio_iov->iov_len -= left;
2155 uiop->uio_offset += left;
2156 uiop->uio_resid -= left;
2157 blksiz = 0;
2158 }
2159 if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2160 bigenough = 0;
2161 if (bigenough) {
2162 dp = (struct dirent *)uiop->uio_iov->iov_base;
2163 dp->d_fileno = (int)fileno;
2164 dp->d_namlen = len;
2165 dp->d_reclen = tlen + DIRHDSIZ;
2166 dp->d_type = DT_UNKNOWN;
2167 blksiz += dp->d_reclen;
2168 if (blksiz == DIRBLKSIZ)
2169 blksiz = 0;
2170 uiop->uio_offset += DIRHDSIZ;
2171 uiop->uio_resid -= DIRHDSIZ;
2172 uiop->uio_iov->iov_base =
2173 (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
2174 uiop->uio_iov->iov_len -= DIRHDSIZ;
2175 nfsm_mtouio(uiop, len);
2176 cp = uiop->uio_iov->iov_base;
2177 tlen -= len;
2178 *cp = '\0'; /* null terminate */
2179 uiop->uio_iov->iov_base =
2180 (char *)uiop->uio_iov->iov_base + tlen;
2181 uiop->uio_iov->iov_len -= tlen;
2182 uiop->uio_offset += tlen;
2183 uiop->uio_resid -= tlen;
2184 } else
2185 nfsm_adv(nfsm_rndup(len));
2186 if (v3) {
2187 tl = nfsm_dissect(u_int32_t *,
2188 3 * NFSX_UNSIGNED);
2189 } else {
2190 tl = nfsm_dissect(u_int32_t *,
2191 2 * NFSX_UNSIGNED);
2192 }
2193 if (bigenough) {
2194 cookie.nfsuquad[0] = *tl++;
2195 if (v3)
2196 cookie.nfsuquad[1] = *tl++;
2197 } else if (v3)
2198 tl += 2;
2199 else
2200 tl++;
2201 more_dirs = fxdr_unsigned(int, *tl);
2202 }
2203 /*
2204 * If at end of rpc data, get the eof boolean
2205 */
2206 if (!more_dirs) {
2207 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2208 more_dirs = (fxdr_unsigned(int, *tl) == 0);
2209 }
2210 m_freem(mrep);
2211 }
2212 /*
2213 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2214 * by increasing d_reclen for the last record.
2215 */
2216 if (blksiz > 0) {
2217 left = DIRBLKSIZ - blksiz;
2218 dp->d_reclen += left;
2219 uiop->uio_iov->iov_base =
2220 (char *)uiop->uio_iov->iov_base + left;
2221 uiop->uio_iov->iov_len -= left;
2222 uiop->uio_offset += left;
2223 uiop->uio_resid -= left;
2224 }
2225
2226 /*
2227 * We are now either at the end of the directory or have filled the
2228 * block.
2229 */
2230 if (bigenough)
2231 dnp->n_direofoffset = uiop->uio_offset;
2232 else {
2233 if (uiop->uio_resid > 0)
2234 nfs_printf("EEK! readdirrpc resid > 0\n");
2235 nfs_dircookie_lock(dnp);
2236 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2237 *cookiep = cookie;
2238 nfs_dircookie_unlock(dnp);
2239 }
2240 nfsmout:
2241 return (error);
2242 }
2243
/*
 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
 *
 * Performs READDIRPLUS RPCs against the server until either the uio buffer
 * is full or EOF is reached, converting each wire entry into a 4.4BSD
 * "struct dirent" padded out to DIRBLKSIZ blocks.  Unlike plain readdir,
 * each entry may carry post-op attributes and a file handle, which are used
 * to prime the nfsnode attribute cache and the name cache.
 *
 * Returns 0 on success, NFSERR_BAD_COOKIE if no directory cookie is cached
 * for uiop->uio_offset (caller treats the directory as stale), or an NFS
 * error code from the RPC.  Note: many nfsm_* macros below expand to code
 * that can set "error" and jump to the nfsmout label -- TODO confirm
 * against nfsm_subs.h.
 */
int
nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int len, left;
	struct dirent *dp;
	u_int32_t *tl;
	caddr_t cp;
	struct vnode *newvp;
	nfsuint64 *cookiep;
	caddr_t bpos, dpos, dpossav1, dpossav2;
	struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2;
	struct nameidata nami, *ndp = &nami;
	struct componentname *cnp = &ndp->ni_cnd;
	nfsuint64 cookie;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *dnp = VTONFS(vp), *np;
	nfsfh_t *fhp;
	u_quad_t fileno;
	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
	int attrflag, fhsize;

#ifndef nolint
	dp = NULL;
#endif
	/*
	 * Sanity-check the uio: exactly one iovec, offset and resid aligned
	 * to DIRBLKSIZ.  NOTE(review): the sense of this guard looks
	 * inverted (#ifndef rather than #ifdef DIAGNOSTIC) -- confirm
	 * against the rest of the tree before changing it.
	 */
#ifndef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
	    (uiop->uio_resid & (DIRBLKSIZ - 1)))
		panic("nfs readdirplusrpc bad uio");
#endif
	ndp->ni_dvp = vp;
	newvp = NULLVP;

	/*
	 * If there is no cookie, assume directory was stale.
	 */
	nfs_dircookie_lock(dnp);
	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
	if (cookiep) {
		cookie = *cookiep;
		nfs_dircookie_unlock(dnp);
	} else {
		nfs_dircookie_unlock(dnp);
		return (NFSERR_BAD_COOKIE);
	}
	/*
	 * Loop around doing readdir rpc's of size nm_readdirsize
	 * truncated to a multiple of DIRBLKSIZ.
	 * The stopping criteria is EOF or buffer full.
	 */
	while (more_dirs && bigenough) {
		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
		mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
		    NFSX_FH(1) + 6 * NFSX_UNSIGNED);
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, 1);
		/* Request: cookie, cookie verifier, dircount, maxcount. */
		tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED);
		*tl++ = cookie.nfsuquad[0];
		*tl++ = cookie.nfsuquad[1];
		mtx_lock(&dnp->n_mtx);
		*tl++ = dnp->n_cookieverf.nfsuquad[0];
		*tl++ = dnp->n_cookieverf.nfsuquad[1];
		mtx_unlock(&dnp->n_mtx);
		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
		*tl = txdr_unsigned(nmp->nm_rsize);
		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
		nfsm_postop_attr(vp, attrflag);
		if (error) {
			m_freem(mrep);
			goto nfsmout;
		}
		/* Save the server's new cookie verifier for the next call. */
		tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
		mtx_lock(&dnp->n_mtx);
		dnp->n_cookieverf.nfsuquad[0] = *tl++;
		dnp->n_cookieverf.nfsuquad[1] = *tl++;
		mtx_unlock(&dnp->n_mtx);
		more_dirs = fxdr_unsigned(int, *tl);

		/* loop thru the dir entries, doctoring them to 4bsd form */
		while (more_dirs && bigenough) {
			tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
			fileno = fxdr_hyper(tl);
			len = fxdr_unsigned(int, *(tl + 2));
			if (len <= 0 || len > NFS_MAXNAMLEN) {
				error = EBADRPC;
				m_freem(mrep);
				goto nfsmout;
			}
			tlen = nfsm_rndup(len);
			if (tlen == len)
				tlen += 4;	/* To ensure null termination*/
			/*
			 * If the entry would straddle a DIRBLKSIZ boundary,
			 * pad the previous entry's d_reclen out to the
			 * boundary and start a fresh block.
			 */
			left = DIRBLKSIZ - blksiz;
			if ((tlen + DIRHDSIZ) > left) {
				dp->d_reclen += left;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + left;
				uiop->uio_iov->iov_len -= left;
				uiop->uio_offset += left;
				uiop->uio_resid -= left;
				blksiz = 0;
			}
			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
				bigenough = 0;
			if (bigenough) {
				/* Emit the dirent header, then the name. */
				dp = (struct dirent *)uiop->uio_iov->iov_base;
				/*
				 * NOTE(review): 64-bit fileid truncated to
				 * int here -- presumably matches the plain
				 * readdir path; verify d_fileno width.
				 */
				dp->d_fileno = (int)fileno;
				dp->d_namlen = len;
				dp->d_reclen = tlen + DIRHDSIZ;
				dp->d_type = DT_UNKNOWN;
				blksiz += dp->d_reclen;
				if (blksiz == DIRBLKSIZ)
					blksiz = 0;
				uiop->uio_offset += DIRHDSIZ;
				uiop->uio_resid -= DIRHDSIZ;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
				uiop->uio_iov->iov_len -= DIRHDSIZ;
				cnp->cn_nameptr = uiop->uio_iov->iov_base;
				cnp->cn_namelen = len;
				nfsm_mtouio(uiop, len);
				cp = uiop->uio_iov->iov_base;
				tlen -= len;
				*cp = '\0';	/* null terminate */
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + tlen;
				uiop->uio_iov->iov_len -= tlen;
				uiop->uio_offset += tlen;
				uiop->uio_resid -= tlen;
			} else
				nfsm_adv(nfsm_rndup(len));
			/* Remember the entry's cookie for restart. */
			tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
			if (bigenough) {
				cookie.nfsuquad[0] = *tl++;
				cookie.nfsuquad[1] = *tl++;
			} else
				tl += 2;

			/*
			 * Since the attributes are before the file handle
			 * (sigh), we must skip over the attributes and then
			 * come back and get them.
			 */
			attrflag = fxdr_unsigned(int, *tl);
			if (attrflag) {
				dpossav1 = dpos;
				mdsav1 = md;
				nfsm_adv(NFSX_V3FATTR);
				tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
				doit = fxdr_unsigned(int, *tl);
				/*
				 * Skip loading the attrs for "..". There's a
				 * race between loading the attrs here and
				 * lookups that look for the directory currently
				 * being read (in the parent). We try to acquire
				 * the exclusive lock on ".." here, owning the
				 * lock on the directory being read. Lookup will
				 * hold the lock on ".." and try to acquire the
				 * lock on the directory being read.
				 *
				 * There are other ways of fixing this, one would
				 * be to do a trylock on the ".." vnode and skip
				 * loading the attrs on ".." if it happens to be
				 * locked by another process. But skipping the
				 * attrload on ".." seems the easiest option.
				 *
				 * NOTE(review): dp is only assigned when
				 * bigenough; if the very first entry did not
				 * fit, dp is still NULL here -- confirm the
				 * caller guarantees at least one entry fits.
				 */
				if (strcmp(dp->d_name, "..") == 0) {
					doit = 0;
					/*
					 * We've already skipped over the attrs,
					 * skip over the filehandle. And store d_type
					 * as VDIR.
					 */
					tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
					i = fxdr_unsigned(int, *tl);
					nfsm_adv(nfsm_rndup(i));
					dp->d_type = IFTODT(VTTOIF(VDIR));
				}
				if (doit) {
					nfsm_getfh(fhp, fhsize, 1);
					if (NFS_CMPFH(dnp, fhp, fhsize)) {
						/* Entry is the directory itself. */
						VREF(vp);
						newvp = vp;
						np = dnp;
					} else {
						error = nfs_nget(vp->v_mount, fhp,
						    fhsize, &np, LK_EXCLUSIVE);
						if (error)
							doit = 0;
						else
							newvp = NFSTOV(np);
					}
				}
				if (doit && bigenough) {
					/*
					 * Rewind to the saved attribute
					 * position, load the attrs into the
					 * nfsnode, then restore the parse
					 * position.
					 */
					dpossav2 = dpos;
					dpos = dpossav1;
					mdsav2 = md;
					md = mdsav1;
					nfsm_loadattr(newvp, NULL);
					dpos = dpossav2;
					md = mdsav2;
					dp->d_type =
					    IFTODT(VTTOIF(np->n_vattr.va_type));
					ndp->ni_vp = newvp;
					/* Update n_ctime, so subsequent lookup doesn't purge entry */
					np->n_ctime = np->n_vattr.va_ctime.tv_sec;
					cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
				}
			} else {
				/* Just skip over the file handle */
				tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
				i = fxdr_unsigned(int, *tl);
				if (i) {
					tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
					fhsize = fxdr_unsigned(int, *tl);
					nfsm_adv(nfsm_rndup(fhsize));
				}
			}
			/* Drop the reference/lock taken for this entry. */
			if (newvp != NULLVP) {
				if (newvp == vp)
					vrele(newvp);
				else
					vput(newvp);
				newvp = NULLVP;
			}
			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
			more_dirs = fxdr_unsigned(int, *tl);
		}
		/*
		 * If at end of rpc data, get the eof boolean
		 */
		if (!more_dirs) {
			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
			more_dirs = (fxdr_unsigned(int, *tl) == 0);
		}
		m_freem(mrep);
	}
	/*
	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
	 * by increasing d_reclen for the last record.
	 */
	if (blksiz > 0) {
		left = DIRBLKSIZ - blksiz;
		dp->d_reclen += left;
		uiop->uio_iov->iov_base =
		    (char *)uiop->uio_iov->iov_base + left;
		uiop->uio_iov->iov_len -= left;
		uiop->uio_offset += left;
		uiop->uio_resid -= left;
	}

	/*
	 * We are now either at the end of the directory or have filled the
	 * block.
	 */
	if (bigenough)
		dnp->n_direofoffset = uiop->uio_offset;
	else {
		if (uiop->uio_resid > 0)
			nfs_printf("EEK! readdirplusrpc resid > 0\n");
		nfs_dircookie_lock(dnp);
		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
		*cookiep = cookie;
		nfs_dircookie_unlock(dnp);
	}
nfsmout:
	/* Error path: release any vnode still held from the entry loop. */
	if (newvp != NULLVP) {
		if (newvp == vp)
			vrele(newvp);
		else
			vput(newvp);
		newvp = NULLVP;
	}
	return (error);
}
2521
2522 /*
2523 * Silly rename. To make the NFS filesystem that is stateless look a little
2524 * more like the "ufs" a remove of an active vnode is translated to a rename
2525 * to a funny looking filename that is removed by nfs_inactive on the
2526 * nfsnode. There is the potential for another process on a different client
2527 * to create the same funny name between the nfs_lookitup() fails and the
2528 * nfs_rename() completes, but...
2529 */
2530 static int
2531 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
2532 {
2533 struct sillyrename *sp;
2534 struct nfsnode *np;
2535 int error;
2536 short pid;
2537 unsigned int lticks;
2538
2539 cache_purge(dvp);
2540 np = VTONFS(vp);
2541 #ifndef DIAGNOSTIC
2542 if (vp->v_type == VDIR)
2543 panic("nfs: sillyrename dir");
2544 #endif
2545 MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
2546 M_NFSREQ, M_WAITOK);
2547 sp->s_cred = crhold(cnp->cn_cred);
2548 sp->s_dvp = dvp;
2549 sp->s_removeit = nfs_removeit;
2550 VREF(dvp);
2551
2552 /*
2553 * Fudge together a funny name.
2554 * Changing the format of the funny name to accomodate more
2555 * sillynames per directory.
2556 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is
2557 * CPU ticks since boot.
2558 */
2559 pid = cnp->cn_thread->td_proc->p_pid;
2560 lticks = (unsigned int)ticks;
2561 for ( ; ; ) {
2562 sp->s_namlen = sprintf(sp->s_name,
2563 ".nfs.%08x.%04x4.4", lticks,
2564 pid);
2565 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2566 cnp->cn_thread, NULL))
2567 break;
2568 lticks++;
2569 }
2570 error = nfs_renameit(dvp, cnp, sp);
2571 if (error)
2572 goto bad;
2573 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2574 cnp->cn_thread, &np);
2575 np->n_sillyrename = sp;
2576 return (0);
2577 bad:
2578 vrele(sp->s_dvp);
2579 crfree(sp->s_cred);
2580 free((caddr_t)sp, M_NFSREQ);
2581 return (error);
2582 }
2583
/*
 * Look up a file name and optionally either update the file handle or
 * allocate an nfsnode, depending on the value of npp.
 * npp == NULL	--> just do the lookup
 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
 *		    handled too
 * *npp != NULL --> update the file handle in the vnode
 *
 * Returns 0 on success; ENOENT if (v3) the server supplied no post-op
 * attributes for a freshly allocated node; otherwise an RPC error.
 * On success with npp && *npp == NULL, *npp is set to the (locked) node.
 * The nfsm_* macros may set "error" and jump to nfsmout.
 */
static int
nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
    struct thread *td, struct nfsnode **npp)
{
	struct vnode *newvp = NULL;
	struct nfsnode *np, *dnp = VTONFS(dvp);
	caddr_t bpos, dpos;
	int error = 0, fhlen, attrflag;
	struct mbuf *mreq, *mrep, *md, *mb;
	nfsfh_t *nfhp;
	int v3 = NFS_ISV3(dvp);

	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
	/* Build and send the LOOKUP request: dir fh + name. */
	mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
	    NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(name, len, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_LOOKUP, td, cred);
	if (npp && !error) {
		nfsm_getfh(nfhp, fhlen, v3);
		if (*npp) {
			/*
			 * Caller supplied a node: replace its file handle,
			 * switching between the small embedded handle and a
			 * malloc'ed big handle as the new size requires.
			 */
			np = *npp;
			if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
				free((caddr_t)np->n_fhp, M_NFSBIGFH);
				np->n_fhp = &np->n_fh;
			} else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
				np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK);
			bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
			np->n_fhsize = fhlen;
			newvp = NFSTOV(np);
		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
			/* Name resolved to the directory itself. */
			VREF(dvp);
			newvp = dvp;
		} else {
			error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
			if (error) {
				m_freem(mrep);
				return (error);
			}
			newvp = NFSTOV(np);
		}
		if (v3) {
			nfsm_postop_attr(newvp, attrflag);
			/* A brand new node without attrs is useless. */
			if (!attrflag && *npp == NULL) {
				m_freem(mrep);
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
				return (ENOENT);
			}
		} else
			nfsm_loadattr(newvp, NULL);
	}
	m_freem(mrep);
nfsmout:
	if (npp && *npp == NULL) {
		if (error) {
			/* Undo the reference/lock taken above, if any. */
			if (newvp) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
		} else
			*npp = np;
	}
	return (error);
}
2663
/*
 * Nfs Version 3 commit rpc
 *
 * Ask the server to commit "cnt" bytes at "offset" to stable storage.
 * Returns 0 if the data is stable, NFSERR_STALEWRITEVERF if the server's
 * write verifier changed (meaning it rebooted and uncommitted writes must
 * be resent), or an RPC error.  A no-op (returns 0) until the mount has
 * seen a write verifier.  nfsm_* macros may set "error" and jump to
 * nfsmout.
 */
int
nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
    struct thread *td)
{
	u_int32_t *tl;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;

	mtx_lock(&nmp->nm_mtx);
	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		return (0);
	}
	mtx_unlock(&nmp->nm_mtx);
	nfsstats.rpccnt[NFSPROC_COMMIT]++;
	/* Build the request: fh + 64-bit offset + 32-bit count. */
	mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, 1);
	tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED);
	txdr_hyper(offset, tl);
	tl += 2;
	*tl = txdr_unsigned(cnt);
	nfsm_request(vp, NFSPROC_COMMIT, td, cred);
	nfsm_wcc_data(vp, wccflag);
	if (!error) {
		/*
		 * Compare the returned write verifier with the cached one;
		 * a mismatch means the server lost its uncommitted data.
		 */
		tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF);
		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
		    NFSX_V3WRITEVERF)) {
			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
			    NFSX_V3WRITEVERF);
			error = NFSERR_STALEWRITEVERF;
		}
	}
	m_freem(mrep);
nfsmout:
	return (error);
}
2707
2708 /*
2709 * Strategy routine.
2710 * For async requests when nfsiod(s) are running, queue the request by
2711 * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
2712 * request.
2713 */
2714 static int
2715 nfs_strategy(struct vop_strategy_args *ap)
2716 {
2717 struct buf *bp = ap->a_bp;
2718 struct ucred *cr;
2719
2720 KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
2721 KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp));
2722
2723 if (bp->b_iocmd == BIO_READ)
2724 cr = bp->b_rcred;
2725 else
2726 cr = bp->b_wcred;
2727
2728 /*
2729 * If the op is asynchronous and an i/o daemon is waiting
2730 * queue the request, wake it up and wait for completion
2731 * otherwise just do it ourselves.
2732 */
2733 if ((bp->b_flags & B_ASYNC) == 0 ||
2734 nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
2735 (void)nfs_doio(ap->a_vp, bp, cr, curthread);
2736 return (0);
2737 }
2738
2739 /*
2740 * fsync vnode op. Just call nfs_flush() with commit == 1.
2741 */
2742 /* ARGSUSED */
2743 static int
2744 nfs_fsync(struct vop_fsync_args *ap)
2745 {
2746 return (nfs_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1));
2747 }
2748
2749 /*
2750 * Flush all the blocks associated with a vnode.
2751 * Walk through the buffer pool and push any dirty pages
2752 * associated with the vnode.
2753 */
2754 static int
2755 nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
2756 int commit)
2757 {
2758 struct nfsnode *np = VTONFS(vp);
2759 struct buf *bp;
2760 int i;
2761 struct buf *nbp;
2762 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2763 int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
2764 int passone = 1;
2765 u_quad_t off, endoff, toff;
2766 struct ucred* wcred = NULL;
2767 struct buf **bvec = NULL;
2768 #ifndef NFS_COMMITBVECSIZ
2769 #define NFS_COMMITBVECSIZ 20
2770 #endif
2771 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
2772 int bvecsize = 0, bveccount;
2773
2774 if (nmp->nm_flag & NFSMNT_INT)
2775 slpflag = PCATCH;
2776 if (!commit)
2777 passone = 0;
2778 /*
2779 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
2780 * server, but has not been committed to stable storage on the server
2781 * yet. On the first pass, the byte range is worked out and the commit
2782 * rpc is done. On the second pass, nfs_writebp() is called to do the
2783 * job.
2784 */
2785 again:
2786 off = (u_quad_t)-1;
2787 endoff = 0;
2788 bvecpos = 0;
2789 if (NFS_ISV3(vp) && commit) {
2790 s = splbio();
2791 if (bvec != NULL && bvec != bvec_on_stack)
2792 free(bvec, M_TEMP);
2793 /*
2794 * Count up how many buffers waiting for a commit.
2795 */
2796 bveccount = 0;
2797 VI_LOCK(vp);
2798 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
2799 if (BUF_REFCNT(bp) == 0 &&
2800 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
2801 == (B_DELWRI | B_NEEDCOMMIT))
2802 bveccount++;
2803 }
2804 /*
2805 * Allocate space to remember the list of bufs to commit. It is
2806 * important to use M_NOWAIT here to avoid a race with nfs_write.
2807 * If we can't get memory (for whatever reason), we will end up
2808 * committing the buffers one-by-one in the loop below.
2809 */
2810 if (bveccount > NFS_COMMITBVECSIZ) {
2811 /*
2812 * Release the vnode interlock to avoid a lock
2813 * order reversal.
2814 */
2815 VI_UNLOCK(vp);
2816 bvec = (struct buf **)
2817 malloc(bveccount * sizeof(struct buf *),
2818 M_TEMP, M_NOWAIT);
2819 VI_LOCK(vp);
2820 if (bvec == NULL) {
2821 bvec = bvec_on_stack;
2822 bvecsize = NFS_COMMITBVECSIZ;
2823 } else
2824 bvecsize = bveccount;
2825 } else {
2826 bvec = bvec_on_stack;
2827 bvecsize = NFS_COMMITBVECSIZ;
2828 }
2829 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
2830 if (bvecpos >= bvecsize)
2831 break;
2832 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
2833 nbp = TAILQ_NEXT(bp, b_bobufs);
2834 continue;
2835 }
2836 if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
2837 (B_DELWRI | B_NEEDCOMMIT)) {
2838 BUF_UNLOCK(bp);
2839 nbp = TAILQ_NEXT(bp, b_bobufs);
2840 continue;
2841 }
2842 VI_UNLOCK(vp);
2843 bremfree(bp);
2844 /*
2845 * Work out if all buffers are using the same cred
2846 * so we can deal with them all with one commit.
2847 *
2848 * NOTE: we are not clearing B_DONE here, so we have
2849 * to do it later on in this routine if we intend to
2850 * initiate I/O on the bp.
2851 *
2852 * Note: to avoid loopback deadlocks, we do not
2853 * assign b_runningbufspace.
2854 */
2855 if (wcred == NULL)
2856 wcred = bp->b_wcred;
2857 else if (wcred != bp->b_wcred)
2858 wcred = NOCRED;
2859 vfs_busy_pages(bp, 1);
2860
2861 VI_LOCK(vp);
2862 /*
2863 * bp is protected by being locked, but nbp is not
2864 * and vfs_busy_pages() may sleep. We have to
2865 * recalculate nbp.
2866 */
2867 nbp = TAILQ_NEXT(bp, b_bobufs);
2868
2869 /*
2870 * A list of these buffers is kept so that the
2871 * second loop knows which buffers have actually
2872 * been committed. This is necessary, since there
2873 * may be a race between the commit rpc and new
2874 * uncommitted writes on the file.
2875 */
2876 bvec[bvecpos++] = bp;
2877 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2878 bp->b_dirtyoff;
2879 if (toff < off)
2880 off = toff;
2881 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
2882 if (toff > endoff)
2883 endoff = toff;
2884 }
2885 splx(s);
2886 VI_UNLOCK(vp);
2887 }
2888 if (bvecpos > 0) {
2889 /*
2890 * Commit data on the server, as required.
2891 * If all bufs are using the same wcred, then use that with
2892 * one call for all of them, otherwise commit each one
2893 * separately.
2894 */
2895 if (wcred != NOCRED)
2896 retv = nfs_commit(vp, off, (int)(endoff - off),
2897 wcred, td);
2898 else {
2899 retv = 0;
2900 for (i = 0; i < bvecpos; i++) {
2901 off_t off, size;
2902 bp = bvec[i];
2903 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2904 bp->b_dirtyoff;
2905 size = (u_quad_t)(bp->b_dirtyend
2906 - bp->b_dirtyoff);
2907 retv = nfs_commit(vp, off, (int)size,
2908 bp->b_wcred, td);
2909 if (retv) break;
2910 }
2911 }
2912
2913 if (retv == NFSERR_STALEWRITEVERF)
2914 nfs_clearcommit(vp->v_mount);
2915
2916 /*
2917 * Now, either mark the blocks I/O done or mark the
2918 * blocks dirty, depending on whether the commit
2919 * succeeded.
2920 */
2921 for (i = 0; i < bvecpos; i++) {
2922 bp = bvec[i];
2923 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
2924 if (retv) {
2925 /*
2926 * Error, leave B_DELWRI intact
2927 */
2928 vfs_unbusy_pages(bp);
2929 brelse(bp);
2930 } else {
2931 /*
2932 * Success, remove B_DELWRI ( bundirty() ).
2933 *
2934 * b_dirtyoff/b_dirtyend seem to be NFS
2935 * specific. We should probably move that
2936 * into bundirty(). XXX
2937 */
2938 s = splbio();
2939 bufobj_wref(&vp->v_bufobj);
2940 bp->b_flags |= B_ASYNC;
2941 bundirty(bp);
2942 bp->b_flags &= ~B_DONE;
2943 bp->b_ioflags &= ~BIO_ERROR;
2944 bp->b_dirtyoff = bp->b_dirtyend = 0;
2945 splx(s);
2946 bufdone(bp);
2947 }
2948 }
2949 }
2950
2951 /*
2952 * Start/do any write(s) that are required.
2953 */
2954 loop:
2955 s = splbio();
2956 VI_LOCK(vp);
2957 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
2958 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
2959 if (waitfor != MNT_WAIT || passone)
2960 continue;
2961
2962 error = BUF_TIMELOCK(bp,
2963 LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
2964 VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
2965 splx(s);
2966 if (error == 0) {
2967 BUF_UNLOCK(bp);
2968 goto loop;
2969 }
2970 if (error == ENOLCK)
2971 goto loop;
2972 if (nfs_sigintr(nmp, NULL, td)) {
2973 error = EINTR;
2974 goto done;
2975 }
2976 if (slpflag == PCATCH) {
2977 slpflag = 0;
2978 slptimeo = 2 * hz;
2979 }
2980 goto loop;
2981 }
2982 if ((bp->b_flags & B_DELWRI) == 0)
2983 panic("nfs_fsync: not dirty");
2984 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
2985 BUF_UNLOCK(bp);
2986 continue;
2987 }
2988 VI_UNLOCK(vp);
2989 bremfree(bp);
2990 if (passone || !commit)
2991 bp->b_flags |= B_ASYNC;
2992 else
2993 bp->b_flags |= B_ASYNC;
2994 splx(s);
2995 bwrite(bp);
2996 if (nfs_sigintr(nmp, NULL, td)) {
2997 error = EINTR;
2998 goto done;
2999 }
3000 goto loop;
3001 }
3002 splx(s);
3003 if (passone) {
3004 passone = 0;
3005 VI_UNLOCK(vp);
3006 goto again;
3007 }
3008 if (waitfor == MNT_WAIT) {
3009 while (vp->v_bufobj.bo_numoutput) {
3010 error = bufobj_wwait(&vp->v_bufobj, slpflag, slptimeo);
3011 if (error) {
3012 VI_UNLOCK(vp);
3013 error = nfs_sigintr(nmp, NULL, td);
3014 if (error)
3015 goto done;
3016 if (slpflag == PCATCH) {
3017 slpflag = 0;
3018 slptimeo = 2 * hz;
3019 }
3020 VI_LOCK(vp);
3021 }
3022 }
3023 if (vp->v_bufobj.bo_dirty.bv_cnt != 0 && commit) {
3024 VI_UNLOCK(vp);
3025 goto loop;
3026 }
3027 /*
3028 * Wait for all the async IO requests to drain
3029 */
3030 VI_UNLOCK(vp);
3031 mtx_lock(&np->n_mtx);
3032 while (np->n_directio_asyncwr > 0) {
3033 np->n_flag |= NFSYNCWAIT;
3034 error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr,
3035 &np->n_mtx, slpflag | (PRIBIO + 1),
3036 "nfsfsync", 0);
3037 if (error) {
3038 if (nfs_sigintr(nmp, (struct nfsreq *)0, td)) {
3039 mtx_unlock(&np->n_mtx);
3040 error = EINTR;
3041 goto done;
3042 }
3043 }
3044 }
3045 mtx_unlock(&np->n_mtx);
3046 } else
3047 VI_UNLOCK(vp);
3048 mtx_lock(&np->n_mtx);
3049 if (np->n_flag & NWRITEERR) {
3050 error = np->n_error;
3051 np->n_flag &= ~NWRITEERR;
3052 }
3053 if (commit && vp->v_bufobj.bo_dirty.bv_cnt == 0 &&
3054 vp->v_bufobj.bo_numoutput == 0 && np->n_directio_asyncwr == 0)
3055 np->n_flag &= ~NMODIFIED;
3056 mtx_unlock(&np->n_mtx);
3057 done:
3058 if (bvec != NULL && bvec != bvec_on_stack)
3059 free(bvec, M_TEMP);
3060 return (error);
3061 }
3062
3063 /*
3064 * NFS advisory byte-level locks.
3065 */
3066 static int
3067 nfs_advlock(struct vop_advlock_args *ap)
3068 {
3069 struct vnode *vp = ap->a_vp;
3070 u_quad_t size;
3071 int error;
3072
3073 error = vn_lock(vp, LK_SHARED, curthread);
3074 if (error)
3075 return (error);
3076 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
3077 size = VTONFS(vp)->n_size;
3078 VOP_UNLOCK(vp, 0, curthread);
3079 error = lf_advlock(ap, &(vp->v_lockf), size);
3080 } else {
3081 if (nfs_advlock_p)
3082 error = nfs_advlock_p(ap);
3083 else
3084 error = ENOLCK;
3085 }
3086
3087 return (error);
3088 }
3089
3090 /*
3091 * NFS advisory byte-level locks.
3092 */
3093 static int
3094 nfs_advlockasync(struct vop_advlockasync_args *ap)
3095 {
3096 struct vnode *vp = ap->a_vp;
3097 u_quad_t size;
3098 int error;
3099
3100 error = vn_lock(vp, LK_SHARED, curthread);
3101 if (error)
3102 return (error);
3103 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
3104 size = VTONFS(vp)->n_size;
3105 VOP_UNLOCK(vp, 0, curthread);
3106 error = lf_advlockasync(ap, &(vp->v_lockf), size);
3107 } else {
3108 VOP_UNLOCK(vp, 0, curthread);
3109 error = EOPNOTSUPP;
3110 }
3111 return (error);
3112 }
3113
3114 /*
3115 * Print out the contents of an nfsnode.
3116 */
3117 static int
3118 nfs_print(struct vop_print_args *ap)
3119 {
3120 struct vnode *vp = ap->a_vp;
3121 struct nfsnode *np = VTONFS(vp);
3122
3123 nfs_printf("\tfileid %ld fsid 0x%x",
3124 np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3125 if (vp->v_type == VFIFO)
3126 fifo_printinfo(vp);
3127 printf("\n");
3128 return (0);
3129 }
3130
/*
 * This is the "real" nfs::bwrite(struct buf*).
 * We set B_CACHE if this is a VMIO buffer.
 *
 * Undirties the buffer, hands it to the strategy routine, and for
 * synchronous (non-B_ASYNC) callers waits for completion and returns the
 * I/O result.  Async callers get 0 immediately.  B_INVAL buffers are
 * simply released.
 */
int
nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
{
	int s;
	int oldflags = bp->b_flags;	/* remember B_ASYNC/B_DELWRI for later */
#if 0
	int retv = 1;
	off_t off;
#endif

	if (BUF_REFCNT(bp) == 0)
		panic("bwrite: buffer is not locked???");

	if (bp->b_flags & B_INVAL) {
		/* Buffer is being thrown away; nothing to write. */
		brelse(bp);
		return(0);
	}

	bp->b_flags |= B_CACHE;

	/*
	 * Undirty the bp.  We will redirty it later if the I/O fails.
	 */

	s = splbio();
	bundirty(bp);
	bp->b_flags &= ~B_DONE;
	bp->b_ioflags &= ~BIO_ERROR;
	bp->b_iocmd = BIO_WRITE;

	bufobj_wref(bp->b_bufobj);
	curthread->td_ru.ru_oublock++;
	splx(s);

	/*
	 * Note: to avoid loopback deadlocks, we do not
	 * assign b_runningbufspace.
	 */
	vfs_busy_pages(bp, 1);

	BUF_KERNPROC(bp);
	bp->b_iooffset = dbtob(bp->b_blkno);
	bstrategy(bp);

	if( (oldflags & B_ASYNC) == 0) {
		/* Synchronous write: wait for the I/O and release. */
		int rtval = bufwait(bp);

		if (oldflags & B_DELWRI) {
			s = splbio();
			reassignbuf(bp);
			splx(s);
		}
		brelse(bp);
		return (rtval);
	}

	return (0);
}
3193
3194 /*
3195 * nfs special file access vnode op.
3196 * Essentially just get vattr and then imitate iaccess() since the device is
3197 * local to the client.
3198 */
3199 static int
3200 nfsspec_access(struct vop_access_args *ap)
3201 {
3202 struct vattr *vap;
3203 struct ucred *cred = ap->a_cred;
3204 struct vnode *vp = ap->a_vp;
3205 mode_t mode = ap->a_mode;
3206 struct vattr vattr;
3207 int error;
3208
3209 /*
3210 * Disallow write attempts on filesystems mounted read-only;
3211 * unless the file is a socket, fifo, or a block or character
3212 * device resident on the filesystem.
3213 */
3214 if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3215 switch (vp->v_type) {
3216 case VREG:
3217 case VDIR:
3218 case VLNK:
3219 return (EROFS);
3220 default:
3221 break;
3222 }
3223 }
3224 vap = &vattr;
3225 error = VOP_GETATTR(vp, vap, cred, ap->a_td);
3226 if (error)
3227 goto out;
3228 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
3229 mode, cred, NULL);
3230 out:
3231 return error;
3232 }
3233
3234 /*
3235 * Read wrapper for fifos.
3236 */
3237 static int
3238 nfsfifo_read(struct vop_read_args *ap)
3239 {
3240 struct nfsnode *np = VTONFS(ap->a_vp);
3241 int error;
3242
3243 /*
3244 * Set access flag.
3245 */
3246 mtx_lock(&np->n_mtx);
3247 np->n_flag |= NACC;
3248 getnanotime(&np->n_atim);
3249 mtx_unlock(&np->n_mtx);
3250 error = fifo_specops.vop_read(ap);
3251 return error;
3252 }
3253
3254 /*
3255 * Write wrapper for fifos.
3256 */
3257 static int
3258 nfsfifo_write(struct vop_write_args *ap)
3259 {
3260 struct nfsnode *np = VTONFS(ap->a_vp);
3261
3262 /*
3263 * Set update flag.
3264 */
3265 mtx_lock(&np->n_mtx);
3266 np->n_flag |= NUPD;
3267 getnanotime(&np->n_mtim);
3268 mtx_unlock(&np->n_mtx);
3269 return(fifo_specops.vop_write(ap));
3270 }
3271
3272 /*
3273 * Close wrapper for fifos.
3274 *
3275 * Update the times on the nfsnode then do fifo close.
3276 */
3277 static int
3278 nfsfifo_close(struct vop_close_args *ap)
3279 {
3280 struct vnode *vp = ap->a_vp;
3281 struct nfsnode *np = VTONFS(vp);
3282 struct vattr vattr;
3283 struct timespec ts;
3284
3285 mtx_lock(&np->n_mtx);
3286 if (np->n_flag & (NACC | NUPD)) {
3287 getnanotime(&ts);
3288 if (np->n_flag & NACC)
3289 np->n_atim = ts;
3290 if (np->n_flag & NUPD)
3291 np->n_mtim = ts;
3292 np->n_flag |= NCHG;
3293 if (vrefcnt(vp) == 1 &&
3294 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3295 VATTR_NULL(&vattr);
3296 if (np->n_flag & NACC)
3297 vattr.va_atime = np->n_atim;
3298 if (np->n_flag & NUPD)
3299 vattr.va_mtime = np->n_mtim;
3300 mtx_unlock(&np->n_mtx);
3301 (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_td);
3302 goto out;
3303 }
3304 }
3305 mtx_unlock(&np->n_mtx);
3306 out:
3307 return (fifo_specops.vop_close(ap));
3308 }
3309
3310 /*
3311 * Just call nfs_writebp() with the force argument set to 1.
3312 *
3313 * NOTE: B_DONE may or may not be set in a_bp on call.
3314 */
3315 static int
3316 nfs_bwrite(struct buf *bp)
3317 {
3318
3319 return (nfs_writebp(bp, 1, curthread));
3320 }
3321
/*
 * Buffer operations vector for NFS vnodes: writes go through
 * nfs_bwrite(); strategy, sync and bdflush use the generic
 * buffer-cache implementations.
 */
struct buf_ops buf_ops_nfs = {
	.bop_name	=	"buf_ops_nfs",
	.bop_write	=	nfs_bwrite,
	.bop_strategy	=	bufstrategy,
	.bop_sync	=	bufsync,
	.bop_bdflush	=	bufbdflush,
};
/* Cache object: cd203b67d77b2a70ae194cb6604a0ea5 (web cross-reference extraction residue, not part of the original source) */