1 /*-
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 /*
39 * vnode op calls for Sun NFS version 2 and 3
40 */
41
42 #include "opt_inet.h"
43
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/systm.h>
47 #include <sys/resourcevar.h>
48 #include <sys/proc.h>
49 #include <sys/mount.h>
50 #include <sys/bio.h>
51 #include <sys/buf.h>
52 #include <sys/malloc.h>
53 #include <sys/mbuf.h>
54 #include <sys/namei.h>
55 #include <sys/socket.h>
56 #include <sys/vnode.h>
57 #include <sys/dirent.h>
58 #include <sys/fcntl.h>
59 #include <sys/lockf.h>
60 #include <sys/stat.h>
61 #include <sys/sysctl.h>
62 #include <sys/signalvar.h>
63
64 #include <vm/vm.h>
65 #include <vm/vm_object.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_object.h>
68
69 #include <fs/fifofs/fifo.h>
70
71 #include <rpc/rpcclnt.h>
72
73 #include <nfs/rpcv2.h>
74 #include <nfs/nfsproto.h>
75 #include <nfsclient/nfs.h>
76 #include <nfsclient/nfsnode.h>
77 #include <nfsclient/nfsmount.h>
78 #include <nfsclient/nfs_lock.h>
79 #include <nfs/xdr_subs.h>
80 #include <nfsclient/nfsm_subs.h>
81
82 #include <net/if.h>
83 #include <netinet/in.h>
84 #include <netinet/in_var.h>
85
86 /* Defs */
87 #define TRUE 1
88 #define FALSE 0
89
90 /*
91 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
92 * calls are not in getblk() and brelse() so that they would not be necessary
93 * here.
94 */
95 #ifndef B_VMIO
96 #define vfs_busy_pages(bp, f)
97 #endif
98
99 static vop_read_t nfsfifo_read;
100 static vop_write_t nfsfifo_write;
101 static vop_close_t nfsfifo_close;
102 static int nfs_flush(struct vnode *, int, struct thread *,
103 int);
104 static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
105 struct thread *);
106 static vop_lookup_t nfs_lookup;
107 static vop_create_t nfs_create;
108 static vop_mknod_t nfs_mknod;
109 static vop_open_t nfs_open;
110 static vop_close_t nfs_close;
111 static vop_access_t nfs_access;
112 static vop_getattr_t nfs_getattr;
113 static vop_setattr_t nfs_setattr;
114 static vop_read_t nfs_read;
115 static vop_fsync_t nfs_fsync;
116 static vop_remove_t nfs_remove;
117 static vop_link_t nfs_link;
118 static vop_rename_t nfs_rename;
119 static vop_mkdir_t nfs_mkdir;
120 static vop_rmdir_t nfs_rmdir;
121 static vop_symlink_t nfs_symlink;
122 static vop_readdir_t nfs_readdir;
123 static vop_strategy_t nfs_strategy;
124 static int nfs_lookitup(struct vnode *, const char *, int,
125 struct ucred *, struct thread *, struct nfsnode **);
126 static int nfs_sillyrename(struct vnode *, struct vnode *,
127 struct componentname *);
128 static vop_access_t nfsspec_access;
129 static vop_readlink_t nfs_readlink;
130 static vop_print_t nfs_print;
131 static vop_advlock_t nfs_advlock;
132 static vop_advlockasync_t nfs_advlockasync;
133
134 /*
135 * Global vfs data structures for nfs
136 */
137 struct vop_vector nfs_vnodeops = {
138 .vop_default = &default_vnodeops,
139 .vop_access = nfs_access,
140 .vop_advlock = nfs_advlock,
141 .vop_advlockasync = nfs_advlockasync,
142 .vop_close = nfs_close,
143 .vop_create = nfs_create,
144 .vop_fsync = nfs_fsync,
145 .vop_getattr = nfs_getattr,
146 .vop_getpages = nfs_getpages,
147 .vop_putpages = nfs_putpages,
148 .vop_inactive = nfs_inactive,
149 .vop_lease = VOP_NULL,
150 .vop_link = nfs_link,
151 .vop_lookup = nfs_lookup,
152 .vop_mkdir = nfs_mkdir,
153 .vop_mknod = nfs_mknod,
154 .vop_open = nfs_open,
155 .vop_print = nfs_print,
156 .vop_read = nfs_read,
157 .vop_readdir = nfs_readdir,
158 .vop_readlink = nfs_readlink,
159 .vop_reclaim = nfs_reclaim,
160 .vop_remove = nfs_remove,
161 .vop_rename = nfs_rename,
162 .vop_rmdir = nfs_rmdir,
163 .vop_setattr = nfs_setattr,
164 .vop_strategy = nfs_strategy,
165 .vop_symlink = nfs_symlink,
166 .vop_write = nfs_write,
167 };
168
169 struct vop_vector nfs_fifoops = {
170 .vop_default = &fifo_specops,
171 .vop_access = nfsspec_access,
172 .vop_close = nfsfifo_close,
173 .vop_fsync = nfs_fsync,
174 .vop_getattr = nfs_getattr,
175 .vop_inactive = nfs_inactive,
176 .vop_print = nfs_print,
177 .vop_read = nfsfifo_read,
178 .vop_reclaim = nfs_reclaim,
179 .vop_setattr = nfs_setattr,
180 .vop_write = nfsfifo_write,
181 };
182
183 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
184 struct componentname *cnp, struct vattr *vap);
185 static int nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
186 struct ucred *cred, struct thread *td);
187 static int nfs_renamerpc(struct vnode *fdvp, const char *fnameptr,
188 int fnamelen, struct vnode *tdvp,
189 const char *tnameptr, int tnamelen,
190 struct ucred *cred, struct thread *td);
191 static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
192 struct sillyrename *sp);
193
194 /*
195 * Global variables
196 */
197 struct mtx nfs_iod_mtx;
198 struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
199 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
200 int nfs_numasync = 0;
201 vop_advlock_t *nfs_advlock_p = nfs_dolock;
202 vop_reclaim_t *nfs_reclaim_p = NULL;
203 #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1))
204
205 SYSCTL_DECL(_vfs_nfs);
206
207 static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
208 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
209 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
210
211 static int nfsv3_commit_on_close = 0;
212 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
213 &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
214
215 static int nfs_clean_pages_on_close = 1;
216 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
217 &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
218
219 int nfs_directio_enable = 0;
220 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
221 &nfs_directio_enable, 0, "Enable NFS directio");
222
223 /*
224 * This sysctl allows other processes to mmap a file that has been opened
225 * O_DIRECT by a process. In general, having processes mmap the file while
226 * Direct IO is in progress can lead to Data Inconsistencies. But, we allow
227 * this by default to prevent DoS attacks - to prevent a malicious user from
228 * opening up files O_DIRECT preventing other users from mmap'ing these
229 * files. "Protected" environments where stricter consistency guarantees are
230 * required can disable this knob. The process that opened the file O_DIRECT
231 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
232 * meaningful.
233 */
234 int nfs_directio_allow_mmap = 1;
235 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
236 &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
237
238 #if 0
239 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
240 &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
241
242 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
243 &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
244 #endif
245
246 #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \
247 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \
248 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
249
/*
 * SMP Locking Note :
 * Each lock below is listed with a description of what it protects.  The
 * indented names that follow a lock are the other locks that may be
 * acquired while that lock is held, in acquisition order.
 * np->n_mtx : Protects the fields in the nfsnode.
 *	VM Object Lock
 *	VI_MTX (acquired indirectly)
 * nmp->nm_mtx : Protects the fields in the nfsmount.
 *	rep->r_mtx
 * nfs_iod_mtx : Global lock, protects shared nfsiod state.
 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
 *	nmp->nm_mtx
 *	rep->r_mtx
 * rep->r_mtx : Protects the fields in an nfsreq.
 */
265
266 static int
267 nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
268 struct ucred *cred)
269 {
270 const int v3 = 1;
271 u_int32_t *tl;
272 int error = 0, attrflag;
273
274 struct mbuf *mreq, *mrep, *md, *mb;
275 caddr_t bpos, dpos;
276 u_int32_t rmode;
277 struct nfsnode *np = VTONFS(vp);
278
279 nfsstats.rpccnt[NFSPROC_ACCESS]++;
280 mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
281 mb = mreq;
282 bpos = mtod(mb, caddr_t);
283 nfsm_fhtom(vp, v3);
284 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
285 *tl = txdr_unsigned(wmode);
286 nfsm_request(vp, NFSPROC_ACCESS, td, cred);
287 nfsm_postop_attr(vp, attrflag);
288 if (!error) {
289 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
290 rmode = fxdr_unsigned(u_int32_t, *tl);
291 mtx_lock(&np->n_mtx);
292 np->n_mode = rmode;
293 np->n_modeuid = cred->cr_uid;
294 np->n_modestamp = time_second;
295 mtx_unlock(&np->n_mtx);
296 }
297 m_freem(mrep);
298 nfsmout:
299 return (error);
300 }
301
302 /*
303 * nfs access vnode op.
304 * For nfs version 2, just return ok. File accesses may fail later.
305 * For nfs version 3, use the access rpc to check accessibility. If file modes
306 * are changed on the server, accesses might still fail later.
307 */
308 static int
309 nfs_access(struct vop_access_args *ap)
310 {
311 struct vnode *vp = ap->a_vp;
312 int error = 0;
313 u_int32_t mode, wmode;
314 int v3 = NFS_ISV3(vp);
315 struct nfsnode *np = VTONFS(vp);
316
317 /*
318 * Disallow write attempts on filesystems mounted read-only;
319 * unless the file is a socket, fifo, or a block or character
320 * device resident on the filesystem.
321 */
322 if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
323 switch (vp->v_type) {
324 case VREG:
325 case VDIR:
326 case VLNK:
327 return (EROFS);
328 default:
329 break;
330 }
331 }
332 /*
333 * For nfs v3, check to see if we have done this recently, and if
334 * so return our cached result instead of making an ACCESS call.
335 * If not, do an access rpc, otherwise you are stuck emulating
336 * ufs_access() locally using the vattr. This may not be correct,
337 * since the server may apply other access criteria such as
338 * client uid-->server uid mapping that we do not know about.
339 */
340 if (v3) {
341 if (ap->a_mode & VREAD)
342 mode = NFSV3ACCESS_READ;
343 else
344 mode = 0;
345 if (vp->v_type != VDIR) {
346 if (ap->a_mode & VWRITE)
347 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
348 if (ap->a_mode & VEXEC)
349 mode |= NFSV3ACCESS_EXECUTE;
350 } else {
351 if (ap->a_mode & VWRITE)
352 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
353 NFSV3ACCESS_DELETE);
354 if (ap->a_mode & VEXEC)
355 mode |= NFSV3ACCESS_LOOKUP;
356 }
357 /* XXX safety belt, only make blanket request if caching */
358 if (nfsaccess_cache_timeout > 0) {
359 wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
360 NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
361 NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
362 } else {
363 wmode = mode;
364 }
365
366 /*
367 * Does our cached result allow us to give a definite yes to
368 * this request?
369 */
370 mtx_lock(&np->n_mtx);
371 if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
372 (ap->a_cred->cr_uid == np->n_modeuid) &&
373 ((np->n_mode & mode) == mode)) {
374 nfsstats.accesscache_hits++;
375 } else {
376 /*
377 * Either a no, or a don't know. Go to the wire.
378 */
379 nfsstats.accesscache_misses++;
380 mtx_unlock(&np->n_mtx);
381 error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred);
382 mtx_lock(&np->n_mtx);
383 if (!error) {
384 if ((np->n_mode & mode) != mode) {
385 error = EACCES;
386 }
387 }
388 }
389 mtx_unlock(&np->n_mtx);
390 return (error);
391 } else {
392 if ((error = nfsspec_access(ap)) != 0) {
393 return (error);
394 }
395 /*
396 * Attempt to prevent a mapped root from accessing a file
397 * which it shouldn't. We try to read a byte from the file
398 * if the user is root and the file is not zero length.
399 * After calling nfsspec_access, we should have the correct
400 * file size cached.
401 */
402 mtx_lock(&np->n_mtx);
403 if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
404 && VTONFS(vp)->n_size > 0) {
405 struct iovec aiov;
406 struct uio auio;
407 char buf[1];
408
409 mtx_unlock(&np->n_mtx);
410 aiov.iov_base = buf;
411 aiov.iov_len = 1;
412 auio.uio_iov = &aiov;
413 auio.uio_iovcnt = 1;
414 auio.uio_offset = 0;
415 auio.uio_resid = 1;
416 auio.uio_segflg = UIO_SYSSPACE;
417 auio.uio_rw = UIO_READ;
418 auio.uio_td = ap->a_td;
419
420 if (vp->v_type == VREG)
421 error = nfs_readrpc(vp, &auio, ap->a_cred);
422 else if (vp->v_type == VDIR) {
423 char* bp;
424 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
425 aiov.iov_base = bp;
426 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
427 error = nfs_readdirrpc(vp, &auio, ap->a_cred);
428 free(bp, M_TEMP);
429 } else if (vp->v_type == VLNK)
430 error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
431 else
432 error = EACCES;
433 } else
434 mtx_unlock(&np->n_mtx);
435 return (error);
436 }
437 }
438
439 int nfs_otw_getattr_avoid = 0;
440
441 /*
442 * nfs open vnode op
443 * Check to see if the type is ok
444 * and that deletion is not in progress.
445 * For paged in text files, you will need to flush the page cache
446 * if consistency is lost.
447 */
448 /* ARGSUSED */
449 static int
450 nfs_open(struct vop_open_args *ap)
451 {
452 struct vnode *vp = ap->a_vp;
453 struct nfsnode *np = VTONFS(vp);
454 struct vattr vattr;
455 int error;
456 int fmode = ap->a_mode;
457
458 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
459 return (EOPNOTSUPP);
460
461 /*
462 * Get a valid lease. If cached data is stale, flush it.
463 */
464 mtx_lock(&np->n_mtx);
465 if (np->n_flag & NMODIFIED) {
466 mtx_unlock(&np->n_mtx);
467 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
468 if (error == EINTR || error == EIO)
469 return (error);
470 np->n_attrstamp = 0;
471 if (vp->v_type == VDIR)
472 np->n_direofoffset = 0;
473 error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
474 if (error)
475 return (error);
476 mtx_lock(&np->n_mtx);
477 np->n_mtime = vattr.va_mtime;
478 mtx_unlock(&np->n_mtx);
479 } else {
480 struct thread *td = curthread;
481
482 if (np->n_ac_ts_syscalls != td->td_syscalls ||
483 np->n_ac_ts_tid != td->td_tid ||
484 td->td_proc == NULL ||
485 np->n_ac_ts_pid != td->td_proc->p_pid) {
486 np->n_attrstamp = 0;
487 }
488 mtx_unlock(&np->n_mtx);
489 error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
490 if (error)
491 return (error);
492 mtx_lock(&np->n_mtx);
493 if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
494 if (vp->v_type == VDIR)
495 np->n_direofoffset = 0;
496 mtx_unlock(&np->n_mtx);
497 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
498 if (error == EINTR || error == EIO) {
499 return (error);
500 }
501 mtx_lock(&np->n_mtx);
502 np->n_mtime = vattr.va_mtime;
503 }
504 mtx_unlock(&np->n_mtx);
505 }
506 /*
507 * If the object has >= 1 O_DIRECT active opens, we disable caching.
508 */
509 if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
510 if (np->n_directio_opens == 0) {
511 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
512 if (error)
513 return (error);
514 mtx_lock(&np->n_mtx);
515 np->n_flag |= NNONCACHE;
516 mtx_unlock(&np->n_mtx);
517 }
518 np->n_directio_opens++;
519 }
520 vnode_create_vobject(vp, vattr.va_size, ap->a_td);
521 return (0);
522 }
523
524 /*
525 * nfs close vnode op
526 * What an NFS client should do upon close after writing is a debatable issue.
527 * Most NFS clients push delayed writes to the server upon close, basically for
528 * two reasons:
529 * 1 - So that any write errors may be reported back to the client process
530 * doing the close system call. By far the two most likely errors are
531 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
532 * 2 - To put a worst case upper bound on cache inconsistency between
533 * multiple clients for the file.
534 * There is also a consistency problem for Version 2 of the protocol w.r.t.
535 * not being able to tell if other clients are writing a file concurrently,
536 * since there is no way of knowing if the changed modify time in the reply
537 * is only due to the write for this client.
538 * (NFS Version 3 provides weak cache consistency data in the reply that
539 * should be sufficient to detect and handle this case.)
540 *
541 * The current code does the following:
542 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
543 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
544 * or commit them (this satisfies 1 and 2 except for the
545 * case where the server crashes after this close but
546 * before the commit RPC, which is felt to be "good
547 * enough". Changing the last argument to nfs_flush() to
548 * a 1 would force a commit operation, if it is felt a
549 * commit is necessary now.
550 */
551 /* ARGSUSED */
552 static int
553 nfs_close(struct vop_close_args *ap)
554 {
555 struct vnode *vp = ap->a_vp;
556 struct nfsnode *np = VTONFS(vp);
557 int error = 0;
558 int fmode = ap->a_fflag;
559
560 if (vp->v_type == VREG) {
561 /*
562 * Examine and clean dirty pages, regardless of NMODIFIED.
563 * This closes a major hole in close-to-open consistency.
564 * We want to push out all dirty pages (and buffers) on
565 * close, regardless of whether they were dirtied by
566 * mmap'ed writes or via write().
567 */
568 if (nfs_clean_pages_on_close && vp->v_object) {
569 VM_OBJECT_LOCK(vp->v_object);
570 vm_object_page_clean(vp->v_object, 0, 0, 0);
571 VM_OBJECT_UNLOCK(vp->v_object);
572 }
573 mtx_lock(&np->n_mtx);
574 if (np->n_flag & NMODIFIED) {
575 mtx_unlock(&np->n_mtx);
576 if (NFS_ISV3(vp)) {
577 /*
578 * Under NFSv3 we have dirty buffers to dispose of. We
579 * must flush them to the NFS server. We have the option
580 * of waiting all the way through the commit rpc or just
581 * waiting for the initial write. The default is to only
582 * wait through the initial write so the data is in the
583 * server's cache, which is roughly similar to the state
584 * a standard disk subsystem leaves the file in on close().
585 *
586 * We cannot clear the NMODIFIED bit in np->n_flag due to
587 * potential races with other processes, and certainly
588 * cannot clear it if we don't commit.
589 */
590 int cm = nfsv3_commit_on_close ? 1 : 0;
591 error = nfs_flush(vp, MNT_WAIT, ap->a_td, cm);
592 /* np->n_flag &= ~NMODIFIED; */
593 } else
594 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
595 mtx_lock(&np->n_mtx);
596 }
597 /*
598 * Invalidate the attribute cache in all cases.
599 * An open is going to fetch fresh attrs any way, other procs
600 * on this node that have file open will be forced to do an
601 * otw attr fetch, but this is safe.
602 */
603 np->n_attrstamp = 0;
604 if (np->n_flag & NWRITEERR) {
605 np->n_flag &= ~NWRITEERR;
606 error = np->n_error;
607 }
608 mtx_unlock(&np->n_mtx);
609 }
610 if (nfs_directio_enable)
611 KASSERT((np->n_directio_asyncwr == 0),
612 ("nfs_close: dirty unflushed (%d) directio buffers\n",
613 np->n_directio_asyncwr));
614 if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
615 mtx_lock(&np->n_mtx);
616 KASSERT((np->n_directio_opens > 0),
617 ("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
618 np->n_directio_opens--;
619 if (np->n_directio_opens == 0)
620 np->n_flag &= ~NNONCACHE;
621 mtx_unlock(&np->n_mtx);
622 }
623 return (error);
624 }
625
626 /*
627 * nfs getattr call from vfs.
628 */
629 static int
630 nfs_getattr(struct vop_getattr_args *ap)
631 {
632 struct vnode *vp = ap->a_vp;
633 struct nfsnode *np = VTONFS(vp);
634 caddr_t bpos, dpos;
635 int error = 0;
636 struct mbuf *mreq, *mrep, *md, *mb;
637 int v3 = NFS_ISV3(vp);
638
639 /*
640 * Update local times for special files.
641 */
642 mtx_lock(&np->n_mtx);
643 if (np->n_flag & (NACC | NUPD))
644 np->n_flag |= NCHG;
645 mtx_unlock(&np->n_mtx);
646 /*
647 * First look in the cache.
648 */
649 if (nfs_getattrcache(vp, ap->a_vap) == 0)
650 goto nfsmout;
651 if (v3 && nfsaccess_cache_timeout > 0) {
652 nfsstats.accesscache_misses++;
653 nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_td, ap->a_cred);
654 if (nfs_getattrcache(vp, ap->a_vap) == 0)
655 goto nfsmout;
656 }
657 nfsstats.rpccnt[NFSPROC_GETATTR]++;
658 mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
659 mb = mreq;
660 bpos = mtod(mb, caddr_t);
661 nfsm_fhtom(vp, v3);
662 nfsm_request(vp, NFSPROC_GETATTR, ap->a_td, ap->a_cred);
663 if (!error) {
664 nfsm_loadattr(vp, ap->a_vap);
665 }
666 m_freem(mrep);
667 nfsmout:
668 return (error);
669 }
670
671 /*
672 * nfs setattr call.
673 */
674 static int
675 nfs_setattr(struct vop_setattr_args *ap)
676 {
677 struct vnode *vp = ap->a_vp;
678 struct nfsnode *np = VTONFS(vp);
679 struct vattr *vap = ap->a_vap;
680 int error = 0;
681 u_quad_t tsize;
682
683 #ifndef nolint
684 tsize = (u_quad_t)0;
685 #endif
686
687 /*
688 * Setting of flags and marking of atimes are not supported.
689 */
690 if (vap->va_flags != VNOVAL || (vap->va_vaflags & VA_MARK_ATIME))
691 return (EOPNOTSUPP);
692
693 /*
694 * Disallow write attempts if the filesystem is mounted read-only.
695 */
696 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
697 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
698 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
699 (vp->v_mount->mnt_flag & MNT_RDONLY)) {
700 error = EROFS;
701 goto out;
702 }
703 if (vap->va_size != VNOVAL) {
704 switch (vp->v_type) {
705 case VDIR:
706 return (EISDIR);
707 case VCHR:
708 case VBLK:
709 case VSOCK:
710 case VFIFO:
711 if (vap->va_mtime.tv_sec == VNOVAL &&
712 vap->va_atime.tv_sec == VNOVAL &&
713 vap->va_mode == (mode_t)VNOVAL &&
714 vap->va_uid == (uid_t)VNOVAL &&
715 vap->va_gid == (gid_t)VNOVAL)
716 return (0);
717 vap->va_size = VNOVAL;
718 break;
719 default:
720 /*
721 * Disallow write attempts if the filesystem is
722 * mounted read-only.
723 */
724 if (vp->v_mount->mnt_flag & MNT_RDONLY)
725 return (EROFS);
726 /*
727 * We run vnode_pager_setsize() early (why?),
728 * we must set np->n_size now to avoid vinvalbuf
729 * V_SAVE races that might setsize a lower
730 * value.
731 */
732 mtx_lock(&np->n_mtx);
733 tsize = np->n_size;
734 mtx_unlock(&np->n_mtx);
735 error = nfs_meta_setsize(vp, ap->a_cred,
736 ap->a_td, vap->va_size);
737 mtx_lock(&np->n_mtx);
738 if (np->n_flag & NMODIFIED) {
739 tsize = np->n_size;
740 mtx_unlock(&np->n_mtx);
741 if (vap->va_size == 0)
742 error = nfs_vinvalbuf(vp, 0, ap->a_td, 1);
743 else
744 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
745 if (error) {
746 vnode_pager_setsize(vp, tsize);
747 goto out;
748 }
749 } else
750 mtx_unlock(&np->n_mtx);
751 /*
752 * np->n_size has already been set to vap->va_size
753 * in nfs_meta_setsize(). We must set it again since
754 * nfs_loadattrcache() could be called through
755 * nfs_meta_setsize() and could modify np->n_size.
756 */
757 mtx_lock(&np->n_mtx);
758 np->n_vattr.va_size = np->n_size = vap->va_size;
759 mtx_unlock(&np->n_mtx);
760 };
761 } else {
762 mtx_lock(&np->n_mtx);
763 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
764 (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
765 mtx_unlock(&np->n_mtx);
766 if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) != 0 &&
767 (error == EINTR || error == EIO))
768 return error;
769 } else
770 mtx_unlock(&np->n_mtx);
771 }
772 error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_td);
773 if (error && vap->va_size != VNOVAL) {
774 mtx_lock(&np->n_mtx);
775 np->n_size = np->n_vattr.va_size = tsize;
776 vnode_pager_setsize(vp, tsize);
777 mtx_unlock(&np->n_mtx);
778 }
779 out:
780 return (error);
781 }
782
783 /*
784 * Do an nfs setattr rpc.
785 */
786 static int
787 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
788 struct thread *td)
789 {
790 struct nfsv2_sattr *sp;
791 struct nfsnode *np = VTONFS(vp);
792 caddr_t bpos, dpos;
793 u_int32_t *tl;
794 int error = 0, wccflag = NFSV3_WCCRATTR;
795 struct mbuf *mreq, *mrep, *md, *mb;
796 int v3 = NFS_ISV3(vp);
797
798 nfsstats.rpccnt[NFSPROC_SETATTR]++;
799 mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
800 mb = mreq;
801 bpos = mtod(mb, caddr_t);
802 nfsm_fhtom(vp, v3);
803 if (v3) {
804 nfsm_v3attrbuild(vap, TRUE);
805 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
806 *tl = nfs_false;
807 } else {
808 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
809 if (vap->va_mode == (mode_t)VNOVAL)
810 sp->sa_mode = nfs_xdrneg1;
811 else
812 sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
813 if (vap->va_uid == (uid_t)VNOVAL)
814 sp->sa_uid = nfs_xdrneg1;
815 else
816 sp->sa_uid = txdr_unsigned(vap->va_uid);
817 if (vap->va_gid == (gid_t)VNOVAL)
818 sp->sa_gid = nfs_xdrneg1;
819 else
820 sp->sa_gid = txdr_unsigned(vap->va_gid);
821 sp->sa_size = txdr_unsigned(vap->va_size);
822 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
823 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
824 }
825 nfsm_request(vp, NFSPROC_SETATTR, td, cred);
826 if (v3) {
827 np->n_modestamp = 0;
828 nfsm_wcc_data(vp, wccflag);
829 } else
830 nfsm_loadattr(vp, NULL);
831 m_freem(mrep);
832 nfsmout:
833 return (error);
834 }
835
836 /*
837 * nfs lookup call, one step at a time...
838 * First look in cache
839 * If not found, unlock the directory nfsnode and do the rpc
840 */
841 static int
842 nfs_lookup(struct vop_lookup_args *ap)
843 {
844 struct componentname *cnp = ap->a_cnp;
845 struct vnode *dvp = ap->a_dvp;
846 struct vnode **vpp = ap->a_vpp;
847 int flags = cnp->cn_flags;
848 struct vnode *newvp;
849 struct nfsmount *nmp;
850 caddr_t bpos, dpos;
851 struct mbuf *mreq, *mrep, *md, *mb;
852 long len;
853 nfsfh_t *fhp;
854 struct nfsnode *np;
855 int error = 0, attrflag, fhsize;
856 int v3 = NFS_ISV3(dvp);
857 struct thread *td = cnp->cn_thread;
858
859 *vpp = NULLVP;
860 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
861 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
862 return (EROFS);
863 if (dvp->v_type != VDIR)
864 return (ENOTDIR);
865 nmp = VFSTONFS(dvp->v_mount);
866 np = VTONFS(dvp);
867 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
868 *vpp = NULLVP;
869 return (error);
870 }
871 if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
872 struct vattr vattr;
873
874 newvp = *vpp;
875 if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, td)
876 && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
877 nfsstats.lookupcache_hits++;
878 if (cnp->cn_nameiop != LOOKUP &&
879 (flags & ISLASTCN))
880 cnp->cn_flags |= SAVENAME;
881 return (0);
882 }
883 cache_purge(newvp);
884 if (dvp != newvp)
885 vput(newvp);
886 else
887 vrele(newvp);
888 *vpp = NULLVP;
889 }
890 error = 0;
891 newvp = NULLVP;
892 nfsstats.lookupcache_misses++;
893 nfsstats.rpccnt[NFSPROC_LOOKUP]++;
894 len = cnp->cn_namelen;
895 mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
896 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
897 mb = mreq;
898 bpos = mtod(mb, caddr_t);
899 nfsm_fhtom(dvp, v3);
900 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
901 nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred);
902 if (error) {
903 if (v3) {
904 nfsm_postop_attr(dvp, attrflag);
905 m_freem(mrep);
906 }
907 goto nfsmout;
908 }
909 nfsm_getfh(fhp, fhsize, v3);
910
911 /*
912 * Handle RENAME case...
913 */
914 if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
915 if (NFS_CMPFH(np, fhp, fhsize)) {
916 m_freem(mrep);
917 return (EISDIR);
918 }
919 error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
920 if (error) {
921 m_freem(mrep);
922 return (error);
923 }
924 newvp = NFSTOV(np);
925 if (v3) {
926 nfsm_postop_attr(newvp, attrflag);
927 nfsm_postop_attr(dvp, attrflag);
928 } else
929 nfsm_loadattr(newvp, NULL);
930 *vpp = newvp;
931 m_freem(mrep);
932 cnp->cn_flags |= SAVENAME;
933 return (0);
934 }
935
936 if (flags & ISDOTDOT) {
937 VOP_UNLOCK(dvp, 0, td);
938 error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
939 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
940 if (error)
941 return (error);
942 newvp = NFSTOV(np);
943 } else if (NFS_CMPFH(np, fhp, fhsize)) {
944 VREF(dvp);
945 newvp = dvp;
946 } else {
947 error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
948 if (error) {
949 m_freem(mrep);
950 return (error);
951 }
952 newvp = NFSTOV(np);
953 }
954 if (v3) {
955 nfsm_postop_attr(newvp, attrflag);
956 nfsm_postop_attr(dvp, attrflag);
957 } else
958 nfsm_loadattr(newvp, NULL);
959 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
960 cnp->cn_flags |= SAVENAME;
961 if ((cnp->cn_flags & MAKEENTRY) &&
962 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
963 np->n_ctime = np->n_vattr.va_ctime.tv_sec;
964 cache_enter(dvp, newvp, cnp);
965 }
966 *vpp = newvp;
967 m_freem(mrep);
968 nfsmout:
969 if (error) {
970 if (newvp != NULLVP) {
971 vput(newvp);
972 *vpp = NULLVP;
973 }
974 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
975 (flags & ISLASTCN) && error == ENOENT) {
976 if (dvp->v_mount->mnt_flag & MNT_RDONLY)
977 error = EROFS;
978 else
979 error = EJUSTRETURN;
980 }
981 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
982 cnp->cn_flags |= SAVENAME;
983 }
984 return (error);
985 }
986
987 /*
988 * nfs read call.
989 * Just call nfs_bioread() to do the work.
990 */
991 static int
992 nfs_read(struct vop_read_args *ap)
993 {
994 struct vnode *vp = ap->a_vp;
995
996 switch (vp->v_type) {
997 case VREG:
998 return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
999 case VDIR:
1000 return (EISDIR);
1001 default:
1002 return (EOPNOTSUPP);
1003 }
1004 }
1005
1006 /*
1007 * nfs readlink call
1008 */
1009 static int
1010 nfs_readlink(struct vop_readlink_args *ap)
1011 {
1012 struct vnode *vp = ap->a_vp;
1013
1014 if (vp->v_type != VLNK)
1015 return (EINVAL);
1016 return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
1017 }
1018
1019 /*
1020 * Do a readlink rpc.
1021 * Called by nfs_doio() from below the buffer cache.
1022 */
1023 int
1024 nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
1025 {
1026 caddr_t bpos, dpos;
1027 int error = 0, len, attrflag;
1028 struct mbuf *mreq, *mrep, *md, *mb;
1029 int v3 = NFS_ISV3(vp);
1030
1031 nfsstats.rpccnt[NFSPROC_READLINK]++;
1032 mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
1033 mb = mreq;
1034 bpos = mtod(mb, caddr_t);
1035 nfsm_fhtom(vp, v3);
1036 nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred);
1037 if (v3)
1038 nfsm_postop_attr(vp, attrflag);
1039 if (!error) {
1040 nfsm_strsiz(len, NFS_MAXPATHLEN);
1041 if (len == NFS_MAXPATHLEN) {
1042 struct nfsnode *np = VTONFS(vp);
1043 mtx_lock(&np->n_mtx);
1044 if (np->n_size && np->n_size < NFS_MAXPATHLEN)
1045 len = np->n_size;
1046 mtx_unlock(&np->n_mtx);
1047 }
1048 nfsm_mtouio(uiop, len);
1049 }
1050 m_freem(mrep);
1051 nfsmout:
1052 return (error);
1053 }
1054
1055 /*
1056 * nfs read rpc call
1057 * Ditto above
1058 */
1059 int
1060 nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
1061 {
1062 u_int32_t *tl;
1063 caddr_t bpos, dpos;
1064 struct mbuf *mreq, *mrep, *md, *mb;
1065 struct nfsmount *nmp;
1066 int error = 0, len, retlen, tsiz, eof, attrflag;
1067 int v3 = NFS_ISV3(vp);
1068 int rsize;
1069
1070 #ifndef nolint
1071 eof = 0;
1072 #endif
1073 nmp = VFSTONFS(vp->v_mount);
1074 tsiz = uiop->uio_resid;
1075 mtx_lock(&nmp->nm_mtx);
1076 if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
1077 mtx_unlock(&nmp->nm_mtx);
1078 return (EFBIG);
1079 }
1080 rsize = nmp->nm_rsize;
1081 mtx_unlock(&nmp->nm_mtx);
1082 while (tsiz > 0) {
1083 nfsstats.rpccnt[NFSPROC_READ]++;
1084 len = (tsiz > rsize) ? rsize : tsiz;
1085 mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
1086 mb = mreq;
1087 bpos = mtod(mb, caddr_t);
1088 nfsm_fhtom(vp, v3);
1089 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3);
1090 if (v3) {
1091 txdr_hyper(uiop->uio_offset, tl);
1092 *(tl + 2) = txdr_unsigned(len);
1093 } else {
1094 *tl++ = txdr_unsigned(uiop->uio_offset);
1095 *tl++ = txdr_unsigned(len);
1096 *tl = 0;
1097 }
1098 nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred);
1099 if (v3) {
1100 nfsm_postop_attr(vp, attrflag);
1101 if (error) {
1102 m_freem(mrep);
1103 goto nfsmout;
1104 }
1105 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
1106 eof = fxdr_unsigned(int, *(tl + 1));
1107 } else {
1108 nfsm_loadattr(vp, NULL);
1109 }
1110 nfsm_strsiz(retlen, rsize);
1111 nfsm_mtouio(uiop, retlen);
1112 m_freem(mrep);
1113 tsiz -= retlen;
1114 if (v3) {
1115 if (eof || retlen == 0) {
1116 tsiz = 0;
1117 }
1118 } else if (retlen < len) {
1119 tsiz = 0;
1120 }
1121 }
1122 nfsmout:
1123 return (error);
1124 }
1125
1126 /*
1127 * nfs write call
1128 */
1129 int
1130 nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
1131 int *iomode, int *must_commit)
1132 {
1133 u_int32_t *tl;
1134 int32_t backup;
1135 caddr_t bpos, dpos;
1136 struct mbuf *mreq, *mrep, *md, *mb;
1137 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1138 int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
1139 int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
1140 int wsize;
1141
1142 #ifndef DIAGNOSTIC
1143 if (uiop->uio_iovcnt != 1)
1144 panic("nfs: writerpc iovcnt > 1");
1145 #endif
1146 *must_commit = 0;
1147 tsiz = uiop->uio_resid;
1148 mtx_lock(&nmp->nm_mtx);
1149 if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
1150 mtx_unlock(&nmp->nm_mtx);
1151 return (EFBIG);
1152 }
1153 wsize = nmp->nm_wsize;
1154 mtx_unlock(&nmp->nm_mtx);
1155 while (tsiz > 0) {
1156 nfsstats.rpccnt[NFSPROC_WRITE]++;
1157 len = (tsiz > wsize) ? wsize : tsiz;
1158 mreq = nfsm_reqhead(vp, NFSPROC_WRITE,
1159 NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
1160 mb = mreq;
1161 bpos = mtod(mb, caddr_t);
1162 nfsm_fhtom(vp, v3);
1163 if (v3) {
1164 tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
1165 txdr_hyper(uiop->uio_offset, tl);
1166 tl += 2;
1167 *tl++ = txdr_unsigned(len);
1168 *tl++ = txdr_unsigned(*iomode);
1169 *tl = txdr_unsigned(len);
1170 } else {
1171 u_int32_t x;
1172
1173 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
1174 /* Set both "begin" and "current" to non-garbage. */
1175 x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1176 *tl++ = x; /* "begin offset" */
1177 *tl++ = x; /* "current offset" */
1178 x = txdr_unsigned(len);
1179 *tl++ = x; /* total to this offset */
1180 *tl = x; /* size of this write */
1181 }
1182 nfsm_uiotom(uiop, len);
1183 nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred);
1184 if (v3) {
1185 wccflag = NFSV3_WCCCHK;
1186 nfsm_wcc_data(vp, wccflag);
1187 if (!error) {
1188 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED
1189 + NFSX_V3WRITEVERF);
1190 rlen = fxdr_unsigned(int, *tl++);
1191 if (rlen == 0) {
1192 error = NFSERR_IO;
1193 m_freem(mrep);
1194 break;
1195 } else if (rlen < len) {
1196 backup = len - rlen;
1197 uiop->uio_iov->iov_base =
1198 (char *)uiop->uio_iov->iov_base -
1199 backup;
1200 uiop->uio_iov->iov_len += backup;
1201 uiop->uio_offset -= backup;
1202 uiop->uio_resid += backup;
1203 len = rlen;
1204 }
1205 commit = fxdr_unsigned(int, *tl++);
1206
1207 /*
1208 * Return the lowest committment level
1209 * obtained by any of the RPCs.
1210 */
1211 if (committed == NFSV3WRITE_FILESYNC)
1212 committed = commit;
1213 else if (committed == NFSV3WRITE_DATASYNC &&
1214 commit == NFSV3WRITE_UNSTABLE)
1215 committed = commit;
1216 mtx_lock(&nmp->nm_mtx);
1217 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
1218 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1219 NFSX_V3WRITEVERF);
1220 nmp->nm_state |= NFSSTA_HASWRITEVERF;
1221 } else if (bcmp((caddr_t)tl,
1222 (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
1223 *must_commit = 1;
1224 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1225 NFSX_V3WRITEVERF);
1226 }
1227 mtx_unlock(&nmp->nm_mtx);
1228 }
1229 } else {
1230 nfsm_loadattr(vp, NULL);
1231 }
1232 if (wccflag) {
1233 mtx_lock(&(VTONFS(vp))->n_mtx);
1234 VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
1235 mtx_unlock(&(VTONFS(vp))->n_mtx);
1236 }
1237 m_freem(mrep);
1238 if (error)
1239 break;
1240 tsiz -= len;
1241 }
1242 nfsmout:
1243 if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC)
1244 committed = NFSV3WRITE_FILESYNC;
1245 *iomode = committed;
1246 if (error)
1247 uiop->uio_resid = tsiz;
1248 return (error);
1249 }
1250
1251 /*
1252 * nfs mknod rpc
1253 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1254 * mode set to specify the file type and the size field for rdev.
1255 */
1256 static int
1257 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
1258 struct vattr *vap)
1259 {
1260 struct nfsv2_sattr *sp;
1261 u_int32_t *tl;
1262 struct vnode *newvp = NULL;
1263 struct nfsnode *np = NULL;
1264 struct vattr vattr;
1265 caddr_t bpos, dpos;
1266 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
1267 struct mbuf *mreq, *mrep, *md, *mb;
1268 u_int32_t rdev;
1269 int v3 = NFS_ISV3(dvp);
1270
1271 if (vap->va_type == VCHR || vap->va_type == VBLK)
1272 rdev = txdr_unsigned(vap->va_rdev);
1273 else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
1274 rdev = nfs_xdrneg1;
1275 else {
1276 return (EOPNOTSUPP);
1277 }
1278 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
1279 return (error);
1280 }
1281 nfsstats.rpccnt[NFSPROC_MKNOD]++;
1282 mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
1283 + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1284 mb = mreq;
1285 bpos = mtod(mb, caddr_t);
1286 nfsm_fhtom(dvp, v3);
1287 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1288 if (v3) {
1289 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
1290 *tl++ = vtonfsv3_type(vap->va_type);
1291 nfsm_v3attrbuild(vap, FALSE);
1292 if (vap->va_type == VCHR || vap->va_type == VBLK) {
1293 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
1294 *tl++ = txdr_unsigned(umajor(vap->va_rdev));
1295 *tl = txdr_unsigned(uminor(vap->va_rdev));
1296 }
1297 } else {
1298 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
1299 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1300 sp->sa_uid = nfs_xdrneg1;
1301 sp->sa_gid = nfs_xdrneg1;
1302 sp->sa_size = rdev;
1303 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1304 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1305 }
1306 nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred);
1307 if (!error) {
1308 nfsm_mtofh(dvp, newvp, v3, gotvp);
1309 if (!gotvp) {
1310 if (newvp) {
1311 vput(newvp);
1312 newvp = NULL;
1313 }
1314 error = nfs_lookitup(dvp, cnp->cn_nameptr,
1315 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
1316 if (!error)
1317 newvp = NFSTOV(np);
1318 }
1319 }
1320 if (v3)
1321 nfsm_wcc_data(dvp, wccflag);
1322 m_freem(mrep);
1323 nfsmout:
1324 if (error) {
1325 if (newvp)
1326 vput(newvp);
1327 } else {
1328 if (cnp->cn_flags & MAKEENTRY)
1329 cache_enter(dvp, newvp, cnp);
1330 *vpp = newvp;
1331 }
1332 mtx_lock(&(VTONFS(dvp))->n_mtx);
1333 VTONFS(dvp)->n_flag |= NMODIFIED;
1334 if (!wccflag)
1335 VTONFS(dvp)->n_attrstamp = 0;
1336 mtx_unlock(&(VTONFS(dvp))->n_mtx);
1337 return (error);
1338 }
1339
1340 /*
1341 * nfs mknod vop
1342 * just call nfs_mknodrpc() to do the work.
1343 */
1344 /* ARGSUSED */
1345 static int
1346 nfs_mknod(struct vop_mknod_args *ap)
1347 {
1348 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
1349 }
1350
1351 static u_long create_verf;
1352 /*
1353 * nfs file create call
1354 */
1355 static int
1356 nfs_create(struct vop_create_args *ap)
1357 {
1358 struct vnode *dvp = ap->a_dvp;
1359 struct vattr *vap = ap->a_vap;
1360 struct componentname *cnp = ap->a_cnp;
1361 struct nfsv2_sattr *sp;
1362 u_int32_t *tl;
1363 struct nfsnode *np = NULL;
1364 struct vnode *newvp = NULL;
1365 caddr_t bpos, dpos;
1366 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
1367 struct mbuf *mreq, *mrep, *md, *mb;
1368 struct vattr vattr;
1369 int v3 = NFS_ISV3(dvp);
1370
1371 /*
1372 * Oops, not for me..
1373 */
1374 if (vap->va_type == VSOCK)
1375 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
1376
1377 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
1378 return (error);
1379 }
1380 if (vap->va_vaflags & VA_EXCLUSIVE)
1381 fmode |= O_EXCL;
1382 again:
1383 nfsstats.rpccnt[NFSPROC_CREATE]++;
1384 mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
1385 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1386 mb = mreq;
1387 bpos = mtod(mb, caddr_t);
1388 nfsm_fhtom(dvp, v3);
1389 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1390 if (v3) {
1391 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
1392 if (fmode & O_EXCL) {
1393 *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
1394 tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF);
1395 #ifdef INET
1396 if (!TAILQ_EMPTY(&in_ifaddrhead))
1397 *tl++ = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr.s_addr;
1398 else
1399 #endif
1400 *tl++ = create_verf;
1401 *tl = ++create_verf;
1402 } else {
1403 *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
1404 nfsm_v3attrbuild(vap, FALSE);
1405 }
1406 } else {
1407 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
1408 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1409 sp->sa_uid = nfs_xdrneg1;
1410 sp->sa_gid = nfs_xdrneg1;
1411 sp->sa_size = 0;
1412 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1413 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1414 }
1415 nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred);
1416 if (!error) {
1417 nfsm_mtofh(dvp, newvp, v3, gotvp);
1418 if (!gotvp) {
1419 if (newvp) {
1420 vput(newvp);
1421 newvp = NULL;
1422 }
1423 error = nfs_lookitup(dvp, cnp->cn_nameptr,
1424 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
1425 if (!error)
1426 newvp = NFSTOV(np);
1427 }
1428 }
1429 if (v3)
1430 nfsm_wcc_data(dvp, wccflag);
1431 m_freem(mrep);
1432 nfsmout:
1433 if (error) {
1434 if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
1435 fmode &= ~O_EXCL;
1436 goto again;
1437 }
1438 if (newvp)
1439 vput(newvp);
1440 } else if (v3 && (fmode & O_EXCL)) {
1441 /*
1442 * We are normally called with only a partially initialized
1443 * VAP. Since the NFSv3 spec says that server may use the
1444 * file attributes to store the verifier, the spec requires
1445 * us to do a SETATTR RPC. FreeBSD servers store the verifier
1446 * in atime, but we can't really assume that all servers will
1447 * so we ensure that our SETATTR sets both atime and mtime.
1448 */
1449 if (vap->va_mtime.tv_sec == VNOVAL)
1450 vfs_timestamp(&vap->va_mtime);
1451 if (vap->va_atime.tv_sec == VNOVAL)
1452 vap->va_atime = vap->va_mtime;
1453 error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_thread);
1454 if (error)
1455 vput(newvp);
1456 }
1457 if (!error) {
1458 if (cnp->cn_flags & MAKEENTRY)
1459 cache_enter(dvp, newvp, cnp);
1460 *ap->a_vpp = newvp;
1461 }
1462 mtx_lock(&(VTONFS(dvp))->n_mtx);
1463 VTONFS(dvp)->n_flag |= NMODIFIED;
1464 if (!wccflag)
1465 VTONFS(dvp)->n_attrstamp = 0;
1466 mtx_unlock(&(VTONFS(dvp))->n_mtx);
1467 return (error);
1468 }
1469
1470 /*
1471 * nfs file remove call
1472 * To try and make nfs semantics closer to ufs semantics, a file that has
1473 * other processes using the vnode is renamed instead of removed and then
1474 * removed later on the last close.
1475 * - If v_usecount > 1
1476 * If a rename is not already in the works
1477 * call nfs_sillyrename() to set it up
1478 * else
1479 * do the remove rpc
1480 */
1481 static int
1482 nfs_remove(struct vop_remove_args *ap)
1483 {
1484 struct vnode *vp = ap->a_vp;
1485 struct vnode *dvp = ap->a_dvp;
1486 struct componentname *cnp = ap->a_cnp;
1487 struct nfsnode *np = VTONFS(vp);
1488 int error = 0;
1489 struct vattr vattr;
1490
1491 #ifndef DIAGNOSTIC
1492 if ((cnp->cn_flags & HASBUF) == 0)
1493 panic("nfs_remove: no name");
1494 if (vrefcnt(vp) < 1)
1495 panic("nfs_remove: bad v_usecount");
1496 #endif
1497 if (vp->v_type == VDIR)
1498 error = EPERM;
1499 else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
1500 VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_thread) == 0 &&
1501 vattr.va_nlink > 1)) {
1502 /*
1503 * Purge the name cache so that the chance of a lookup for
1504 * the name succeeding while the remove is in progress is
1505 * minimized. Without node locking it can still happen, such
1506 * that an I/O op returns ESTALE, but since you get this if
1507 * another host removes the file..
1508 */
1509 cache_purge(vp);
1510 /*
1511 * throw away biocache buffers, mainly to avoid
1512 * unnecessary delayed writes later.
1513 */
1514 error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1);
1515 /* Do the rpc */
1516 if (error != EINTR && error != EIO)
1517 error = nfs_removerpc(dvp, cnp->cn_nameptr,
1518 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
1519 /*
1520 * Kludge City: If the first reply to the remove rpc is lost..
1521 * the reply to the retransmitted request will be ENOENT
1522 * since the file was in fact removed
1523 * Therefore, we cheat and return success.
1524 */
1525 if (error == ENOENT)
1526 error = 0;
1527 } else if (!np->n_sillyrename)
1528 error = nfs_sillyrename(dvp, vp, cnp);
1529 np->n_attrstamp = 0;
1530 return (error);
1531 }
1532
1533 /*
1534 * nfs file remove rpc called from nfs_inactive
1535 */
1536 int
1537 nfs_removeit(struct sillyrename *sp)
1538 {
1539 /*
1540 * Make sure that the directory vnode is still valid.
1541 * XXX we should lock sp->s_dvp here.
1542 */
1543 if (sp->s_dvp->v_type == VBAD)
1544 return (0);
1545 return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
1546 NULL));
1547 }
1548
1549 /*
1550 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1551 */
1552 static int
1553 nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
1554 struct ucred *cred, struct thread *td)
1555 {
1556 caddr_t bpos, dpos;
1557 int error = 0, wccflag = NFSV3_WCCRATTR;
1558 struct mbuf *mreq, *mrep, *md, *mb;
1559 int v3 = NFS_ISV3(dvp);
1560
1561 nfsstats.rpccnt[NFSPROC_REMOVE]++;
1562 mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE,
1563 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
1564 mb = mreq;
1565 bpos = mtod(mb, caddr_t);
1566 nfsm_fhtom(dvp, v3);
1567 nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
1568 nfsm_request(dvp, NFSPROC_REMOVE, td, cred);
1569 if (v3)
1570 nfsm_wcc_data(dvp, wccflag);
1571 m_freem(mrep);
1572 nfsmout:
1573 mtx_lock(&(VTONFS(dvp))->n_mtx);
1574 VTONFS(dvp)->n_flag |= NMODIFIED;
1575 if (!wccflag)
1576 VTONFS(dvp)->n_attrstamp = 0;
1577 mtx_unlock(&(VTONFS(dvp))->n_mtx);
1578 return (error);
1579 }
1580
1581 /*
1582 * nfs file rename call
1583 */
1584 static int
1585 nfs_rename(struct vop_rename_args *ap)
1586 {
1587 struct vnode *fvp = ap->a_fvp;
1588 struct vnode *tvp = ap->a_tvp;
1589 struct vnode *fdvp = ap->a_fdvp;
1590 struct vnode *tdvp = ap->a_tdvp;
1591 struct componentname *tcnp = ap->a_tcnp;
1592 struct componentname *fcnp = ap->a_fcnp;
1593 int error;
1594
1595 #ifndef DIAGNOSTIC
1596 if ((tcnp->cn_flags & HASBUF) == 0 ||
1597 (fcnp->cn_flags & HASBUF) == 0)
1598 panic("nfs_rename: no name");
1599 #endif
1600 /* Check for cross-device rename */
1601 if ((fvp->v_mount != tdvp->v_mount) ||
1602 (tvp && (fvp->v_mount != tvp->v_mount))) {
1603 error = EXDEV;
1604 goto out;
1605 }
1606
1607 if (fvp == tvp) {
1608 nfs_printf("nfs_rename: fvp == tvp (can't happen)\n");
1609 error = 0;
1610 goto out;
1611 }
1612 if ((error = vn_lock(fvp, LK_EXCLUSIVE, fcnp->cn_thread)) != 0)
1613 goto out;
1614
1615 /*
1616 * We have to flush B_DELWRI data prior to renaming
1617 * the file. If we don't, the delayed-write buffers
1618 * can be flushed out later after the file has gone stale
1619 * under NFSV3. NFSV2 does not have this problem because
1620 * ( as far as I can tell ) it flushes dirty buffers more
1621 * often.
1622 *
1623 * Skip the rename operation if the fsync fails, this can happen
1624 * due to the server's volume being full, when we pushed out data
1625 * that was written back to our cache earlier. Not checking for
1626 * this condition can result in potential (silent) data loss.
1627 */
1628 error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
1629 VOP_UNLOCK(fvp, 0, fcnp->cn_thread);
1630 if (!error && tvp)
1631 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
1632 if (error)
1633 goto out;
1634
1635 /*
1636 * If the tvp exists and is in use, sillyrename it before doing the
1637 * rename of the new file over it.
1638 * XXX Can't sillyrename a directory.
1639 */
1640 if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
1641 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
1642 vput(tvp);
1643 tvp = NULL;
1644 }
1645
1646 error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
1647 tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
1648 tcnp->cn_thread);
1649
1650 if (fvp->v_type == VDIR) {
1651 if (tvp != NULL && tvp->v_type == VDIR)
1652 cache_purge(tdvp);
1653 cache_purge(fdvp);
1654 }
1655
1656 out:
1657 if (tdvp == tvp)
1658 vrele(tdvp);
1659 else
1660 vput(tdvp);
1661 if (tvp)
1662 vput(tvp);
1663 vrele(fdvp);
1664 vrele(fvp);
1665 /*
1666 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
1667 */
1668 if (error == ENOENT)
1669 error = 0;
1670 return (error);
1671 }
1672
1673 /*
1674 * nfs file rename rpc called from nfs_remove() above
1675 */
1676 static int
1677 nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
1678 struct sillyrename *sp)
1679 {
1680
1681 return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp,
1682 sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread));
1683 }
1684
1685 /*
1686 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
1687 */
1688 static int
1689 nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
1690 struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred,
1691 struct thread *td)
1692 {
1693 caddr_t bpos, dpos;
1694 int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
1695 struct mbuf *mreq, *mrep, *md, *mb;
1696 int v3 = NFS_ISV3(fdvp);
1697
1698 nfsstats.rpccnt[NFSPROC_RENAME]++;
1699 mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME,
1700 (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
1701 nfsm_rndup(tnamelen));
1702 mb = mreq;
1703 bpos = mtod(mb, caddr_t);
1704 nfsm_fhtom(fdvp, v3);
1705 nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
1706 nfsm_fhtom(tdvp, v3);
1707 nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
1708 nfsm_request(fdvp, NFSPROC_RENAME, td, cred);
1709 if (v3) {
1710 nfsm_wcc_data(fdvp, fwccflag);
1711 nfsm_wcc_data(tdvp, twccflag);
1712 }
1713 m_freem(mrep);
1714 nfsmout:
1715 mtx_lock(&(VTONFS(fdvp))->n_mtx);
1716 VTONFS(fdvp)->n_flag |= NMODIFIED;
1717 mtx_unlock(&(VTONFS(fdvp))->n_mtx);
1718 mtx_lock(&(VTONFS(tdvp))->n_mtx);
1719 VTONFS(tdvp)->n_flag |= NMODIFIED;
1720 mtx_unlock(&(VTONFS(tdvp))->n_mtx);
1721 if (!fwccflag)
1722 VTONFS(fdvp)->n_attrstamp = 0;
1723 if (!twccflag)
1724 VTONFS(tdvp)->n_attrstamp = 0;
1725 return (error);
1726 }
1727
1728 /*
1729 * nfs hard link create call
1730 */
1731 static int
1732 nfs_link(struct vop_link_args *ap)
1733 {
1734 struct vnode *vp = ap->a_vp;
1735 struct vnode *tdvp = ap->a_tdvp;
1736 struct componentname *cnp = ap->a_cnp;
1737 caddr_t bpos, dpos;
1738 int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
1739 struct mbuf *mreq, *mrep, *md, *mb;
1740 int v3;
1741
1742 if (vp->v_mount != tdvp->v_mount) {
1743 return (EXDEV);
1744 }
1745
1746 /*
1747 * Push all writes to the server, so that the attribute cache
1748 * doesn't get "out of sync" with the server.
1749 * XXX There should be a better way!
1750 */
1751 VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);
1752
1753 v3 = NFS_ISV3(vp);
1754 nfsstats.rpccnt[NFSPROC_LINK]++;
1755 mreq = nfsm_reqhead(vp, NFSPROC_LINK,
1756 NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
1757 mb = mreq;
1758 bpos = mtod(mb, caddr_t);
1759 nfsm_fhtom(vp, v3);
1760 nfsm_fhtom(tdvp, v3);
1761 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1762 nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred);
1763 if (v3) {
1764 nfsm_postop_attr(vp, attrflag);
1765 nfsm_wcc_data(tdvp, wccflag);
1766 }
1767 m_freem(mrep);
1768 nfsmout:
1769 mtx_lock(&(VTONFS(tdvp))->n_mtx);
1770 VTONFS(tdvp)->n_flag |= NMODIFIED;
1771 mtx_unlock(&(VTONFS(tdvp))->n_mtx);
1772 if (!attrflag)
1773 VTONFS(vp)->n_attrstamp = 0;
1774 if (!wccflag)
1775 VTONFS(tdvp)->n_attrstamp = 0;
1776 return (error);
1777 }
1778
1779 /*
1780 * nfs symbolic link create call
1781 */
1782 static int
1783 nfs_symlink(struct vop_symlink_args *ap)
1784 {
1785 struct vnode *dvp = ap->a_dvp;
1786 struct vattr *vap = ap->a_vap;
1787 struct componentname *cnp = ap->a_cnp;
1788 struct nfsv2_sattr *sp;
1789 caddr_t bpos, dpos;
1790 int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
1791 struct mbuf *mreq, *mrep, *md, *mb;
1792 struct vnode *newvp = NULL;
1793 int v3 = NFS_ISV3(dvp);
1794
1795 nfsstats.rpccnt[NFSPROC_SYMLINK]++;
1796 slen = strlen(ap->a_target);
1797 mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
1798 nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
1799 mb = mreq;
1800 bpos = mtod(mb, caddr_t);
1801 nfsm_fhtom(dvp, v3);
1802 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1803 if (v3) {
1804 nfsm_v3attrbuild(vap, FALSE);
1805 }
1806 nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
1807 if (!v3) {
1808 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
1809 sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
1810 sp->sa_uid = nfs_xdrneg1;
1811 sp->sa_gid = nfs_xdrneg1;
1812 sp->sa_size = nfs_xdrneg1;
1813 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1814 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1815 }
1816
1817 /*
1818 * Issue the NFS request and get the rpc response.
1819 *
1820 * Only NFSv3 responses returning an error of 0 actually return
1821 * a file handle that can be converted into newvp without having
1822 * to do an extra lookup rpc.
1823 */
1824 nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred);
1825 if (v3) {
1826 if (error == 0)
1827 nfsm_mtofh(dvp, newvp, v3, gotvp);
1828 nfsm_wcc_data(dvp, wccflag);
1829 }
1830
1831 /*
1832 * out code jumps -> here, mrep is also freed.
1833 */
1834
1835 m_freem(mrep);
1836 nfsmout:
1837
1838 /*
1839 * If we do not have an error and we could not extract the newvp from
1840 * the response due to the request being NFSv2, we have to do a
1841 * lookup in order to obtain a newvp to return.
1842 */
1843 if (error == 0 && newvp == NULL) {
1844 struct nfsnode *np = NULL;
1845
1846 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
1847 cnp->cn_cred, cnp->cn_thread, &np);
1848 if (!error)
1849 newvp = NFSTOV(np);
1850 }
1851 if (error) {
1852 if (newvp)
1853 vput(newvp);
1854 } else {
1855 *ap->a_vpp = newvp;
1856 }
1857 mtx_lock(&(VTONFS(dvp))->n_mtx);
1858 VTONFS(dvp)->n_flag |= NMODIFIED;
1859 mtx_unlock(&(VTONFS(dvp))->n_mtx);
1860 if (!wccflag)
1861 VTONFS(dvp)->n_attrstamp = 0;
1862 return (error);
1863 }
1864
1865 /*
1866 * nfs make dir call
1867 */
1868 static int
1869 nfs_mkdir(struct vop_mkdir_args *ap)
1870 {
1871 struct vnode *dvp = ap->a_dvp;
1872 struct vattr *vap = ap->a_vap;
1873 struct componentname *cnp = ap->a_cnp;
1874 struct nfsv2_sattr *sp;
1875 int len;
1876 struct nfsnode *np = NULL;
1877 struct vnode *newvp = NULL;
1878 caddr_t bpos, dpos;
1879 int error = 0, wccflag = NFSV3_WCCRATTR;
1880 int gotvp = 0;
1881 struct mbuf *mreq, *mrep, *md, *mb;
1882 struct vattr vattr;
1883 int v3 = NFS_ISV3(dvp);
1884
1885 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
1886 return (error);
1887 }
1888 len = cnp->cn_namelen;
1889 nfsstats.rpccnt[NFSPROC_MKDIR]++;
1890 mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR,
1891 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
1892 mb = mreq;
1893 bpos = mtod(mb, caddr_t);
1894 nfsm_fhtom(dvp, v3);
1895 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
1896 if (v3) {
1897 nfsm_v3attrbuild(vap, FALSE);
1898 } else {
1899 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
1900 sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
1901 sp->sa_uid = nfs_xdrneg1;
1902 sp->sa_gid = nfs_xdrneg1;
1903 sp->sa_size = nfs_xdrneg1;
1904 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1905 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1906 }
1907 nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred);
1908 if (!error)
1909 nfsm_mtofh(dvp, newvp, v3, gotvp);
1910 if (v3)
1911 nfsm_wcc_data(dvp, wccflag);
1912 m_freem(mrep);
1913 nfsmout:
1914 mtx_lock(&(VTONFS(dvp))->n_mtx);
1915 VTONFS(dvp)->n_flag |= NMODIFIED;
1916 mtx_unlock(&(VTONFS(dvp))->n_mtx);
1917 if (!wccflag)
1918 VTONFS(dvp)->n_attrstamp = 0;
1919 if (error == 0 && newvp == NULL) {
1920 error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
1921 cnp->cn_thread, &np);
1922 if (!error) {
1923 newvp = NFSTOV(np);
1924 if (newvp->v_type != VDIR)
1925 error = EEXIST;
1926 }
1927 }
1928 if (error) {
1929 if (newvp)
1930 vput(newvp);
1931 } else
1932 *ap->a_vpp = newvp;
1933 return (error);
1934 }
1935
1936 /*
1937 * nfs remove directory call
1938 */
1939 static int
1940 nfs_rmdir(struct vop_rmdir_args *ap)
1941 {
1942 struct vnode *vp = ap->a_vp;
1943 struct vnode *dvp = ap->a_dvp;
1944 struct componentname *cnp = ap->a_cnp;
1945 caddr_t bpos, dpos;
1946 int error = 0, wccflag = NFSV3_WCCRATTR;
1947 struct mbuf *mreq, *mrep, *md, *mb;
1948 int v3 = NFS_ISV3(dvp);
1949
1950 if (dvp == vp)
1951 return (EINVAL);
1952 nfsstats.rpccnt[NFSPROC_RMDIR]++;
1953 mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR,
1954 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
1955 mb = mreq;
1956 bpos = mtod(mb, caddr_t);
1957 nfsm_fhtom(dvp, v3);
1958 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1959 nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred);
1960 if (v3)
1961 nfsm_wcc_data(dvp, wccflag);
1962 m_freem(mrep);
1963 nfsmout:
1964 mtx_lock(&(VTONFS(dvp))->n_mtx);
1965 VTONFS(dvp)->n_flag |= NMODIFIED;
1966 mtx_unlock(&(VTONFS(dvp))->n_mtx);
1967 if (!wccflag)
1968 VTONFS(dvp)->n_attrstamp = 0;
1969 cache_purge(dvp);
1970 cache_purge(vp);
1971 /*
1972 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
1973 */
1974 if (error == ENOENT)
1975 error = 0;
1976 return (error);
1977 }
1978
1979 /*
1980 * nfs readdir call
1981 */
1982 static int
1983 nfs_readdir(struct vop_readdir_args *ap)
1984 {
1985 struct vnode *vp = ap->a_vp;
1986 struct nfsnode *np = VTONFS(vp);
1987 struct uio *uio = ap->a_uio;
1988 int tresid, error = 0;
1989 struct vattr vattr;
1990
1991 if (vp->v_type != VDIR)
1992 return(EPERM);
1993
1994 /*
1995 * First, check for hit on the EOF offset cache
1996 */
1997 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
1998 (np->n_flag & NMODIFIED) == 0) {
1999 if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0) {
2000 mtx_lock(&np->n_mtx);
2001 if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
2002 mtx_unlock(&np->n_mtx);
2003 nfsstats.direofcache_hits++;
2004 goto out;
2005 } else
2006 mtx_unlock(&np->n_mtx);
2007 }
2008 }
2009
2010 /*
2011 * Call nfs_bioread() to do the real work.
2012 */
2013 tresid = uio->uio_resid;
2014 error = nfs_bioread(vp, uio, 0, ap->a_cred);
2015
2016 if (!error && uio->uio_resid == tresid) {
2017 nfsstats.direofcache_misses++;
2018 }
2019 out:
2020 return (error);
2021 }
2022
2023 /*
2024 * Readdir rpc call.
2025 * Called from below the buffer cache by nfs_doio().
2026 */
2027 int
2028 nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
2029 {
2030 int len, left;
2031 struct dirent *dp = NULL;
2032 u_int32_t *tl;
2033 caddr_t cp;
2034 nfsuint64 *cookiep;
2035 caddr_t bpos, dpos;
2036 struct mbuf *mreq, *mrep, *md, *mb;
2037 nfsuint64 cookie;
2038 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2039 struct nfsnode *dnp = VTONFS(vp);
2040 u_quad_t fileno;
2041 int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2042 int attrflag;
2043 int v3 = NFS_ISV3(vp);
2044
2045 #ifndef DIAGNOSTIC
2046 if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2047 (uiop->uio_resid & (DIRBLKSIZ - 1)))
2048 panic("nfs readdirrpc bad uio");
2049 #endif
2050
2051 /*
2052 * If there is no cookie, assume directory was stale.
2053 */
2054 nfs_dircookie_lock(dnp);
2055 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2056 if (cookiep) {
2057 cookie = *cookiep;
2058 nfs_dircookie_unlock(dnp);
2059 } else {
2060 nfs_dircookie_unlock(dnp);
2061 return (NFSERR_BAD_COOKIE);
2062 }
2063
2064 /*
2065 * Loop around doing readdir rpc's of size nm_readdirsize
2066 * truncated to a multiple of DIRBLKSIZ.
2067 * The stopping criteria is EOF or buffer full.
2068 */
2069 while (more_dirs && bigenough) {
2070 nfsstats.rpccnt[NFSPROC_READDIR]++;
2071 mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
2072 NFSX_READDIR(v3));
2073 mb = mreq;
2074 bpos = mtod(mb, caddr_t);
2075 nfsm_fhtom(vp, v3);
2076 if (v3) {
2077 tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
2078 *tl++ = cookie.nfsuquad[0];
2079 *tl++ = cookie.nfsuquad[1];
2080 mtx_lock(&dnp->n_mtx);
2081 *tl++ = dnp->n_cookieverf.nfsuquad[0];
2082 *tl++ = dnp->n_cookieverf.nfsuquad[1];
2083 mtx_unlock(&dnp->n_mtx);
2084 } else {
2085 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
2086 *tl++ = cookie.nfsuquad[0];
2087 }
2088 *tl = txdr_unsigned(nmp->nm_readdirsize);
2089 nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred);
2090 if (v3) {
2091 nfsm_postop_attr(vp, attrflag);
2092 if (!error) {
2093 tl = nfsm_dissect(u_int32_t *,
2094 2 * NFSX_UNSIGNED);
2095 mtx_lock(&dnp->n_mtx);
2096 dnp->n_cookieverf.nfsuquad[0] = *tl++;
2097 dnp->n_cookieverf.nfsuquad[1] = *tl;
2098 mtx_unlock(&dnp->n_mtx);
2099 } else {
2100 m_freem(mrep);
2101 goto nfsmout;
2102 }
2103 }
2104 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2105 more_dirs = fxdr_unsigned(int, *tl);
2106
2107 /* loop thru the dir entries, doctoring them to 4bsd form */
2108 while (more_dirs && bigenough) {
2109 if (v3) {
2110 tl = nfsm_dissect(u_int32_t *,
2111 3 * NFSX_UNSIGNED);
2112 fileno = fxdr_hyper(tl);
2113 len = fxdr_unsigned(int, *(tl + 2));
2114 } else {
2115 tl = nfsm_dissect(u_int32_t *,
2116 2 * NFSX_UNSIGNED);
2117 fileno = fxdr_unsigned(u_quad_t, *tl++);
2118 len = fxdr_unsigned(int, *tl);
2119 }
2120 if (len <= 0 || len > NFS_MAXNAMLEN) {
2121 error = EBADRPC;
2122 m_freem(mrep);
2123 goto nfsmout;
2124 }
2125 tlen = nfsm_rndup(len);
2126 if (tlen == len)
2127 tlen += 4; /* To ensure null termination */
2128 left = DIRBLKSIZ - blksiz;
2129 if ((tlen + DIRHDSIZ) > left) {
2130 dp->d_reclen += left;
2131 uiop->uio_iov->iov_base =
2132 (char *)uiop->uio_iov->iov_base + left;
2133 uiop->uio_iov->iov_len -= left;
2134 uiop->uio_offset += left;
2135 uiop->uio_resid -= left;
2136 blksiz = 0;
2137 }
2138 if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2139 bigenough = 0;
2140 if (bigenough) {
2141 dp = (struct dirent *)uiop->uio_iov->iov_base;
2142 dp->d_fileno = (int)fileno;
2143 dp->d_namlen = len;
2144 dp->d_reclen = tlen + DIRHDSIZ;
2145 dp->d_type = DT_UNKNOWN;
2146 blksiz += dp->d_reclen;
2147 if (blksiz == DIRBLKSIZ)
2148 blksiz = 0;
2149 uiop->uio_offset += DIRHDSIZ;
2150 uiop->uio_resid -= DIRHDSIZ;
2151 uiop->uio_iov->iov_base =
2152 (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
2153 uiop->uio_iov->iov_len -= DIRHDSIZ;
2154 nfsm_mtouio(uiop, len);
2155 cp = uiop->uio_iov->iov_base;
2156 tlen -= len;
2157 *cp = '\0'; /* null terminate */
2158 uiop->uio_iov->iov_base =
2159 (char *)uiop->uio_iov->iov_base + tlen;
2160 uiop->uio_iov->iov_len -= tlen;
2161 uiop->uio_offset += tlen;
2162 uiop->uio_resid -= tlen;
2163 } else
2164 nfsm_adv(nfsm_rndup(len));
2165 if (v3) {
2166 tl = nfsm_dissect(u_int32_t *,
2167 3 * NFSX_UNSIGNED);
2168 } else {
2169 tl = nfsm_dissect(u_int32_t *,
2170 2 * NFSX_UNSIGNED);
2171 }
2172 if (bigenough) {
2173 cookie.nfsuquad[0] = *tl++;
2174 if (v3)
2175 cookie.nfsuquad[1] = *tl++;
2176 } else if (v3)
2177 tl += 2;
2178 else
2179 tl++;
2180 more_dirs = fxdr_unsigned(int, *tl);
2181 }
2182 /*
2183 * If at end of rpc data, get the eof boolean
2184 */
2185 if (!more_dirs) {
2186 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2187 more_dirs = (fxdr_unsigned(int, *tl) == 0);
2188 }
2189 m_freem(mrep);
2190 }
2191 /*
2192 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2193 * by increasing d_reclen for the last record.
2194 */
2195 if (blksiz > 0) {
2196 left = DIRBLKSIZ - blksiz;
2197 dp->d_reclen += left;
2198 uiop->uio_iov->iov_base =
2199 (char *)uiop->uio_iov->iov_base + left;
2200 uiop->uio_iov->iov_len -= left;
2201 uiop->uio_offset += left;
2202 uiop->uio_resid -= left;
2203 }
2204
2205 /*
2206 * We are now either at the end of the directory or have filled the
2207 * block.
2208 */
2209 if (bigenough)
2210 dnp->n_direofoffset = uiop->uio_offset;
2211 else {
2212 if (uiop->uio_resid > 0)
2213 nfs_printf("EEK! readdirrpc resid > 0\n");
2214 nfs_dircookie_lock(dnp);
2215 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2216 *cookiep = cookie;
2217 nfs_dircookie_unlock(dnp);
2218 }
2219 nfsmout:
2220 return (error);
2221 }
2222
2223 /*
2224 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
     *
     * Fills the caller's uio with directory entries obtained through
     * NFSPROC_READDIRPLUS requests. Each entry is rewritten as a struct
     * dirent record and the records are padded so that they never straddle
     * a DIRBLKSIZ boundary. For entries that arrive with attributes and a
     * file handle (other than ".."), the nfsnode is obtained, its attributes
     * are loaded, d_type is filled in and the name is entered in the name
     * cache with cache_enter().
     *
     * vp   - directory vnode being read.
     * uiop - destination; must describe a single iovec whose offset and
     *        residual are multiples of DIRBLKSIZ (see the panic check below).
     * cred - credentials handed to nfsm_request().
     *
     * Returns 0 on success, NFSERR_BAD_COOKIE when no cookie is recorded for
     * uiop->uio_offset, EBADRPC when an entry's name length is outside
     * 1..NFS_MAXNAMLEN, or the error left in `error' by the request/parse
     * macros.
     *
     * NOTE(review): the nfsm_* macros are defined outside this file. They
     * appear to operate on the locals mreq/mrep/md/mb/bpos/dpos/error and to
     * jump to the nfsmout label on failure -- confirm against their
     * definitions before renaming locals or moving the label.
2225 */
2226 int
2227 nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
2228 {
2229 int len, left;
2230 struct dirent *dp;
2231 u_int32_t *tl;
2232 caddr_t cp;
2233 struct vnode *newvp;
2234 nfsuint64 *cookiep;
2235 caddr_t bpos, dpos, dpossav1, dpossav2;
2236 struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2;
2237 struct nameidata nami, *ndp = &nami;
2238 struct componentname *cnp = &ndp->ni_cnd;
2239 nfsuint64 cookie;
2240 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2241 struct nfsnode *dnp = VTONFS(vp), *np;
2242 nfsfh_t *fhp;
2243 u_quad_t fileno;
2244 int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
2245 int attrflag, fhsize;
2246
2247 #ifndef nolint
2248 dp = NULL;
2249 #endif
     /*
      * NOTE(review): this sanity check is compiled in only when DIAGNOSTIC
      * is NOT defined -- confirm that #ifndef (rather than #ifdef) is
      * intended.
      */
2250 #ifndef DIAGNOSTIC
2251 if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2252 (uiop->uio_resid & (DIRBLKSIZ - 1)))
2253 panic("nfs readdirplusrpc bad uio");
2254 #endif
     /* The directory itself is the parent for every cache_enter() below. */
2255 ndp->ni_dvp = vp;
2256 newvp = NULLVP;
2257
2258 /*
2259 * If there is no cookie, assume directory was stale.
2260 */
2261 nfs_dircookie_lock(dnp);
2262 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2263 if (cookiep) {
2264 cookie = *cookiep;
2265 nfs_dircookie_unlock(dnp);
2266 } else {
2267 nfs_dircookie_unlock(dnp);
2268 return (NFSERR_BAD_COOKIE);
2269 }
2270 /*
2271 * Loop around doing readdir rpc's of size nm_readdirsize
2272 * truncated to a multiple of DIRBLKSIZ.
2273 * The stopping criteria is EOF or buffer full.
2274 */
2275 while (more_dirs && bigenough) {
2276 nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
2277 mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
2278 NFSX_FH(1) + 6 * NFSX_UNSIGNED);
2279 mb = mreq;
2280 bpos = mtod(mb, caddr_t);
2281 nfsm_fhtom(vp, 1);
     /*
      * Six argument words: cookie (2), cookie verifier (2, read under
      * n_mtx), nm_readdirsize and nm_rsize.
      */
2282 tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED);
2283 *tl++ = cookie.nfsuquad[0];
2284 *tl++ = cookie.nfsuquad[1];
2285 mtx_lock(&dnp->n_mtx);
2286 *tl++ = dnp->n_cookieverf.nfsuquad[0];
2287 *tl++ = dnp->n_cookieverf.nfsuquad[1];
2288 mtx_unlock(&dnp->n_mtx);
2289 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
2290 *tl = txdr_unsigned(nmp->nm_rsize);
2291 nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
2292 nfsm_postop_attr(vp, attrflag);
2293 if (error) {
2294 m_freem(mrep);
2295 goto nfsmout;
2296 }
     /*
      * Reply header: new cookie verifier (2 words, stored under n_mtx)
      * followed by the flag telling whether a first entry follows.
      */
2297 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
2298 mtx_lock(&dnp->n_mtx);
2299 dnp->n_cookieverf.nfsuquad[0] = *tl++;
2300 dnp->n_cookieverf.nfsuquad[1] = *tl++;
2301 mtx_unlock(&dnp->n_mtx);
2302 more_dirs = fxdr_unsigned(int, *tl);
2303
2304 /* loop thru the dir entries, doctoring them to 4bsd form */
2305 while (more_dirs && bigenough) {
     /* Entry header: 64-bit file number, then the name length. */
2306 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
2307 fileno = fxdr_hyper(tl);
2308 len = fxdr_unsigned(int, *(tl + 2));
2309 if (len <= 0 || len > NFS_MAXNAMLEN) {
2310 error = EBADRPC;
2311 m_freem(mrep);
2312 goto nfsmout;
2313 }
2314 tlen = nfsm_rndup(len);
2315 if (tlen == len)
2316 tlen += 4; /* To ensure null termination*/
     /*
      * If the record does not fit in what is left of the current
      * DIRBLKSIZ block, grow the previous record to the block boundary
      * and start a new block.
      */
2317 left = DIRBLKSIZ - blksiz;
2318 if ((tlen + DIRHDSIZ) > left) {
2319 dp->d_reclen += left;
2320 uiop->uio_iov->iov_base =
2321 (char *)uiop->uio_iov->iov_base + left;
2322 uiop->uio_iov->iov_len -= left;
2323 uiop->uio_offset += left;
2324 uiop->uio_resid -= left;
2325 blksiz = 0;
2326 }
     /* Out of room in the caller's buffer: stop storing entries. */
2327 if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2328 bigenough = 0;
2329 if (bigenough) {
2330 dp = (struct dirent *)uiop->uio_iov->iov_base;
     /* fileno is a u_quad_t; the cast narrows it to an int. */
2331 dp->d_fileno = (int)fileno;
2332 dp->d_namlen = len;
2333 dp->d_reclen = tlen + DIRHDSIZ;
2334 dp->d_type = DT_UNKNOWN;
2335 blksiz += dp->d_reclen;
2336 if (blksiz == DIRBLKSIZ)
2337 blksiz = 0;
2338 uiop->uio_offset += DIRHDSIZ;
2339 uiop->uio_resid -= DIRHDSIZ;
2340 uiop->uio_iov->iov_base =
2341 (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
2342 uiop->uio_iov->iov_len -= DIRHDSIZ;
     /*
      * Remember where the name is about to be copied so that
      * cache_enter() below can use it.
      */
2343 cnp->cn_nameptr = uiop->uio_iov->iov_base;
2344 cnp->cn_namelen = len;
2345 nfsm_mtouio(uiop, len);
2346 cp = uiop->uio_iov->iov_base;
2347 tlen -= len;
2348 *cp = '\0';
     /* Step over the terminator and the padding after the name. */
2349 uiop->uio_iov->iov_base =
2350 (char *)uiop->uio_iov->iov_base + tlen;
2351 uiop->uio_iov->iov_len -= tlen;
2352 uiop->uio_offset += tlen;
2353 uiop->uio_resid -= tlen;
2354 } else
2355 nfsm_adv(nfsm_rndup(len)); /* entry not stored: skip its name */
     /*
      * Cookie (2 words) plus the attributes-follow flag. The cookie is
      * only remembered for entries that were actually stored.
      */
2356 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
2357 if (bigenough) {
2358 cookie.nfsuquad[0] = *tl++;
2359 cookie.nfsuquad[1] = *tl++;
2360 } else
2361 tl += 2;
2362
2363 /*
2364 * Since the attributes are before the file handle
2365 * (sigh), we must skip over the attributes and then
2366 * come back and get them.
2367 */
2368 attrflag = fxdr_unsigned(int, *tl);
2369 if (attrflag) {
     /* Save the parse position of the attributes for the second visit. */
2370 dpossav1 = dpos;
2371 mdsav1 = md;
2372 nfsm_adv(NFSX_V3FATTR);
2373 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2374 doit = fxdr_unsigned(int, *tl);
2375 /*
2376 * Skip loading the attrs for "..". There's a
2377 * race between loading the attrs here and
2378 * lookups that look for the directory currently
2379 * being read (in the parent). We try to acquire
2380 * the exclusive lock on ".." here, owning the
2381 * lock on the directory being read. Lookup will
2382 * hold the lock on ".." and try to acquire the
2383 * lock on the directory being read.
2384 *
2385 * There are other ways of fixing this, one would
2386 * be to do a trylock on the ".." vnode and skip
2387 * loading the attrs on ".." if it happens to be
2388 * locked by another process. But skipping the
2389 * attrload on ".." seems the easiest option.
2390 */
     /*
      * NOTE(review): dp is only assigned in the bigenough branch above,
      * so when bigenough is 0 this compares the name of the previously
      * stored record, not the current entry -- confirm this is acceptable.
      */
2391 if (strcmp(dp->d_name, "..") == 0) {
2392 doit = 0;
2393 /*
2394 * We've already skipped over the attrs,
2395 * skip over the filehandle. And store d_type
2396 * as VDIR.
2397 */
2398 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2399 i = fxdr_unsigned(int, *tl);
2400 nfsm_adv(nfsm_rndup(i));
2401 dp->d_type = IFTODT(VTTOIF(VDIR));
2402 }
2403 if (doit) {
2404 nfsm_getfh(fhp, fhsize, 1);
     /* The handle is the directory's own: reuse vp, taking a reference. */
2405 if (NFS_CMPFH(dnp, fhp, fhsize)) {
2406 VREF(vp);
2407 newvp = vp;
2408 np = dnp;
2409 } else {
     /*
      * NOTE(review): on failure only doit is cleared; `error' keeps the
      * nfs_nget() value while parsing continues -- confirm it cannot leak
      * into the final return value unintentionally.
      */
2410 error = nfs_nget(vp->v_mount, fhp,
2411 fhsize, &np, LK_EXCLUSIVE);
2412 if (error)
2413 doit = 0;
2414 else
2415 newvp = NFSTOV(np);
2416 }
2417 }
2418 if (doit && bigenough) {
     /*
      * Temporarily rewind to the saved attribute position, load the
      * attributes into newvp, then restore the current position.
      */
2419 dpossav2 = dpos;
2420 dpos = dpossav1;
2421 mdsav2 = md;
2422 md = mdsav1;
2423 nfsm_loadattr(newvp, NULL);
2424 dpos = dpossav2;
2425 md = mdsav2;
2426 dp->d_type =
2427 IFTODT(VTTOIF(np->n_vattr.va_type));
2428 ndp->ni_vp = newvp;
2429 /* Update n_ctime, so subsequent lookup doesn't purge entry */
2430 np->n_ctime = np->n_vattr.va_ctime.tv_sec;
2431 cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
2432 }
2433 } else {
2434 /* Just skip over the file handle */
2435 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2436 i = fxdr_unsigned(int, *tl);
2437 if (i) {
2438 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2439 fhsize = fxdr_unsigned(int, *tl);
2440 nfsm_adv(nfsm_rndup(fhsize));
2441 }
2442 }
     /*
      * Drop the vnode obtained for this entry: vp itself was only
      * referenced with VREF(); other vnodes came from
      * nfs_nget(LK_EXCLUSIVE) and are presumably returned locked, hence
      * vput().
      */
2443 if (newvp != NULLVP) {
2444 if (newvp == vp)
2445 vrele(newvp);
2446 else
2447 vput(newvp);
2448 newvp = NULLVP;
2449 }
     /* Flag telling whether another entry follows in this reply. */
2450 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2451 more_dirs = fxdr_unsigned(int, *tl);
2452 }
2453 /*
2454 * If at end of rpc data, get the eof boolean
2455 */
2456 if (!more_dirs) {
2457 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
     /* Keep issuing requests only while the eof word is zero. */
2458 more_dirs = (fxdr_unsigned(int, *tl) == 0);
2459 }
2460 m_freem(mrep);
2461 }
2462 /*
2463 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2464 * by increasing d_reclen for the last record.
2465 */
2466 if (blksiz > 0) {
2467 left = DIRBLKSIZ - blksiz;
2468 dp->d_reclen += left;
2469 uiop->uio_iov->iov_base =
2470 (char *)uiop->uio_iov->iov_base + left;
2471 uiop->uio_iov->iov_len -= left;
2472 uiop->uio_offset += left;
2473 uiop->uio_resid -= left;
2474 }
2475
2476 /*
2477 * We are now either at the end of the directory or have filled the
2478 * block.
2479 */
     /*
      * bigenough still set means the loop ended without running out of
      * buffer space, so the current offset is recorded as the directory
      * EOF offset; otherwise the last stored cookie is saved for the new
      * offset so a later call can resume from it.
      */
2480 if (bigenough)
2481 dnp->n_direofoffset = uiop->uio_offset;
2482 else {
2483 if (uiop->uio_resid > 0)
2484 nfs_printf("EEK! readdirplusrpc resid > 0\n");
2485 nfs_dircookie_lock(dnp);
2486 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2487 *cookiep = cookie;
2488 nfs_dircookie_unlock(dnp);
2489 }
2490 nfsmout:
     /* Release a per-entry vnode still held when an error path jumped here. */
2491 if (newvp != NULLVP) {
2492 if (newvp == vp)
2493 vrele(newvp);
2494 else
2495 vput(newvp);
2496 newvp = NULLVP;
2497 }
2498 return (error);
2499 }
2500
2501 /*
2502 * Silly rename. To make the NFS filesystem that is stateless look a little
2503 * more like the "ufs" a remove of an active vnode is translated to a rename
2504 * to a funny looking filename that is removed by nfs_inactive on the
2505 * nfsnode. There is the potential for another process on a different client
2506 * to create the same funny name between the nfs_lookitup() fails and the
2507 * nfs_rename() completes, but...
2508 */
2509 static int
2510 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
2511 {
2512 struct sillyrename *sp;
2513 struct nfsnode *np;
2514 int error;
2515 short pid;
2516 unsigned int lticks;
2517
2518 cache_purge(dvp);
2519 np = VTONFS(vp);
2520 #ifndef DIAGNOSTIC
2521 if (vp->v_type == VDIR)
2522 panic("nfs: sillyrename dir");
2523 #endif
2524 MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
2525 M_NFSREQ, M_WAITOK);
2526 sp->s_cred = crhold(cnp->cn_cred);
2527 sp->s_dvp = dvp;
2528 sp->s_removeit = nfs_removeit;
2529 VREF(dvp);
2530
2531 /*
2532 * Fudge together a funny name.
2533 * Changing the format of the funny name to accomodate more
2534 * sillynames per directory.
2535 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is
2536 * CPU ticks since boot.
2537 */
2538 pid = cnp->cn_thread->td_proc->p_pid;
2539 lticks = (unsigned int)ticks;
2540 for ( ; ; ) {
2541 sp->s_namlen = sprintf(sp->s_name,
2542 ".nfs.%08x.%04x4.4", lticks,
2543 pid);
2544 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2545 cnp->cn_thread, NULL))
2546 break;
2547 lticks++;
2548 }
2549 error = nfs_renameit(dvp, cnp, sp);
2550 if (error)
2551 goto bad;
2552 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2553 cnp->cn_thread, &np);
2554 np->n_sillyrename = sp;
2555 return (0);
2556 bad:
2557 vrele(sp->s_dvp);
2558 crfree(sp->s_cred);
2559 free((caddr_t)sp, M_NFSREQ);
2560 return (error);
2561 }
2562
2563 /*
2564 * Look up a file name and optionally either update the file handle or
2565 * allocate an nfsnode, depending on the value of npp.
2566 * npp == NULL --> just do the lookup
2567 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
2568 * handled too
2569 * *npp != NULL --> update the file handle in the vnode
     *
     * dvp is the directory, name/len the component to look up, cred and td
     * are passed to the LOOKUP request.
     *
     * Returns 0 on success or the error produced by the request, by
     * nfs_nget(), or ENOENT when a V3 reply carries no attributes for a
     * newly allocated node. When *npp was NULL and the call succeeds, *npp
     * is set to the node found; on error any vnode obtained here is
     * released.
     *
     * NOTE(review): the nfsm_* macros are defined outside this file; they
     * appear to use mreq/mrep/md/mb/bpos/dpos/error implicitly and to jump
     * to nfsmout on failure -- confirm before restructuring.
2570 */
2571 static int
2572 nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
2573 struct thread *td, struct nfsnode **npp)
2574 {
2575 struct vnode *newvp = NULL;
2576 struct nfsnode *np, *dnp = VTONFS(dvp);
2577 caddr_t bpos, dpos;
2578 int error = 0, fhlen, attrflag;
2579 struct mbuf *mreq, *mrep, *md, *mb;
2580 nfsfh_t *nfhp;
2581 int v3 = NFS_ISV3(dvp);
2582
2583 nfsstats.rpccnt[NFSPROC_LOOKUP]++;
2584 mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
2585 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
2586 mb = mreq;
2587 bpos = mtod(mb, caddr_t);
2588 nfsm_fhtom(dvp, v3);
2589 nfsm_strtom(name, len, NFS_MAXNAMLEN);
2590 nfsm_request(dvp, NFSPROC_LOOKUP, td, cred);
     /* The reply is only parsed when the caller wants a node back. */
2591 if (npp && !error) {
2592 nfsm_getfh(nfhp, fhlen, v3);
2593 if (*npp) {
     /*
      * Existing node: replace its file handle, switching between the
      * embedded n_fh storage and a separately allocated buffer when the
      * size crosses NFS_SMALLFH.
      * NOTE(review): when both the old and the new handle are larger than
      * NFS_SMALLFH the existing allocation is reused without resizing --
      * confirm it is always large enough for fhlen.
      */
2594 np = *npp;
2595 if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
2596 free((caddr_t)np->n_fhp, M_NFSBIGFH);
2597 np->n_fhp = &np->n_fh;
2598 } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
2599 np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK);
2600 bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
2601 np->n_fhsize = fhlen;
2602 newvp = NFSTOV(np);
2603 } else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
     /* The name resolves to the directory itself: just take a reference. */
2604 VREF(dvp);
2605 newvp = dvp;
2606 } else {
2607 error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
2608 if (error) {
2609 m_freem(mrep);
2610 return (error);
2611 }
2612 newvp = NFSTOV(np);
2613 }
2614 if (v3) {
2615 nfsm_postop_attr(newvp, attrflag);
     /*
      * A newly obtained node without attributes is treated as not found;
      * release it the same way the nfsmout path would.
      */
2616 if (!attrflag && *npp == NULL) {
2617 m_freem(mrep);
2618 if (newvp == dvp)
2619 vrele(newvp);
2620 else
2621 vput(newvp);
2622 return (ENOENT);
2623 }
2624 } else
2625 nfsm_loadattr(newvp, NULL);
2626 }
2627 m_freem(mrep);
2628 nfsmout:
     /*
      * Only the "*npp == NULL" mode hands a vnode to the caller: on error
      * release whatever was obtained, otherwise return the node through
      * npp.
      */
2629 if (npp && *npp == NULL) {
2630 if (error) {
2631 if (newvp) {
2632 if (newvp == dvp)
2633 vrele(newvp);
2634 else
2635 vput(newvp);
2636 }
2637 } else
2638 *npp = np;
2639 }
2640 return (error);
2641 }
2642
2643 /*
2644 * Nfs Version 3 commit rpc
2645 */
2646 int
2647 nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
2648 struct thread *td)
2649 {
2650 u_int32_t *tl;
2651 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2652 caddr_t bpos, dpos;
2653 int error = 0, wccflag = NFSV3_WCCRATTR;
2654 struct mbuf *mreq, *mrep, *md, *mb;
2655
2656 mtx_lock(&nmp->nm_mtx);
2657 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
2658 mtx_unlock(&nmp->nm_mtx);
2659 return (0);
2660 }
2661 mtx_unlock(&nmp->nm_mtx);
2662 nfsstats.rpccnt[NFSPROC_COMMIT]++;
2663 mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
2664 mb = mreq;
2665 bpos = mtod(mb, caddr_t);
2666 nfsm_fhtom(vp, 1);
2667 tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED);
2668 txdr_hyper(offset, tl);
2669 tl += 2;
2670 *tl = txdr_unsigned(cnt);
2671 nfsm_request(vp, NFSPROC_COMMIT, td, cred);
2672 nfsm_wcc_data(vp, wccflag);
2673 if (!error) {
2674 tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF);
2675 if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
2676 NFSX_V3WRITEVERF)) {
2677 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
2678 NFSX_V3WRITEVERF);
2679 error = NFSERR_STALEWRITEVERF;
2680 }
2681 }
2682 m_freem(mrep);
2683 nfsmout:
2684 return (error);
2685 }
2686
2687 /*
2688 * Strategy routine.
2689 * For async requests when nfsiod(s) are running, queue the request by
2690 * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
2691 * request.
2692 */
2693 static int
2694 nfs_strategy(struct vop_strategy_args *ap)
2695 {
2696 struct buf *bp = ap->a_bp;
2697 struct ucred *cr;
2698
2699 KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
2700 KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp));
2701
2702 if (bp->b_iocmd == BIO_READ)
2703 cr = bp->b_rcred;
2704 else
2705 cr = bp->b_wcred;
2706
2707 /*
2708 * If the op is asynchronous and an i/o daemon is waiting
2709 * queue the request, wake it up and wait for completion
2710 * otherwise just do it ourselves.
2711 */
2712 if ((bp->b_flags & B_ASYNC) == 0 ||
2713 nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
2714 (void)nfs_doio(ap->a_vp, bp, cr, curthread);
2715 return (0);
2716 }
2717
2718 /*
2719 * fsync vnode op. Just call nfs_flush() with commit == 1.
2720 */
2721 /* ARGSUSED */
2722 static int
2723 nfs_fsync(struct vop_fsync_args *ap)
2724 {
2725 return (nfs_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1));
2726 }
2727
2728 /*
2729 * Flush all the blocks associated with a vnode.
2730 * Walk through the buffer pool and push any dirty pages
2731 * associated with the vnode.
     *
     * vp      - vnode whose dirty buffers are flushed.
     * waitfor - with MNT_WAIT the routine sleeps for busy buffers, waits
     *           for outstanding output and for n_directio_asyncwr to drain.
     * td      - thread used for the commit requests and signal checks.
     * commit  - non-zero enables the two-pass scheme with a V3 commit rpc;
     *           zero means a single write pass.
     *
     * Returns 0, EINTR (or whatever nfs_sigintr() reports) when
     * interrupted, or np->n_error when NWRITEERR was set on the node.
2732 */
2733 static int
2734 nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
2735 int commit)
2736 {
2737 struct nfsnode *np = VTONFS(vp);
2738 struct buf *bp;
2739 int i;
2740 struct buf *nbp;
2741 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2742 int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
2743 int passone = 1;
2744 u_quad_t off, endoff, toff;
2745 struct ucred* wcred = NULL;
2746 struct buf **bvec = NULL;
2747 #ifndef NFS_COMMITBVECSIZ
2748 #define NFS_COMMITBVECSIZ 20
2749 #endif
2750 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
2751 int bvecsize = 0, bveccount;
2752
     /* Sleeps are made interruptible only on NFSMNT_INT mounts. */
2753 if (nmp->nm_flag & NFSMNT_INT)
2754 slpflag = PCATCH;
2755 if (!commit)
2756 passone = 0;
2757 /*
2758 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
2759 * server, but has not been committed to stable storage on the server
2760 * yet. On the first pass, the byte range is worked out and the commit
2761 * rpc is done. On the second pass, nfs_writebp() is called to do the
2762 * job.
     *
     * NOTE(review): the write loop below calls bwrite(); presumably that
     * reaches nfs_writebp() through buf_ops_nfs.bop_write -- confirm.
2763 */
2764 again:
     /* Start with an empty range: off at its maximum, endoff at zero. */
2765 off = (u_quad_t)-1;
2766 endoff = 0;
2767 bvecpos = 0;
2768 if (NFS_ISV3(vp) && commit) {
2769 s = splbio();
     /* Drop a heap vector left over from the previous pass. */
2770 if (bvec != NULL && bvec != bvec_on_stack)
2771 free(bvec, M_TEMP);
2772 /*
2773 * Count up how many buffers waiting for a commit.
2774 */
2775 bveccount = 0;
2776 VI_LOCK(vp);
2777 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
2778 if (BUF_REFCNT(bp) == 0 &&
2779 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
2780 == (B_DELWRI | B_NEEDCOMMIT))
2781 bveccount++;
2782 }
2783 /*
2784 * Allocate space to remember the list of bufs to commit. It is
2785 * important to use M_NOWAIT here to avoid a race with nfs_write.
2786 * If we can't get memory (for whatever reason), we will end up
2787 * committing the buffers one-by-one in the loop below.
2788 */
2789 if (bveccount > NFS_COMMITBVECSIZ) {
2790 /*
2791 * Release the vnode interlock to avoid a lock
2792 * order reversal.
2793 */
2794 VI_UNLOCK(vp);
2795 bvec = (struct buf **)
2796 malloc(bveccount * sizeof(struct buf *),
2797 M_TEMP, M_NOWAIT);
2798 VI_LOCK(vp);
2799 if (bvec == NULL) {
2800 bvec = bvec_on_stack;
2801 bvecsize = NFS_COMMITBVECSIZ;
2802 } else
2803 bvecsize = bveccount;
2804 } else {
2805 bvec = bvec_on_stack;
2806 bvecsize = NFS_COMMITBVECSIZ;
2807 }
     /*
      * Collect up to bvecsize lockable buffers that still need a commit,
      * accumulating the byte range [off, endoff) that covers them.
      */
2808 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
2809 if (bvecpos >= bvecsize)
2810 break;
2811 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
2812 nbp = TAILQ_NEXT(bp, b_bobufs);
2813 continue;
2814 }
2815 if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
2816 (B_DELWRI | B_NEEDCOMMIT)) {
2817 BUF_UNLOCK(bp);
2818 nbp = TAILQ_NEXT(bp, b_bobufs);
2819 continue;
2820 }
2821 VI_UNLOCK(vp);
2822 bremfree(bp);
2823 /*
2824 * Work out if all buffers are using the same cred
2825 * so we can deal with them all with one commit.
2826 *
2827 * NOTE: we are not clearing B_DONE here, so we have
2828 * to do it later on in this routine if we intend to
2829 * initiate I/O on the bp.
2830 *
2831 * Note: to avoid loopback deadlocks, we do not
2832 * assign b_runningbufspace.
2833 */
     /* NOCRED here marks "mixed credentials" for the commit step below. */
2834 if (wcred == NULL)
2835 wcred = bp->b_wcred;
2836 else if (wcred != bp->b_wcred)
2837 wcred = NOCRED;
2838 vfs_busy_pages(bp, 1);
2839
2840 VI_LOCK(vp);
2841 /*
2842 * bp is protected by being locked, but nbp is not
2843 * and vfs_busy_pages() may sleep. We have to
2844 * recalculate nbp.
2845 */
2846 nbp = TAILQ_NEXT(bp, b_bobufs);
2847
2848 /*
2849 * A list of these buffers is kept so that the
2850 * second loop knows which buffers have actually
2851 * been committed. This is necessary, since there
2852 * may be a race between the commit rpc and new
2853 * uncommitted writes on the file.
2854 */
2855 bvec[bvecpos++] = bp;
2856 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2857 bp->b_dirtyoff;
2858 if (toff < off)
2859 off = toff;
2860 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
2861 if (toff > endoff)
2862 endoff = toff;
2863 }
2864 splx(s);
2865 VI_UNLOCK(vp);
2866 }
2867 if (bvecpos > 0) {
2868 /*
2869 * Commit data on the server, as required.
2870 * If all bufs are using the same wcred, then use that with
2871 * one call for all of them, otherwise commit each one
2872 * separately.
2873 */
2874 if (wcred != NOCRED)
2875 retv = nfs_commit(vp, off, (int)(endoff - off),
2876 wcred, td);
2877 else {
2878 retv = 0;
2879 for (i = 0; i < bvecpos; i++) {
     /* These locals shadow the function-level `off' inside this loop. */
2880 off_t off, size;
2881 bp = bvec[i];
2882 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2883 bp->b_dirtyoff;
2884 size = (u_quad_t)(bp->b_dirtyend
2885 - bp->b_dirtyoff);
2886 retv = nfs_commit(vp, off, (int)size,
2887 bp->b_wcred, td);
2888 if (retv) break;
2889 }
2890 }
2891
2892 if (retv == NFSERR_STALEWRITEVERF)
2893 nfs_clearcommit(vp->v_mount);
2894
2895 /*
2896 * Now, either mark the blocks I/O done or mark the
2897 * blocks dirty, depending on whether the commit
2898 * succeeded.
2899 */
2900 for (i = 0; i < bvecpos; i++) {
2901 bp = bvec[i];
2902 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
2903 if (retv) {
2904 /*
2905 * Error, leave B_DELWRI intact
2906 */
2907 vfs_unbusy_pages(bp);
2908 brelse(bp);
2909 } else {
2910 /*
2911 * Success, remove B_DELWRI ( bundirty() ).
2912 *
2913 * b_dirtyoff/b_dirtyend seem to be NFS
2914 * specific. We should probably move that
2915 * into bundirty(). XXX
2916 */
2917 s = splbio();
2918 bufobj_wref(&vp->v_bufobj);
2919 bp->b_flags |= B_ASYNC;
2920 bundirty(bp);
2921 bp->b_flags &= ~B_DONE;
2922 bp->b_ioflags &= ~BIO_ERROR;
2923 bp->b_dirtyoff = bp->b_dirtyend = 0;
2924 splx(s);
2925 bufdone(bp);
2926 }
2927 }
2928 }
2929
2930 /*
2931 * Start/do any write(s) that are required.
2932 */
2933 loop:
2934 s = splbio();
2935 VI_LOCK(vp);
2936 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
2937 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
     /* Busy buffer: skip it unless this is the final, waiting pass. */
2938 if (waitfor != MNT_WAIT || passone)
2939 continue;
2940
     /*
      * Sleep for the buffer and rescan from the top afterwards.
      * NOTE(review): LK_INTERLOCK with VI_MTX(vp) presumably drops the
      * vnode interlock, which is why every path below restarts at `loop'
      * (which re-locks) or leaves through `done' -- confirm.
      */
2941 error = BUF_TIMELOCK(bp,
2942 LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
2943 VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
2944 splx(s);
2945 if (error == 0) {
2946 BUF_UNLOCK(bp);
2947 goto loop;
2948 }
2949 if (error == ENOLCK)
2950 goto loop;
2951 if (nfs_sigintr(nmp, NULL, td)) {
2952 error = EINTR;
2953 goto done;
2954 }
     /* After a first interrupted sleep, fall back to timed sleeps. */
2955 if (slpflag == PCATCH) {
2956 slpflag = 0;
2957 slptimeo = 2 * hz;
2958 }
2959 goto loop;
2960 }
2961 if ((bp->b_flags & B_DELWRI) == 0)
2962 panic("nfs_fsync: not dirty");
     /* Buffers awaiting commit are left alone on pass one / without commit. */
2963 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
2964 BUF_UNLOCK(bp);
2965 continue;
2966 }
2967 VI_UNLOCK(vp);
2968 bremfree(bp);
     /* Both branches are identical: the write is always issued B_ASYNC. */
2969 if (passone || !commit)
2970 bp->b_flags |= B_ASYNC;
2971 else
2972 bp->b_flags |= B_ASYNC;
2973 splx(s);
2974 bwrite(bp);
2975 if (nfs_sigintr(nmp, NULL, td)) {
2976 error = EINTR;
2977 goto done;
2978 }
2979 goto loop;
2980 }
2981 splx(s);
     /* End of pass one: go back and run the commit pass. */
2982 if (passone) {
2983 passone = 0;
2984 VI_UNLOCK(vp);
2985 goto again;
2986 }
2987 if (waitfor == MNT_WAIT) {
     /* Wait until no output is in progress on the vnode's buffer object. */
2988 while (vp->v_bufobj.bo_numoutput) {
2989 error = bufobj_wwait(&vp->v_bufobj, slpflag, slptimeo);
2990 if (error) {
2991 VI_UNLOCK(vp);
2992 error = nfs_sigintr(nmp, NULL, td);
2993 if (error)
2994 goto done;
2995 if (slpflag == PCATCH) {
2996 slpflag = 0;
2997 slptimeo = 2 * hz;
2998 }
2999 VI_LOCK(vp);
3000 }
3001 }
     /* New dirty buffers showed up in the meantime: write them as well. */
3002 if (vp->v_bufobj.bo_dirty.bv_cnt != 0 && commit) {
3003 VI_UNLOCK(vp);
3004 goto loop;
3005 }
3006 /*
3007 * Wait for all the async IO requests to drain
3008 */
3009 VI_UNLOCK(vp);
3010 mtx_lock(&np->n_mtx);
3011 while (np->n_directio_asyncwr > 0) {
3012 np->n_flag |= NFSYNCWAIT;
3013 error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr,
3014 &np->n_mtx, slpflag | (PRIBIO + 1),
3015 "nfsfsync", 0);
3016 if (error) {
3017 if (nfs_sigintr(nmp, (struct nfsreq *)0, td)) {
3018 mtx_unlock(&np->n_mtx);
3019 error = EINTR;
3020 goto done;
3021 }
3022 }
3023 }
3024 mtx_unlock(&np->n_mtx);
3025 } else
3026 VI_UNLOCK(vp);
     /* Report and clear a write error recorded on the node. */
3027 mtx_lock(&np->n_mtx);
3028 if (np->n_flag & NWRITEERR) {
3029 error = np->n_error;
3030 np->n_flag &= ~NWRITEERR;
3031 }
     /* Nothing dirty, nothing in flight: the node is no longer modified. */
3032 if (commit && vp->v_bufobj.bo_dirty.bv_cnt == 0 &&
3033 vp->v_bufobj.bo_numoutput == 0 && np->n_directio_asyncwr == 0)
3034 np->n_flag &= ~NMODIFIED;
3035 mtx_unlock(&np->n_mtx);
3036 done:
     /* Free the commit vector if it came from malloc(). */
3037 if (bvec != NULL && bvec != bvec_on_stack)
3038 free(bvec, M_TEMP);
3039 return (error);
3040 }
3041
3042 /*
3043 * NFS advisory byte-level locks.
3044 */
3045 static int
3046 nfs_advlock(struct vop_advlock_args *ap)
3047 {
3048 struct vnode *vp = ap->a_vp;
3049 u_quad_t size;
3050 int error;
3051
3052 error = vn_lock(vp, LK_SHARED, curthread);
3053 if (error)
3054 return (error);
3055 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
3056 size = VTONFS(vp)->n_size;
3057 VOP_UNLOCK(vp, 0, curthread);
3058 error = lf_advlock(ap, &(vp->v_lockf), size);
3059 } else {
3060 if (nfs_advlock_p)
3061 error = nfs_advlock_p(ap);
3062 else
3063 error = ENOLCK;
3064 }
3065
3066 return (error);
3067 }
3068
3069 /*
3070 * NFS advisory byte-level locks.
3071 */
3072 static int
3073 nfs_advlockasync(struct vop_advlockasync_args *ap)
3074 {
3075 struct vnode *vp = ap->a_vp;
3076 u_quad_t size;
3077 int error;
3078
3079 error = vn_lock(vp, LK_SHARED, curthread);
3080 if (error)
3081 return (error);
3082 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
3083 size = VTONFS(vp)->n_size;
3084 VOP_UNLOCK(vp, 0, curthread);
3085 error = lf_advlockasync(ap, &(vp->v_lockf), size);
3086 } else {
3087 VOP_UNLOCK(vp, 0, curthread);
3088 error = EOPNOTSUPP;
3089 }
3090 return (error);
3091 }
3092
3093 /*
3094 * Print out the contents of an nfsnode.
3095 */
3096 static int
3097 nfs_print(struct vop_print_args *ap)
3098 {
3099 struct vnode *vp = ap->a_vp;
3100 struct nfsnode *np = VTONFS(vp);
3101
3102 nfs_printf("\tfileid %ld fsid 0x%x",
3103 np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3104 if (vp->v_type == VFIFO)
3105 fifo_printinfo(vp);
3106 printf("\n");
3107 return (0);
3108 }
3109
3110 /*
3111 * This is the "real" nfs::bwrite(struct buf*).
3112 * We set B_CACHE if this is a VMIO buffer.
3113 */
3114 int
3115 nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
3116 {
3117 int s;
3118 int oldflags = bp->b_flags;
3119 #if 0
3120 int retv = 1;
3121 off_t off;
3122 #endif
3123
3124 if (BUF_REFCNT(bp) == 0)
3125 panic("bwrite: buffer is not locked???");
3126
3127 if (bp->b_flags & B_INVAL) {
3128 brelse(bp);
3129 return(0);
3130 }
3131
3132 bp->b_flags |= B_CACHE;
3133
3134 /*
3135 * Undirty the bp. We will redirty it later if the I/O fails.
3136 */
3137
3138 s = splbio();
3139 bundirty(bp);
3140 bp->b_flags &= ~B_DONE;
3141 bp->b_ioflags &= ~BIO_ERROR;
3142 bp->b_iocmd = BIO_WRITE;
3143
3144 bufobj_wref(bp->b_bufobj);
3145 curthread->td_ru.ru_oublock++;
3146 splx(s);
3147
3148 /*
3149 * Note: to avoid loopback deadlocks, we do not
3150 * assign b_runningbufspace.
3151 */
3152 vfs_busy_pages(bp, 1);
3153
3154 BUF_KERNPROC(bp);
3155 bp->b_iooffset = dbtob(bp->b_blkno);
3156 bstrategy(bp);
3157
3158 if( (oldflags & B_ASYNC) == 0) {
3159 int rtval = bufwait(bp);
3160
3161 if (oldflags & B_DELWRI) {
3162 s = splbio();
3163 reassignbuf(bp);
3164 splx(s);
3165 }
3166 brelse(bp);
3167 return (rtval);
3168 }
3169
3170 return (0);
3171 }
3172
3173 /*
3174 * nfs special file access vnode op.
3175 * Essentially just get vattr and then imitate iaccess() since the device is
3176 * local to the client.
3177 */
3178 static int
3179 nfsspec_access(struct vop_access_args *ap)
3180 {
3181 struct vattr *vap;
3182 struct ucred *cred = ap->a_cred;
3183 struct vnode *vp = ap->a_vp;
3184 mode_t mode = ap->a_mode;
3185 struct vattr vattr;
3186 int error;
3187
3188 /*
3189 * Disallow write attempts on filesystems mounted read-only;
3190 * unless the file is a socket, fifo, or a block or character
3191 * device resident on the filesystem.
3192 */
3193 if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3194 switch (vp->v_type) {
3195 case VREG:
3196 case VDIR:
3197 case VLNK:
3198 return (EROFS);
3199 default:
3200 break;
3201 }
3202 }
3203 vap = &vattr;
3204 error = VOP_GETATTR(vp, vap, cred, ap->a_td);
3205 if (error)
3206 goto out;
3207 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
3208 mode, cred, NULL);
3209 out:
3210 return error;
3211 }
3212
3213 /*
3214 * Read wrapper for fifos.
3215 */
3216 static int
3217 nfsfifo_read(struct vop_read_args *ap)
3218 {
3219 struct nfsnode *np = VTONFS(ap->a_vp);
3220 int error;
3221
3222 /*
3223 * Set access flag.
3224 */
3225 mtx_lock(&np->n_mtx);
3226 np->n_flag |= NACC;
3227 getnanotime(&np->n_atim);
3228 mtx_unlock(&np->n_mtx);
3229 error = fifo_specops.vop_read(ap);
3230 return error;
3231 }
3232
3233 /*
3234 * Write wrapper for fifos.
3235 */
3236 static int
3237 nfsfifo_write(struct vop_write_args *ap)
3238 {
3239 struct nfsnode *np = VTONFS(ap->a_vp);
3240
3241 /*
3242 * Set update flag.
3243 */
3244 mtx_lock(&np->n_mtx);
3245 np->n_flag |= NUPD;
3246 getnanotime(&np->n_mtim);
3247 mtx_unlock(&np->n_mtx);
3248 return(fifo_specops.vop_write(ap));
3249 }
3250
3251 /*
3252 * Close wrapper for fifos.
3253 *
3254 * Update the times on the nfsnode then do fifo close.
3255 */
3256 static int
3257 nfsfifo_close(struct vop_close_args *ap)
3258 {
3259 struct vnode *vp = ap->a_vp;
3260 struct nfsnode *np = VTONFS(vp);
3261 struct vattr vattr;
3262 struct timespec ts;
3263
3264 mtx_lock(&np->n_mtx);
3265 if (np->n_flag & (NACC | NUPD)) {
3266 getnanotime(&ts);
3267 if (np->n_flag & NACC)
3268 np->n_atim = ts;
3269 if (np->n_flag & NUPD)
3270 np->n_mtim = ts;
3271 np->n_flag |= NCHG;
3272 if (vrefcnt(vp) == 1 &&
3273 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3274 VATTR_NULL(&vattr);
3275 if (np->n_flag & NACC)
3276 vattr.va_atime = np->n_atim;
3277 if (np->n_flag & NUPD)
3278 vattr.va_mtime = np->n_mtim;
3279 mtx_unlock(&np->n_mtx);
3280 (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_td);
3281 goto out;
3282 }
3283 }
3284 mtx_unlock(&np->n_mtx);
3285 out:
3286 return (fifo_specops.vop_close(ap));
3287 }
3288
3289 /*
3290 * Just call nfs_writebp() with the force argument set to 1.
3291 *
3292 * NOTE: B_DONE may or may not be set in a_bp on call.
3293 */
3294 static int
3295 nfs_bwrite(struct buf *bp)
3296 {
3297
3298 return (nfs_writebp(bp, 1, curthread));
3299 }
3300
/*
 * Buffer operations vector for NFS buffers. Only the write hook is NFS
 * specific (nfs_bwrite, which calls nfs_writebp); the remaining hooks are
 * buf* routines defined outside this file, presumably the generic
 * buffer-layer implementations -- confirm against their definitions.
 */
3301 struct buf_ops buf_ops_nfs = {
3302 .bop_name = "buf_ops_nfs",
3303 .bop_write = nfs_bwrite, /* NFS write path */
3304 .bop_strategy = bufstrategy,
3305 .bop_sync = bufsync,
3306 .bop_bdflush = bufbdflush,
3307 };
Cache object: ca82be89295d9c0043d8fa87ed1b8389
|