1 /*-
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_serv.c 8.8 (Berkeley) 7/31/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 /*
39 * nfs version 2 and 3 server calls to vnode ops
40 * - these routines generally have 3 phases
41 * 1 - break down and validate rpc request in mbuf list
42 * 2 - do the vnode ops for the request
43 * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
44 * 3 - build the rpc reply in an mbuf list
45 * nb:
46 * - do not mix the phases, since the nfsm_?? macros can return failures
47 * on a bad rpc or similar and do not do any vrele() or vput()'s
48 *
49 * - the nfsm_reply() macro generates an nfs rpc reply with the nfs
50 * error number iff error != 0 whereas
51 * returning an error from the server function implies a fatal error
52 * such as a badly constructed rpc request that should be dropped without
53 * a reply.
54 * For nfsm_reply(), the case where error == EBADRPC is treated
55 * specially; after constructing a reply, it does an immediate
56 * `goto nfsmout' to avoid getting any V3 post-op status appended.
57 *
58 * Other notes:
59 * Warning: always pay careful attention to resource cleanup on return
60 * and note that nfsm_*() macros can terminate a procedure on certain
61 * errors.
62 *
63 * lookup() and namei()
64 * may return garbage in various structural fields/return elements
65 * if an error is returned, and may garbage up nd.ni_dvp even if no
66 * error is returned and you did not request LOCKPARENT or WANTPARENT.
67 *
68 * We use the ni_cnd.cn_flags 'HASBUF' flag to track whether the name
69 * buffer has been freed or not.
70 */
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/proc.h>
75 #include <sys/namei.h>
76 #include <sys/unistd.h>
77 #include <sys/vnode.h>
78 #include <sys/mount.h>
79 #include <sys/socket.h>
80 #include <sys/socketvar.h>
81 #include <sys/malloc.h>
82 #include <sys/mbuf.h>
83 #include <sys/priv.h>
84 #include <sys/dirent.h>
85 #include <sys/stat.h>
86 #include <sys/kernel.h>
87 #include <sys/sysctl.h>
88 #include <sys/bio.h>
89 #include <sys/buf.h>
90
91 #include <vm/vm.h>
92 #include <vm/vm_extern.h>
93 #include <vm/vm_object.h>
94
95 #include <nfs/nfsproto.h>
96 #include <nfs/rpcv2.h>
97 #include <nfsserver/nfs.h>
98 #include <nfs/xdr_subs.h>
99 #include <nfsserver/nfsm_subs.h>
100
/*
 * Debug trace hook.  The single argument is a complete, parenthesized
 * printf() argument list, e.g.
 *	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 * It expands to nothing unless NFSRV_DEBUG is defined.
 */
#ifndef NFSRV_DEBUG
#define	nfsdbprintf(info)
#else
#define	nfsdbprintf(info)	printf info
#endif
106
107 #define MAX_COMMIT_COUNT (1024 * 1024)
108
109 #define NUM_HEURISTIC 1017
110 #define NHUSE_INIT 64
111 #define NHUSE_INC 16
112 #define NHUSE_MAX 2048
113
114 static struct nfsheur {
115 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */
116 off_t nh_nextr; /* next offset for sequential detection */
117 int nh_use; /* use count for selection */
118 int nh_seqcount; /* heuristic */
119 } nfsheur[NUM_HEURISTIC];
120
121 /* Global vars */
122
123 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
124 int nfsrvw_procrastinate_v3 = 0;
125
126 static struct timeval nfsver = { 0 };
127
128 SYSCTL_NODE(_vfs, OID_AUTO, nfsrv, CTLFLAG_RW, 0, "NFS server");
129
130 static int nfs_async;
131 static int nfs_commit_blks;
132 static int nfs_commit_miss;
133 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
134 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
135 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
136
137 struct nfsrvstats nfsrvstats;
138 SYSCTL_STRUCT(_vfs_nfsrv, NFS_NFSRVSTATS, nfsrvstats, CTLFLAG_RW,
139 &nfsrvstats, nfsrvstats, "S,nfsrvstats");
140
141 static int nfsrv_access(struct vnode *, int, struct ucred *, int,
142 struct thread *, int);
143 static void nfsrvw_coalesce(struct nfsrv_descript *,
144 struct nfsrv_descript *);
145
146 /*
147 * Clear nameidata fields that are tested in nsfmout cleanup code prior
148 * to using first nfsm macro (that might jump to the cleanup code).
149 */
150
151 static __inline void
152 ndclear(struct nameidata *nd)
153 {
154
155 nd->ni_cnd.cn_flags = 0;
156 nd->ni_vp = NULL;
157 nd->ni_dvp = NULL;
158 nd->ni_startdir = NULL;
159 }
160
/*
 * Merge two "vfslocked" indicators into one.  If both are set, the
 * reference to Giant represented by vfs2 is dropped so that at most one
 * reference remains.  The result is non-zero when either input was set,
 * which lets an nfsrv op track a single vfslocked variable.
 */
static __inline int
nfsrv_lockedpair(int vfs1, int vfs2)
{
	int held;

	held = vfs1 | vfs2;
	if (vfs1 != 0 && vfs2 != 0) {
		/* Both sides hold Giant; keep only one reference. */
		VFS_UNLOCK_GIANT(vfs2);
	}
	return (held);
}
176
/*
 * Variant of nfsrv_lockedpair() whose second indicator is taken from the
 * nameidata via NDHASGIANT().
 */
static __inline int
nfsrv_lockedpair_nd(int vfs1, struct nameidata *nd)
{

	return (nfsrv_lockedpair(vfs1, NDHASGIANT(nd)));
}
186
187 /*
188 * nfs v3 access service
189 */
190 int
191 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
192 struct thread *td, struct mbuf **mrq)
193 {
194 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
195 struct sockaddr *nam = nfsd->nd_nam;
196 caddr_t dpos = nfsd->nd_dpos;
197 struct ucred *cred = nfsd->nd_cr;
198 struct vnode *vp = NULL;
199 nfsfh_t nfh;
200 fhandle_t *fhp;
201 u_int32_t *tl;
202 caddr_t bpos;
203 int error = 0, rdonly, getret;
204 struct mbuf *mb, *mreq;
205 struct vattr vattr, *vap = &vattr;
206 u_long testmode, nfsmode;
207 int v3 = (nfsd->nd_flag & ND_NFSV3);
208 int vfslocked;
209
210 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
211 if (!v3)
212 panic("nfsrv3_access: v3 proc called on a v2 connection");
213 vfslocked = 0;
214 fhp = &nfh.fh_generic;
215 nfsm_srvmtofh(fhp);
216 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
217 error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
218 nam, &rdonly, TRUE);
219 if (error) {
220 nfsm_reply(NFSX_UNSIGNED);
221 nfsm_srvpostop_attr(1, NULL);
222 error = 0;
223 goto nfsmout;
224 }
225 nfsmode = fxdr_unsigned(u_int32_t, *tl);
226 if ((nfsmode & NFSV3ACCESS_READ) &&
227 nfsrv_access(vp, VREAD, cred, rdonly, td, 0))
228 nfsmode &= ~NFSV3ACCESS_READ;
229 if (vp->v_type == VDIR)
230 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
231 NFSV3ACCESS_DELETE);
232 else
233 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
234 if ((nfsmode & testmode) &&
235 nfsrv_access(vp, VWRITE, cred, rdonly, td, 0))
236 nfsmode &= ~testmode;
237 if (vp->v_type == VDIR)
238 testmode = NFSV3ACCESS_LOOKUP;
239 else
240 testmode = NFSV3ACCESS_EXECUTE;
241 if ((nfsmode & testmode) &&
242 nfsrv_access(vp, VEXEC, cred, rdonly, td, 0))
243 nfsmode &= ~testmode;
244 getret = VOP_GETATTR(vp, vap, cred, td);
245 vput(vp);
246 vp = NULL;
247 nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
248 nfsm_srvpostop_attr(getret, vap);
249 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
250 *tl = txdr_unsigned(nfsmode);
251 nfsmout:
252 if (vp)
253 vput(vp);
254 VFS_UNLOCK_GIANT(vfslocked);
255 return(error);
256 }
257
258 /*
259 * nfs getattr service
260 */
261 int
262 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
263 struct thread *td, struct mbuf **mrq)
264 {
265 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
266 struct sockaddr *nam = nfsd->nd_nam;
267 caddr_t dpos = nfsd->nd_dpos;
268 struct ucred *cred = nfsd->nd_cr;
269 struct nfs_fattr *fp;
270 struct vattr va;
271 struct vattr *vap = &va;
272 struct vnode *vp = NULL;
273 nfsfh_t nfh;
274 fhandle_t *fhp;
275 caddr_t bpos;
276 int error = 0, rdonly;
277 struct mbuf *mb, *mreq;
278 int vfslocked;
279
280 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
281 vfslocked = 0;
282 fhp = &nfh.fh_generic;
283 nfsm_srvmtofh(fhp);
284 error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp, nam,
285 &rdonly, TRUE);
286 if (error) {
287 nfsm_reply(0);
288 error = 0;
289 goto nfsmout;
290 }
291 error = VOP_GETATTR(vp, vap, cred, td);
292 vput(vp);
293 vp = NULL;
294 nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
295 if (error) {
296 error = 0;
297 goto nfsmout;
298 }
299 fp = nfsm_build(struct nfs_fattr *,
300 NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
301 nfsm_srvfillattr(vap, fp);
302 /* fall through */
303
304 nfsmout:
305 if (vp)
306 vput(vp);
307 VFS_UNLOCK_GIANT(vfslocked);
308 return(error);
309 }
310
311 /*
312 * nfs setattr service
313 */
314 int
315 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
316 struct thread *td, struct mbuf **mrq)
317 {
318 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
319 struct sockaddr *nam = nfsd->nd_nam;
320 caddr_t dpos = nfsd->nd_dpos;
321 struct ucred *cred = nfsd->nd_cr;
322 struct vattr va, preat;
323 struct vattr *vap = &va;
324 struct nfsv2_sattr *sp;
325 struct nfs_fattr *fp;
326 struct vnode *vp = NULL;
327 nfsfh_t nfh;
328 fhandle_t *fhp;
329 u_int32_t *tl;
330 caddr_t bpos;
331 int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
332 int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
333 struct mbuf *mb, *mreq;
334 struct timespec guard = { 0, 0 };
335 struct mount *mp = NULL;
336 int tvfslocked;
337 int vfslocked;
338
339 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
340 vfslocked = 0;
341 fhp = &nfh.fh_generic;
342 nfsm_srvmtofh(fhp);
343 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
344 error = ESTALE;
345 goto out;
346 }
347 vfslocked = VFS_LOCK_GIANT(mp);
348 (void) vn_start_write(NULL, &mp, V_WAIT);
349 vfs_rel(mp); /* The write holds a ref. */
350 VATTR_NULL(vap);
351 if (v3) {
352 nfsm_srvsattr(vap);
353 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
354 gcheck = fxdr_unsigned(int, *tl);
355 if (gcheck) {
356 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
357 fxdr_nfsv3time(tl, &guard);
358 }
359 } else {
360 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
361 /*
362 * Nah nah nah nah na nah
363 * There is a bug in the Sun client that puts 0xffff in the mode
364 * field of sattr when it should put in 0xffffffff. The u_short
365 * doesn't sign extend.
366 * --> check the low order 2 bytes for 0xffff
367 */
368 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
369 vap->va_mode = nfstov_mode(sp->sa_mode);
370 if (sp->sa_uid != nfsrv_nfs_xdrneg1)
371 vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
372 if (sp->sa_gid != nfsrv_nfs_xdrneg1)
373 vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
374 if (sp->sa_size != nfsrv_nfs_xdrneg1)
375 vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
376 if (sp->sa_atime.nfsv2_sec != nfsrv_nfs_xdrneg1) {
377 #ifdef notyet
378 fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
379 #else
380 vap->va_atime.tv_sec =
381 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
382 vap->va_atime.tv_nsec = 0;
383 #endif
384 }
385 if (sp->sa_mtime.nfsv2_sec != nfsrv_nfs_xdrneg1)
386 fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
387
388 }
389
390 /*
391 * Now that we have all the fields, lets do it.
392 */
393 error = nfsrv_fhtovp(fhp, 1, &vp, &tvfslocked, cred, slp,
394 nam, &rdonly, TRUE);
395 vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
396 if (error) {
397 nfsm_reply(2 * NFSX_UNSIGNED);
398 if (v3)
399 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
400 error = 0;
401 goto nfsmout;
402 }
403
404 /*
405 * vp now an active resource, pay careful attention to cleanup
406 */
407 if (v3) {
408 error = preat_ret = VOP_GETATTR(vp, &preat, cred, td);
409 if (!error && gcheck &&
410 (preat.va_ctime.tv_sec != guard.tv_sec ||
411 preat.va_ctime.tv_nsec != guard.tv_nsec))
412 error = NFSERR_NOT_SYNC;
413 if (error) {
414 vput(vp);
415 vp = NULL;
416 nfsm_reply(NFSX_WCCDATA(v3));
417 if (v3)
418 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
419 error = 0;
420 goto nfsmout;
421 }
422 }
423
424 /*
425 * If the size is being changed write acces is required, otherwise
426 * just check for a read only filesystem.
427 */
428 if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
429 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
430 error = EROFS;
431 goto out;
432 }
433 } else {
434 if (vp->v_type == VDIR) {
435 error = EISDIR;
436 goto out;
437 } else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
438 td, 0)) != 0)
439 goto out;
440 }
441 error = VOP_SETATTR(vp, vap, cred, td);
442 postat_ret = VOP_GETATTR(vp, vap, cred, td);
443 if (!error)
444 error = postat_ret;
445 out:
446 if (vp != NULL)
447 vput(vp);
448
449 vp = NULL;
450 nfsm_reply(NFSX_WCCORFATTR(v3));
451 if (v3) {
452 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
453 } else if (!error) {
454 /* v2 non-error case. */
455 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
456 nfsm_srvfillattr(vap, fp);
457 }
458 error = 0;
459 /* fall through */
460
461 nfsmout:
462 if (vp)
463 vput(vp);
464 vn_finished_write(mp);
465 VFS_UNLOCK_GIANT(vfslocked);
466 return(error);
467 }
468
469 /*
470 * nfs lookup rpc
471 */
472 int
473 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
474 struct thread *td, struct mbuf **mrq)
475 {
476 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
477 struct sockaddr *nam = nfsd->nd_nam;
478 caddr_t dpos = nfsd->nd_dpos;
479 struct ucred *cred = nfsd->nd_cr;
480 struct nfs_fattr *fp;
481 struct nameidata nd, ind, *ndp = &nd;
482 struct vnode *vp, *dirp = NULL;
483 nfsfh_t nfh;
484 fhandle_t *fhp;
485 caddr_t bpos;
486 int error = 0, len, dirattr_ret = 1;
487 int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
488 struct mbuf *mb, *mreq;
489 struct vattr va, dirattr, *vap = &va;
490 int tvfslocked;
491 int vfslocked;
492
493 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
494 ndclear(&nd);
495 vfslocked = 0;
496
497 fhp = &nfh.fh_generic;
498 nfsm_srvmtofh(fhp);
499 nfsm_srvnamesiz(len);
500
501 pubflag = nfs_ispublicfh(fhp);
502
503 nd.ni_cnd.cn_cred = cred;
504 nd.ni_cnd.cn_nameiop = LOOKUP;
505 nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART | MPSAFE;
506 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
507 &dirp, v3, &dirattr, &dirattr_ret, td, pubflag);
508 vfslocked = NDHASGIANT(&nd);
509
510 /*
511 * namei failure, only dirp to cleanup. Clear out garbarge from
512 * structure in case macros jump to nfsmout.
513 */
514
515 if (error) {
516 if (dirp) {
517 vrele(dirp);
518 dirp = NULL;
519 }
520 nfsm_reply(NFSX_POSTOPATTR(v3));
521 if (v3)
522 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
523 error = 0;
524 goto nfsmout;
525 }
526
527 /*
528 * Locate index file for public filehandle
529 *
530 * error is 0 on entry and 0 on exit from this block.
531 */
532
533 if (pubflag) {
534 if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) {
535 /*
536 * Setup call to lookup() to see if we can find
537 * the index file. Arguably, this doesn't belong
538 * in a kernel.. Ugh. If an error occurs, do not
539 * try to install an index file and then clear the
540 * error.
541 *
542 * When we replace nd with ind and redirect ndp,
543 * maintenance of ni_startdir and ni_vp shift to
544 * ind and we have to clean them up in the old nd.
545 * However, the cnd resource continues to be maintained
546 * via the original nd. Confused? You aren't alone!
547 */
548 ind = nd;
549 VOP_UNLOCK(nd.ni_vp, 0, td);
550 ind.ni_pathlen = strlen(nfs_pub.np_index);
551 ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf =
552 nfs_pub.np_index;
553 ind.ni_startdir = nd.ni_vp;
554 VREF(ind.ni_startdir);
555 ind.ni_cnd.cn_flags &= ~GIANTHELD;
556 tvfslocked = VFS_LOCK_GIANT(ind.ni_startdir->v_mount);
557 if (tvfslocked)
558 nd.ni_cnd.cn_flags |= GIANTHELD;
559 error = lookup(&ind);
560 ind.ni_dvp = NULL;
561 vfslocked = nfsrv_lockedpair_nd(vfslocked, &ind);
562 ind.ni_cnd.cn_flags &= ~GIANTHELD;
563
564 if (error == 0) {
565 /*
566 * Found an index file. Get rid of
567 * the old references. transfer nd.ni_vp'
568 */
569 if (dirp)
570 vrele(dirp);
571 dirp = nd.ni_vp;
572 nd.ni_vp = NULL;
573 vrele(nd.ni_startdir);
574 nd.ni_startdir = NULL;
575 ndp = &ind;
576 }
577 error = 0;
578 }
579 /*
580 * If the public filehandle was used, check that this lookup
581 * didn't result in a filehandle outside the publicly exported
582 * filesystem. We clear the poor vp here to avoid lockups due
583 * to NFS I/O.
584 */
585
586 if (ndp->ni_vp->v_mount != nfs_pub.np_mount) {
587 vput(nd.ni_vp);
588 nd.ni_vp = NULL;
589 error = EPERM;
590 }
591 }
592
593 /*
594 * Resources at this point:
595 * ndp->ni_vp may not be NULL
596 */
597
598 if (error) {
599 nfsm_reply(NFSX_POSTOPATTR(v3));
600 if (v3)
601 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
602 error = 0;
603 goto nfsmout;
604 }
605
606 /*
607 * Get underlying attribute, then release remaining resources ( for
608 * the same potential blocking reason ) and reply.
609 */
610 vp = ndp->ni_vp;
611 bzero((caddr_t)fhp, sizeof(nfh));
612 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
613 error = VOP_VPTOFH(vp, &fhp->fh_fid);
614 if (!error)
615 error = VOP_GETATTR(vp, vap, cred, td);
616
617 vput(vp);
618 vrele(ndp->ni_startdir);
619 vrele(dirp);
620 ndp->ni_vp = NULL;
621 ndp->ni_startdir = NULL;
622 dirp = NULL;
623 nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
624 if (error) {
625 if (v3)
626 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
627 error = 0;
628 goto nfsmout;
629 }
630 nfsm_srvfhtom(fhp, v3);
631 if (v3) {
632 nfsm_srvpostop_attr(0, vap);
633 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
634 } else {
635 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
636 nfsm_srvfillattr(vap, fp);
637 }
638
639 nfsmout:
640 if (ndp->ni_vp || dirp || ndp->ni_startdir) {
641 if (ndp->ni_vp)
642 vput(ndp->ni_vp);
643 if (dirp)
644 vrele(dirp);
645 if (ndp->ni_startdir)
646 vrele(ndp->ni_startdir);
647 }
648 NDFREE(&nd, NDF_ONLY_PNBUF);
649 VFS_UNLOCK_GIANT(vfslocked);
650 return (error);
651 }
652
653 /*
654 * nfs readlink service
655 */
656 int
657 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
658 struct thread *td, struct mbuf **mrq)
659 {
660 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
661 struct sockaddr *nam = nfsd->nd_nam;
662 caddr_t dpos = nfsd->nd_dpos;
663 struct ucred *cred = nfsd->nd_cr;
664 struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
665 struct iovec *ivp = iv;
666 struct mbuf *mp;
667 u_int32_t *tl;
668 caddr_t bpos;
669 int error = 0, rdonly, i, tlen, len, getret;
670 int v3 = (nfsd->nd_flag & ND_NFSV3);
671 struct mbuf *mb, *mp3, *nmp, *mreq;
672 struct vnode *vp = NULL;
673 struct vattr attr;
674 nfsfh_t nfh;
675 fhandle_t *fhp;
676 struct uio io, *uiop = &io;
677 int vfslocked;
678
679 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
680 vfslocked = 0;
681 #ifndef nolint
682 mp = NULL;
683 #endif
684 mp3 = NULL;
685 fhp = &nfh.fh_generic;
686 nfsm_srvmtofh(fhp);
687 len = 0;
688 i = 0;
689 while (len < NFS_MAXPATHLEN) {
690 MGET(nmp, M_TRYWAIT, MT_DATA);
691 MCLGET(nmp, M_TRYWAIT);
692 nmp->m_len = NFSMSIZ(nmp);
693 if (len == 0)
694 mp3 = mp = nmp;
695 else {
696 mp->m_next = nmp;
697 mp = nmp;
698 }
699 if ((len + mp->m_len) > NFS_MAXPATHLEN) {
700 mp->m_len = NFS_MAXPATHLEN - len;
701 len = NFS_MAXPATHLEN;
702 } else
703 len += mp->m_len;
704 ivp->iov_base = mtod(mp, caddr_t);
705 ivp->iov_len = mp->m_len;
706 i++;
707 ivp++;
708 }
709 uiop->uio_iov = iv;
710 uiop->uio_iovcnt = i;
711 uiop->uio_offset = 0;
712 uiop->uio_resid = len;
713 uiop->uio_rw = UIO_READ;
714 uiop->uio_segflg = UIO_SYSSPACE;
715 uiop->uio_td = NULL;
716 error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
717 nam, &rdonly, TRUE);
718 if (error) {
719 nfsm_reply(2 * NFSX_UNSIGNED);
720 if (v3)
721 nfsm_srvpostop_attr(1, NULL);
722 error = 0;
723 goto nfsmout;
724 }
725 if (vp->v_type != VLNK) {
726 if (v3)
727 error = EINVAL;
728 else
729 error = ENXIO;
730 } else
731 error = VOP_READLINK(vp, uiop, cred);
732 getret = VOP_GETATTR(vp, &attr, cred, td);
733 vput(vp);
734 vp = NULL;
735 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
736 if (v3)
737 nfsm_srvpostop_attr(getret, &attr);
738 if (error) {
739 error = 0;
740 goto nfsmout;
741 }
742 if (uiop->uio_resid > 0) {
743 len -= uiop->uio_resid;
744 tlen = nfsm_rndup(len);
745 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
746 }
747 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
748 *tl = txdr_unsigned(len);
749 mb->m_next = mp3;
750 mp3 = NULL;
751 nfsmout:
752 if (mp3)
753 m_freem(mp3);
754 if (vp)
755 vput(vp);
756 VFS_UNLOCK_GIANT(vfslocked);
757 return(error);
758 }
759
760 /*
761 * nfs read service
762 */
763 int
764 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
765 struct thread *td, struct mbuf **mrq)
766 {
767 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
768 struct sockaddr *nam = nfsd->nd_nam;
769 caddr_t dpos = nfsd->nd_dpos;
770 struct ucred *cred = nfsd->nd_cr;
771 struct iovec *iv;
772 struct iovec *iv2;
773 struct mbuf *m;
774 struct nfs_fattr *fp;
775 u_int32_t *tl;
776 int i;
777 caddr_t bpos;
778 int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
779 int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
780 struct mbuf *mb, *mreq;
781 struct mbuf *m2;
782 struct vnode *vp = NULL;
783 nfsfh_t nfh;
784 fhandle_t *fhp;
785 struct uio io, *uiop = &io;
786 struct vattr va, *vap = &va;
787 struct nfsheur *nh;
788 off_t off;
789 int ioflag = 0;
790 int vfslocked;
791
792
793 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
794 vfslocked = 0;
795 fhp = &nfh.fh_generic;
796 nfsm_srvmtofh(fhp);
797 if (v3) {
798 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
799 off = fxdr_hyper(tl);
800 } else {
801 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
802 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
803 }
804 nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));
805
806 /*
807 * Reference vp. If an error occurs, vp will be invalid, but we
808 * have to NULL it just in case. The macros might goto nfsmout
809 * as well.
810 */
811
812 error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
813 nam, &rdonly, TRUE);
814 if (error) {
815 vp = NULL;
816 nfsm_reply(2 * NFSX_UNSIGNED);
817 if (v3)
818 nfsm_srvpostop_attr(1, NULL);
819 error = 0;
820 goto nfsmout;
821 }
822
823 if (vp->v_type != VREG) {
824 if (v3)
825 error = EINVAL;
826 else
827 error = (vp->v_type == VDIR) ? EISDIR : EACCES;
828 }
829 if (!error) {
830 if ((error = nfsrv_access(vp, VREAD, cred, rdonly,
831 td, 1)) != 0)
832 error = nfsrv_access(vp, VEXEC, cred,
833 rdonly, td, 1);
834 }
835 getret = VOP_GETATTR(vp, vap, cred, td);
836 if (!error)
837 error = getret;
838 if (error) {
839 vput(vp);
840 vp = NULL;
841 nfsm_reply(NFSX_POSTOPATTR(v3));
842 if (v3)
843 nfsm_srvpostop_attr(getret, vap);
844 error = 0;
845 goto nfsmout;
846 }
847
848 /*
849 * Calculate byte count to read
850 */
851
852 if (off >= vap->va_size)
853 cnt = 0;
854 else if ((off + reqlen) > vap->va_size)
855 cnt = vap->va_size - off;
856 else
857 cnt = reqlen;
858
859 /*
860 * Calculate seqcount for heuristic
861 */
862
863 {
864 int hi;
865 int try = 32;
866
867 /*
868 * Locate best candidate
869 */
870
871 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
872 nh = &nfsheur[hi];
873
874 while (try--) {
875 if (nfsheur[hi].nh_vp == vp) {
876 nh = &nfsheur[hi];
877 break;
878 }
879 if (nfsheur[hi].nh_use > 0)
880 --nfsheur[hi].nh_use;
881 hi = (hi + 1) % NUM_HEURISTIC;
882 if (nfsheur[hi].nh_use < nh->nh_use)
883 nh = &nfsheur[hi];
884 }
885
886 if (nh->nh_vp != vp) {
887 nh->nh_vp = vp;
888 nh->nh_nextr = off;
889 nh->nh_use = NHUSE_INIT;
890 if (off == 0)
891 nh->nh_seqcount = 4;
892 else
893 nh->nh_seqcount = 1;
894 }
895
896 /*
897 * Calculate heuristic
898 */
899
900 if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
901 if (++nh->nh_seqcount > IO_SEQMAX)
902 nh->nh_seqcount = IO_SEQMAX;
903 } else if (nh->nh_seqcount > 1) {
904 nh->nh_seqcount = 1;
905 } else {
906 nh->nh_seqcount = 0;
907 }
908 nh->nh_use += NHUSE_INC;
909 if (nh->nh_use > NHUSE_MAX)
910 nh->nh_use = NHUSE_MAX;
911 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
912 }
913
914 nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
915 if (v3) {
916 tl = nfsm_build(u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
917 *tl++ = nfsrv_nfs_true;
918 fp = (struct nfs_fattr *)tl;
919 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
920 } else {
921 tl = nfsm_build(u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
922 fp = (struct nfs_fattr *)tl;
923 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
924 }
925 len = left = nfsm_rndup(cnt);
926 if (cnt > 0) {
927 /*
928 * Generate the mbuf list with the uio_iov ref. to it.
929 */
930 i = 0;
931 m = m2 = mb;
932 while (left > 0) {
933 siz = min(M_TRAILINGSPACE(m), left);
934 if (siz > 0) {
935 left -= siz;
936 i++;
937 }
938 if (left > 0) {
939 MGET(m, M_TRYWAIT, MT_DATA);
940 MCLGET(m, M_TRYWAIT);
941 m->m_len = 0;
942 m2->m_next = m;
943 m2 = m;
944 }
945 }
946 MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
947 M_TEMP, M_WAITOK);
948 uiop->uio_iov = iv2 = iv;
949 m = mb;
950 left = len;
951 i = 0;
952 while (left > 0) {
953 if (m == NULL)
954 panic("nfsrv_read iov");
955 siz = min(M_TRAILINGSPACE(m), left);
956 if (siz > 0) {
957 iv->iov_base = mtod(m, caddr_t) + m->m_len;
958 iv->iov_len = siz;
959 m->m_len += siz;
960 left -= siz;
961 iv++;
962 i++;
963 }
964 m = m->m_next;
965 }
966 uiop->uio_iovcnt = i;
967 uiop->uio_offset = off;
968 uiop->uio_resid = len;
969 uiop->uio_rw = UIO_READ;
970 uiop->uio_segflg = UIO_SYSSPACE;
971 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
972 off = uiop->uio_offset;
973 nh->nh_nextr = off;
974 FREE((caddr_t)iv2, M_TEMP);
975 if (error || (getret = VOP_GETATTR(vp, vap, cred, td))) {
976 if (!error)
977 error = getret;
978 m_freem(mreq);
979 vput(vp);
980 vp = NULL;
981 nfsm_reply(NFSX_POSTOPATTR(v3));
982 if (v3)
983 nfsm_srvpostop_attr(getret, vap);
984 error = 0;
985 goto nfsmout;
986 }
987 } else
988 uiop->uio_resid = 0;
989 vput(vp);
990 vp = NULL;
991 nfsm_srvfillattr(vap, fp);
992 tlen = len - uiop->uio_resid;
993 cnt = cnt < tlen ? cnt : tlen;
994 tlen = nfsm_rndup(cnt);
995 if (len != tlen || tlen != cnt)
996 nfsm_adj(mb, len - tlen, tlen - cnt);
997 if (v3) {
998 *tl++ = txdr_unsigned(cnt);
999 if (cnt < reqlen)
1000 *tl++ = nfsrv_nfs_true;
1001 else
1002 *tl++ = nfsrv_nfs_false;
1003 }
1004 *tl = txdr_unsigned(cnt);
1005 nfsmout:
1006 if (vp)
1007 vput(vp);
1008 VFS_UNLOCK_GIANT(vfslocked);
1009 return(error);
1010 }
1011
1012 /*
1013 * nfs write service
1014 */
1015 int
1016 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1017 struct thread *td, struct mbuf **mrq)
1018 {
1019 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1020 struct sockaddr *nam = nfsd->nd_nam;
1021 caddr_t dpos = nfsd->nd_dpos;
1022 struct ucred *cred = nfsd->nd_cr;
1023 struct iovec *ivp;
1024 int i, cnt;
1025 struct mbuf *mp;
1026 struct nfs_fattr *fp;
1027 struct iovec *iv;
1028 struct vattr va, forat;
1029 struct vattr *vap = &va;
1030 u_int32_t *tl;
1031 caddr_t bpos;
1032 int error = 0, rdonly, len, forat_ret = 1;
1033 int ioflags, aftat_ret = 1, retlen = 0, zeroing, adjust;
1034 int stable = NFSV3WRITE_FILESYNC;
1035 int v3 = (nfsd->nd_flag & ND_NFSV3);
1036 struct mbuf *mb, *mreq;
1037 struct vnode *vp = NULL;
1038 nfsfh_t nfh;
1039 fhandle_t *fhp;
1040 struct uio io, *uiop = &io;
1041 off_t off;
1042 struct mount *mntp = NULL;
1043 int tvfslocked;
1044 int vfslocked;
1045
1046 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1047 vfslocked = 0;
1048 if (mrep == NULL) {
1049 *mrq = NULL;
1050 error = 0;
1051 goto nfsmout;
1052 }
1053 fhp = &nfh.fh_generic;
1054 nfsm_srvmtofh(fhp);
1055 if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
1056 error = ESTALE;
1057 goto ereply;
1058 }
1059 vfslocked = VFS_LOCK_GIANT(mntp);
1060 (void) vn_start_write(NULL, &mntp, V_WAIT);
1061 vfs_rel(mntp); /* The write holds a ref. */
1062 if (v3) {
1063 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
1064 off = fxdr_hyper(tl);
1065 tl += 3;
1066 stable = fxdr_unsigned(int, *tl++);
1067 } else {
1068 tl = nfsm_dissect_nonblock(u_int32_t *, 4 * NFSX_UNSIGNED);
1069 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1070 tl += 2;
1071 if (nfs_async)
1072 stable = NFSV3WRITE_UNSTABLE;
1073 }
1074 retlen = len = fxdr_unsigned(int32_t, *tl);
1075 cnt = i = 0;
1076
1077 /*
1078 * For NFS Version 2, it is not obvious what a write of zero length
1079 * should do, but I might as well be consistent with Version 3,
1080 * which is to return ok so long as there are no permission problems.
1081 */
1082 if (len > 0) {
1083 zeroing = 1;
1084 mp = mrep;
1085 while (mp) {
1086 if (mp == md) {
1087 zeroing = 0;
1088 adjust = dpos - mtod(mp, caddr_t);
1089 mp->m_len -= adjust;
1090 if (mp->m_len > 0 && adjust > 0)
1091 mp->m_data += adjust;
1092 }
1093 if (zeroing)
1094 mp->m_len = 0;
1095 else if (mp->m_len > 0) {
1096 i += mp->m_len;
1097 if (i > len) {
1098 mp->m_len -= (i - len);
1099 zeroing = 1;
1100 }
1101 if (mp->m_len > 0)
1102 cnt++;
1103 }
1104 mp = mp->m_next;
1105 }
1106 }
1107 if (len > NFS_MAXDATA || len < 0 || i < len) {
1108 error = EIO;
1109 nfsm_reply(2 * NFSX_UNSIGNED);
1110 if (v3)
1111 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1112 error = 0;
1113 goto nfsmout;
1114 }
1115 error = nfsrv_fhtovp(fhp, 1, &vp, &tvfslocked, cred, slp,
1116 nam, &rdonly, TRUE);
1117 vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
1118 if (error) {
1119 vp = NULL;
1120 nfsm_reply(2 * NFSX_UNSIGNED);
1121 if (v3)
1122 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1123 error = 0;
1124 goto nfsmout;
1125 }
1126 if (v3)
1127 forat_ret = VOP_GETATTR(vp, &forat, cred, td);
1128 if (vp->v_type != VREG) {
1129 if (v3)
1130 error = EINVAL;
1131 else
1132 error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1133 }
1134 if (!error)
1135 error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
1136 if (error) {
1137 vput(vp);
1138 vp = NULL;
1139 nfsm_reply(NFSX_WCCDATA(v3));
1140 if (v3)
1141 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1142 error = 0;
1143 goto nfsmout;
1144 }
1145
1146 if (len > 0) {
1147 MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1148 M_WAITOK);
1149 uiop->uio_iov = iv = ivp;
1150 uiop->uio_iovcnt = cnt;
1151 mp = mrep;
1152 while (mp) {
1153 if (mp->m_len > 0) {
1154 ivp->iov_base = mtod(mp, caddr_t);
1155 ivp->iov_len = mp->m_len;
1156 ivp++;
1157 }
1158 mp = mp->m_next;
1159 }
1160
1161 /*
1162 * XXX
1163 * The IO_METASYNC flag indicates that all metadata (and not just
1164 * enough to ensure data integrity) mus be written to stable storage
1165 * synchronously.
1166 * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1167 */
1168 if (stable == NFSV3WRITE_UNSTABLE)
1169 ioflags = IO_NODELOCKED;
1170 else if (stable == NFSV3WRITE_DATASYNC)
1171 ioflags = (IO_SYNC | IO_NODELOCKED);
1172 else
1173 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1174 uiop->uio_resid = len;
1175 uiop->uio_rw = UIO_WRITE;
1176 uiop->uio_segflg = UIO_SYSSPACE;
1177 uiop->uio_td = NULL;
1178 uiop->uio_offset = off;
1179 error = VOP_WRITE(vp, uiop, ioflags, cred);
1180 /* Unlocked write. */
1181 nfsrvstats.srvvop_writes++;
1182 FREE((caddr_t)iv, M_TEMP);
1183 }
1184 aftat_ret = VOP_GETATTR(vp, vap, cred, td);
1185 vput(vp);
1186 vp = NULL;
1187 if (!error)
1188 error = aftat_ret;
1189 ereply:
1190 nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
1191 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
1192 if (v3) {
1193 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1194 if (error) {
1195 error = 0;
1196 goto nfsmout;
1197 }
1198 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
1199 *tl++ = txdr_unsigned(retlen);
1200 /*
1201 * If nfs_async is set, then pretend the write was FILESYNC.
1202 */
1203 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1204 *tl++ = txdr_unsigned(stable);
1205 else
1206 *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1207 /*
1208 * Actually, there is no need to txdr these fields,
1209 * but it may make the values more human readable,
1210 * for debugging purposes.
1211 */
1212 if (nfsver.tv_sec == 0)
1213 nfsver = boottime;
1214 *tl++ = txdr_unsigned(nfsver.tv_sec);
1215 *tl = txdr_unsigned(nfsver.tv_usec);
1216 } else if (!error) {
1217 /* v2 non-error case. */
1218 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
1219 nfsm_srvfillattr(vap, fp);
1220 }
1221 error = 0;
1222 nfsmout:
1223 if (vp)
1224 vput(vp);
1225 vn_finished_write(mntp);
1226 VFS_UNLOCK_GIANT(vfslocked);
1227 return(error);
1228 }
1229
1230 /*
1231 * For the purposes of write gathering, we must decide if the credential
1232 * associated with two pending requests have equivilent privileges. Since
1233 * NFS only uses a subset of the BSD ucred -- the effective uid and group
1234 * IDs -- we have a compare routine that checks only the relevant fields.
1235 */
1236 static int
1237 nfsrv_samecred(struct ucred *cr1, struct ucred *cr2)
1238 {
1239 int i;
1240
1241 if (cr1->cr_uid != cr2->cr_uid)
1242 return (0);
1243 if (cr1->cr_ngroups != cr2->cr_ngroups)
1244 return (0);
1245 for (i = 0; i < cr1->cr_ngroups; i++) {
1246 if (cr1->cr_groups[i] != cr2->cr_groups[i])
1247 return (0);
1248 }
1249 return (1);
1250 }
1251
1252 /*
1253 * NFS write service with write gathering support. Called when
1254 * nfsrvw_procrastinate > 0.
1255 * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1256 * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1257 * Jan. 1994.
 *
 * The routine runs in three stages:
 *  1. If *ndp is non-NULL, the write header is parsed, the mbuf chain is
 *     trimmed down to the write data, and the request is queued on
 *     slp->ns_tq (ordered by nd_time) and on the hash chain selected by
 *     NWDELAYHASH().  On the hash chain it may be merged, through
 *     nfsrvw_coalesce(), with entries for which NFSW_CONTIG() holds and
 *     nfsrv_samecred() reports matching credentials.
 *  2. Every queued request whose nd_time is not later than the current
 *     time is written with VOP_WRITE(), and a reply is built for it and
 *     for each request on its nd_coalesce list.
 *  3. The first queued descriptor that has a reply (nd_mreq) is removed
 *     from the queue and handed back through *ndp and *mrq; both are left
 *     NULL when no reply is ready.
 *
 * The function's only return statement returns 0.
1258 */
1259 int
1260 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1261 struct thread *td, struct mbuf **mrq)
1262 {
1263 struct iovec *ivp;
1264 struct mbuf *mp;
1265 struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1266 struct nfs_fattr *fp;
1267 int i;
1268 struct iovec *iov;
1269 struct nfsrvw_delayhash *wpp;
1270 struct ucred *cred;
1271 struct vattr va, forat;
1272 u_int32_t *tl;
1273 caddr_t bpos, dpos;
1274 int error = 0, rdonly, len, forat_ret = 1;
1275 int ioflags, aftat_ret = 1, s, adjust, v3, zeroing;
1276 struct mbuf *mb, *mreq, *mrep, *md;
1277 struct vnode *vp = NULL;
1278 struct uio io, *uiop = &io;
1279 u_quad_t cur_usec;
1280 struct mount *mntp = NULL;
1281 int mvfslocked;
1282 int vfslocked;
1283
1284 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1285 #ifndef nolint
1286 i = 0;
1287 len = 0;
1288 #endif
1289 vfslocked = 0;
1290 *mrq = NULL;
/*
 * Stage 1: a request was passed in.  Take it over from the caller
 * (the caller's pointer is cleared) and parse it.
 */
1291 if (*ndp) {
1292 nfsd = *ndp;
1293 *ndp = NULL;
1294 mrep = nfsd->nd_mrep;
1295 md = nfsd->nd_md;
1296 dpos = nfsd->nd_dpos;
1297 cred = nfsd->nd_cr;
1298 v3 = (nfsd->nd_flag & ND_NFSV3);
1299 LIST_INIT(&nfsd->nd_coalesce);
1300 nfsd->nd_mreq = NULL;
/* Default to FILESYNC; a v3 request overrides this from its header below. */
1301 nfsd->nd_stable = NFSV3WRITE_FILESYNC;
/* The write is put off until nd_time: now plus the per-version delay. */
1302 cur_usec = nfs_curusec();
1303 nfsd->nd_time = cur_usec +
1304 (v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1305
1306 /*
1307 * Now, get the write header..
1308 */
1309 nfsm_srvmtofh(&nfsd->nd_fh);
1310 if (v3) {
/*
 * v3: words 0-1 hold the 64-bit offset, word 2 is skipped, word 3 is
 * the stability level and word 4 (read below) is the data length.
 */
1311 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
1312 nfsd->nd_off = fxdr_hyper(tl);
1313 tl += 3;
1314 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1315 } else {
/*
 * v2: word 1 holds the offset and word 3 (read below) the data length;
 * words 0 and 2 are not used.  With nfs_async set the write is treated
 * as UNSTABLE.
 */
1316 tl = nfsm_dissect_nonblock(u_int32_t *, 4 * NFSX_UNSIGNED);
1317 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1318 tl += 2;
1319 if (nfs_async)
1320 nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1321 }
1322 len = fxdr_unsigned(int32_t, *tl);
1323 nfsd->nd_len = len;
1324 nfsd->nd_eoff = nfsd->nd_off + len;
1325
1326 /*
1327 * Trim the header out of the mbuf list and trim off any trailing
1328 * junk so that the mbuf list has only the write data.
 *
 * zeroing is set while walking the mbufs that precede md and is set
 * again once more than len bytes of data have been seen; mbufs visited
 * while it is set get a length of 0.  i accumulates the number of data
 * bytes found from md onwards (before the final mbuf is shortened).
1329 */
1330 zeroing = 1;
1331 i = 0;
1332 mp = mrep;
1333 while (mp) {
1334 if (mp == md) {
1335 zeroing = 0;
1336 adjust = dpos - mtod(mp, caddr_t);
1337 mp->m_len -= adjust;
1338 if (mp->m_len > 0 && adjust > 0)
1339 mp->m_data += adjust;
1340 }
1341 if (zeroing)
1342 mp->m_len = 0;
1343 else {
1344 i += mp->m_len;
1345 if (i > len) {
1346 mp->m_len -= (i - len);
1347 zeroing = 1;
1348 }
1349 }
1350 mp = mp->m_next;
1351 }
/*
 * Reject requests that are too large, have a negative length, or carry
 * fewer than len bytes of data.
 *
 * NOTE(review): the nfsmout label sits inside this block; the nfsm_*
 * parsing macros above are defined elsewhere and presumably jump here on
 * a malformed request -- confirm against their definitions.
 *
 * The request gets an EIO reply built immediately.  Setting nd_mreq and
 * nd_time = 0 marks it as already answered for the scan at loop1, and
 * nd_mrep = NULL keeps it off the hash chain below.
 */
1352 if (len > NFS_MAXDATA || len < 0 || i < len) {
1353 nfsmout:
1354 m_freem(mrep);
1355 error = EIO;
1356 nfsm_writereply(2 * NFSX_UNSIGNED);
1357 if (v3)
1358 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1359 nfsd->nd_mreq = mreq;
1360 nfsd->nd_mrep = NULL;
1361 nfsd->nd_time = 0;
1362 }
1363
1364 /*
1365 * Add this entry to the hash and time queues.
1366 */
1367 s = splsoftclock();
/* Keep ns_tq ordered by nd_time: insert after the last entry with an earlier time. */
1368 owp = NULL;
1369 wp = LIST_FIRST(&slp->ns_tq);
1370 while (wp && wp->nd_time < nfsd->nd_time) {
1371 owp = wp;
1372 wp = LIST_NEXT(wp, nd_tq);
1373 }
1374 NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1375 if (owp) {
1376 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1377 } else {
1378 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1379 }
/* Only a request that still owns write data goes on the hash chain. */
1380 if (nfsd->nd_mrep) {
1381 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1382 owp = NULL;
1383 wp = LIST_FIRST(wpp);
/* First walk past entries whose file handle differs from ours ... */
1384 while (wp &&
1385 bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh, NFSX_V3FH)){
1386 owp = wp;
1387 wp = LIST_NEXT(wp, nd_hash);
1388 }
/* ... then past entries for the same handle that start at a lower offset. */
1389 while (wp && wp->nd_off < nfsd->nd_off &&
1390 !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh, NFSX_V3FH)) {
1391 owp = wp;
1392 wp = LIST_NEXT(wp, nd_hash);
1393 }
1394 if (owp) {
1395 LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1396
1397 /*
1398 * Search the hash list for overlapping entries and
1399 * coalesce.
 *
 * Starting with the new entry, walk forward along the chain
 * while NFSW_CONTIG(owp, nfsd) holds and merge each entry whose
 * credentials match owp's into owp.  The successor is fetched
 * into wp first because nfsrvw_coalesce() unlinks nfsd from the
 * hash chain.
1400 */
1401 for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1402 wp = LIST_NEXT(nfsd, nd_hash);
1403 if (nfsrv_samecred(owp->nd_cr, nfsd->nd_cr))
1404 nfsrvw_coalesce(owp, nfsd);
1405 }
1406 } else {
1407 LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1408 }
1409 }
1410 splx(s);
1411 }
1412
1413 /*
1414 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1415 * and generate the associated reply mbuf list(s).
 *
 * Stage 2: the time queue is scanned from the head; the scan stops at
 * the first entry whose nd_time is still in the future.
1416 */
1417 loop1:
1418 cur_usec = nfs_curusec();
1419 s = splsoftclock();
1420 for (nfsd = LIST_FIRST(&slp->ns_tq); nfsd; nfsd = owp) {
1421 owp = LIST_NEXT(nfsd, nd_tq);
1422 if (nfsd->nd_time > cur_usec)
1423 break;
/* Already has a reply; leave it queued for the final stage. */
1424 if (nfsd->nd_mreq)
1425 continue;
1426 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
/* Take the request off both lists, then lower spl again for the I/O. */
1427 LIST_REMOVE(nfsd, nd_tq);
1428 LIST_REMOVE(nfsd, nd_hash);
1429 splx(s);
1430 mrep = nfsd->nd_mrep;
1431 nfsd->nd_mrep = NULL;
1432 cred = nfsd->nd_cr;
1433 v3 = (nfsd->nd_flag & ND_NFSV3);
1434 forat_ret = aftat_ret = 1;
/* Map the file handle to a vnode and check that it is a regular file. */
1435 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, &vfslocked, cred,
1436 slp, nfsd->nd_nam, &rdonly, TRUE);
1437 if (!error) {
1438 if (v3)
1439 forat_ret = VOP_GETATTR(vp, &forat, cred, td);
1440 if (vp->v_type != VREG) {
1441 if (v3)
1442 error = EINVAL;
1443 else
1444 error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1445 }
1446 } else {
1447 vp = NULL;
1448 }
1449 if (!error)
1450 error = nfsrv_access(vp, VWRITE, cred, rdonly,
1451 td, 1);
/* Pick the I/O flags from the stability level recorded for this request. */
1452 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1453 ioflags = IO_NODELOCKED;
1454 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1455 ioflags = (IO_SYNC | IO_NODELOCKED);
1456 else
1457 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
/*
 * Describe a kernel-space write covering nd_off up to nd_eoff;
 * nfsrvw_coalesce() may have moved nd_eoff past this request's own end.
 */
1458 uiop->uio_rw = UIO_WRITE;
1459 uiop->uio_segflg = UIO_SYSSPACE;
1460 uiop->uio_td = NULL;
1461 uiop->uio_offset = nfsd->nd_off;
1462 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1463 if (uiop->uio_resid > 0) {
/* First pass: count the non-empty mbufs so the iovec array can be sized. */
1464 mp = mrep;
1465 i = 0;
1466 while (mp) {
1467 if (mp->m_len > 0)
1468 i++;
1469 mp = mp->m_next;
1470 }
1471 uiop->uio_iovcnt = i;
1472 MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
1473 M_TEMP, M_WAITOK);
/* Second pass: point one iovec at the data of each non-empty mbuf. */
1474 uiop->uio_iov = ivp = iov;
1475 mp = mrep;
1476 while (mp) {
1477 if (mp->m_len > 0) {
1478 ivp->iov_base = mtod(mp, caddr_t);
1479 ivp->iov_len = mp->m_len;
1480 ivp++;
1481 }
1482 mp = mp->m_next;
1483 }
1484 mvfslocked = 0;
1485 if (!error) {
/*
 * Try vn_start_write() with V_NOWAIT first.  If that fails, the vnode
 * is unlocked around a V_WAIT retry and locked again afterwards.
 */
1486 if (vn_start_write(vp, &mntp, V_NOWAIT) != 0) {
1487 VOP_UNLOCK(vp, 0, td);
1488 error = vn_start_write(NULL, &mntp, V_WAIT);
1489 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1490 }
1491 mvfslocked = VFS_LOCK_GIANT(mntp);
1492 }
1493 if (!error) {
1494 error = VOP_WRITE(vp, uiop, ioflags, cred);
1495 /* Unlocked write. */
1496 nfsrvstats.srvvop_writes++;
1497 vn_finished_write(mntp);
1498 }
1499 VFS_UNLOCK_GIANT(mvfslocked);
1500 FREE((caddr_t)iov, M_TEMP);
1501 }
/* The request data is freed whether or not the write was attempted. */
1502 m_freem(mrep);
/* Fetch the attributes after the write into va, then release the vnode. */
1503 if (vp) {
1504 aftat_ret = VOP_GETATTR(vp, &va, cred, td);
1505 vput(vp);
1506 vp = NULL;
1507 }
1508 VFS_UNLOCK_GIANT(vfslocked);
1509 /*
1510 * Loop around generating replies for all write rpcs that have
1511 * now been completed.
 *
 * swp remembers the request that performed the write; after the
 * first pass nfsd steps through the requests on swp's nd_coalesce
 * list, one per iteration.
1512 */
1513 swp = nfsd;
1514 do {
1515 NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1516 if (error) {
/* Failure: the reply body is only the wcc data, and only for v3. */
1517 nfsm_writereply(NFSX_WCCDATA(v3));
1518 if (v3) {
1519 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1520 }
1521 } else {
1522 nfsm_writereply(NFSX_PREOPATTR(v3) +
1523 NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED +
1524 NFSX_WRITEVERF(v3));
1525 if (v3) {
1526 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1527 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
/*
 * Each reply reports the length of its own request, but the stability
 * level of the combined write (swp).
 */
1528 *tl++ = txdr_unsigned(nfsd->nd_len);
1529 *tl++ = txdr_unsigned(swp->nd_stable);
1530 /*
1531 * Actually, there is no need to txdr these fields,
1532 * but it may make the values more human readable,
1533 * for debugging purposes.
1534 */
/* nfsver is set from boottime the first time its tv_sec is found to be zero. */
1535 if (nfsver.tv_sec == 0)
1536 nfsver = boottime;
1537 *tl++ = txdr_unsigned(nfsver.tv_sec);
1538 *tl = txdr_unsigned(nfsver.tv_usec);
1539 } else {
1540 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
1541 nfsm_srvfillattr(&va, fp);
1542 }
1543 }
/* NOTE(review): mreq is presumably set up by nfsm_writereply() above -- confirm. */
1544 nfsd->nd_mreq = mreq;
1545 if (nfsd->nd_mrep)
1546 panic("nfsrv_write: nd_mrep not free");
1547
1548 /*
1549 * Done. Put it at the head of the timer queue so that
1550 * the final phase can return the reply.
 *
 * Only coalesced requests are queued here; swp itself is
 * put back on the queue after the loop.
1551 */
1552 s = splsoftclock();
1553 if (nfsd != swp) {
1554 nfsd->nd_time = 0;
1555 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1556 }
/*
 * nd_coalesce is linked through the nd_tq field, so this LIST_REMOVE
 * takes the next request off swp's coalesce list.
 */
1557 nfsd = LIST_FIRST(&swp->nd_coalesce);
1558 if (nfsd) {
1559 LIST_REMOVE(nfsd, nd_tq);
1560 }
1561 splx(s);
1562 } while (nfsd);
/* Queue swp with its reply, then restart the scan with a fresh timestamp. */
1563 s = splsoftclock();
1564 swp->nd_time = 0;
1565 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1566 splx(s);
1567 goto loop1;
1568 }
/* The scan above ended with spl still raised at L-loop1's level; restore it. */
1569 splx(s);
1570
1571 /*
1572 * Search for a reply to return.
 *
 * Stage 3: the first queued descriptor with nd_mreq set is unlinked
 * and handed back through *mrq and *ndp.
1573 */
1574 s = splsoftclock();
1575 LIST_FOREACH(nfsd, &slp->ns_tq, nd_tq)
1576 if (nfsd->nd_mreq) {
1577 NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1578 LIST_REMOVE(nfsd, nd_tq);
1579 *mrq = nfsd->nd_mreq;
1580 *ndp = nfsd;
1581 break;
1582 }
1583 splx(s);
1584 return (0);
1585 }
1586
1587 /*
1588 * Coalesce the write request nfsd into owp. To do this we must:
1589 * - remove nfsd from the queues
1590 * - merge nfsd->nd_mrep into owp->nd_mrep
1591 * - update the nd_eoff and nd_stable for owp
1592 * - put nfsd on owp's nd_coalesce list
1593 * NB: Must be called at splsoftclock().
1594 */
1595 static void
1596 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1597 {
1598 int overlap;
1599 struct mbuf *mp;
1600 struct nfsrv_descript *p;
1601
1602 NFS_DPF(WG, ("C%03x-%03x",
1603 nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1604 LIST_REMOVE(nfsd, nd_hash);
1605 LIST_REMOVE(nfsd, nd_tq);
1606 if (owp->nd_eoff < nfsd->nd_eoff) {
1607 overlap = owp->nd_eoff - nfsd->nd_off;
1608 if (overlap < 0)
1609 panic("nfsrv_coalesce: bad off");
1610 if (overlap > 0)
1611 m_adj(nfsd->nd_mrep, overlap);
1612 mp = owp->nd_mrep;
1613 while (mp->m_next)
1614 mp = mp->m_next;
1615 mp->m_next = nfsd->nd_mrep;
1616 owp->nd_eoff = nfsd->nd_eoff;
1617 } else
1618 m_freem(nfsd->nd_mrep);
1619 nfsd->nd_mrep = NULL;
1620 if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1621 owp->nd_stable = NFSV3WRITE_FILESYNC;
1622 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1623 owp->nd_stable == NFSV3WRITE_UNSTABLE)
1624 owp->nd_stable = NFSV3WRITE_DATASYNC;
1625 LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1626
1627 /*
1628 * If nfsd had anything else coalesced into it, transfer them
1629 * to owp, otherwise their replies will never get sent.
1630 */
1631 for (p = LIST_FIRST(&nfsd->nd_coalesce); p;
1632 p = LIST_FIRST(&nfsd->nd_coalesce)) {
1633 LIST_REMOVE(p, nd_tq);
1634 LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1635 }
1636 }
1637
1638 /*
1639 * nfs create service
1640 * now does a truncate to 0 length via. setattr if it already exists
 *
 * Serves both NFSv2 and NFSv3 CREATE.  For v3 the create mode
 * (UNCHECKED, GUARDED or EXCLUSIVE) is read from the request and the new
 * object is always a regular file.  For v2 the file type is derived from
 * the mode in the supplied attributes, and character, block and fifo
 * nodes are made with VOP_MKNOD().  When the name already exists and a
 * size was supplied, write access is checked and only the size is set
 * with VOP_SETATTR().
 *
 * On success the reply carries the file handle and attributes of the
 * file; v3 replies also carry the directory's wcc data.  error is cleared
 * after the reply has been built at ereply, and every path ends in the
 * cleanup code at nfsmout.
1641 */
1642 int
1643 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1644 struct thread *td, struct mbuf **mrq)
1645 {
1646 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1647 struct sockaddr *nam = nfsd->nd_nam;
1648 caddr_t dpos = nfsd->nd_dpos;
1649 struct ucred *cred = nfsd->nd_cr;
1650 struct nfs_fattr *fp;
1651 struct vattr va, dirfor, diraft;
1652 struct vattr *vap = &va;
1653 struct nfsv2_sattr *sp;
1654 u_int32_t *tl;
1655 struct nameidata nd;
1656 caddr_t bpos;
1657 int error = 0, rdev, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1658 int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
1659 struct mbuf *mb, *mreq;
1660 struct vnode *dirp = NULL;
1661 nfsfh_t nfh;
1662 fhandle_t *fhp;
1663 u_quad_t tempsize;
1664 struct timespec cverf;
1665 struct mount *mp = NULL;
1666 int tvfslocked;
1667 int vfslocked;
1668
1669 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1670 vfslocked = 0;
1671 #ifndef nolint
1672 rdev = 0;
1673 #endif
1674 ndclear(&nd);
1675
/*
 * Pull the directory's file handle out of the request and find the mount
 * it belongs to; no such mount means the handle is stale.
 */
1676 fhp = &nfh.fh_generic;
1677 nfsm_srvmtofh(fhp);
1678 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
1679 error = ESTALE;
1680 goto ereply;
1681 }
1682 vfslocked = VFS_LOCK_GIANT(mp);
1683 (void) vn_start_write(NULL, &mp, V_WAIT);
1684 vfs_rel(mp); /* The write holds a ref. */
/* NOTE(review): nfsm_srvnamesiz() presumably stores the name length in len -- confirm. */
1685 nfsm_srvnamesiz(len);
1686
1687 nd.ni_cnd.cn_cred = cred;
1688 nd.ni_cnd.cn_nameiop = CREATE;
1689 nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE;
1690
1691 /*
1692 * Call namei and do initial cleanup to get a few things
1693 * out of the way. If we get an initial error we cleanup
1694 * and return here to avoid special-casing the invalid nd
1695 * structure through the rest of the case. dirp may be
1696 * set even if an error occurs, but the nd structure will not
1697 * be valid at all if an error occurs so we have to invalidate it
1698 * prior to calling nfsm_reply ( which might goto nfsmout ).
1699 */
1700 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
1701 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
1702 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
/* dirp is only used for the v3 directory attributes further down. */
1703 if (dirp && !v3) {
1704 vrele(dirp);
1705 dirp = NULL;
1706 }
1707 if (error) {
1708 nfsm_reply(NFSX_WCCDATA(v3));
1709 if (v3)
1710 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1711 error = 0;
1712 goto nfsmout;
1713 }
1714
1715 /*
1716 * No error. Continue. State:
1717 *
1718 * startdir is valid ( we release this immediately )
1719 * dirp may be valid
1720 * nd.ni_vp may be valid
1721 * nd.ni_dvp is valid
1722 *
1723 * The error state is set through the code and we may also do some
1724 * opportunistic releasing of vnodes to avoid holding locks through
1725 * NFS I/O. The cleanup at the end is a catch-all
1726 */
1727
1728 VATTR_NULL(vap);
1729 if (v3) {
1730 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
1731 how = fxdr_unsigned(int, *tl);
1732 switch (how) {
1733 case NFSV3CREATE_GUARDED:
/* GUARDED fails when the name already exists; otherwise it is handled like UNCHECKED. */
1734 if (nd.ni_vp) {
1735 error = EEXIST;
1736 break;
1737 }
1738 /* fall through */
1739 case NFSV3CREATE_UNCHECKED:
1740 nfsm_srvsattr(vap);
1741 break;
1742 case NFSV3CREATE_EXCLUSIVE:
/*
 * EXCLUSIVE carries a verifier instead of attributes.  It is kept in
 * cverf: a newly created file gets it as its access time, and for an
 * existing file it is compared with the file's access time below.
 */
1743 tl = nfsm_dissect_nonblock(u_int32_t *,
1744 NFSX_V3CREATEVERF);
1745 /* Unique bytes, endianness is not important. */
1746 cverf.tv_sec = (int32_t)tl[0];
1747 cverf.tv_nsec = tl[1];
1748 exclusive_flag = 1;
1749 break;
1750 };
1751 vap->va_type = VREG;
1752 } else {
/* v2: the type comes from the mode bits; VNON is treated as a regular file. */
1753 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
1754 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1755 if (vap->va_type == VNON)
1756 vap->va_type = VREG;
1757 vap->va_mode = nfstov_mode(sp->sa_mode);
1758 switch (vap->va_type) {
1759 case VREG:
/* A size field of -1 means that no size was supplied. */
1760 tsize = fxdr_unsigned(int32_t, sp->sa_size);
1761 if (tsize != -1)
1762 vap->va_size = (u_quad_t)tsize;
1763 break;
1764 case VCHR:
1765 case VBLK:
1766 case VFIFO:
/* For these types the size field is read as the device number. */
1767 rdev = fxdr_unsigned(long, sp->sa_size);
1768 break;
1769 default:
1770 break;
1771 };
1772 }
1773
1774 /*
1775 * Iff doesn't exist, create it
1776 * otherwise just truncate to 0 length
1777 * should I set the mode too ?
1778 *
1779 * The only possible error we can have at this point is EEXIST.
1780 * nd.ni_vp will also be non-NULL in that case.
1781 */
1782 if (nd.ni_vp == NULL) {
/* No mode was supplied: create the file with mode 0. */
1783 if (vap->va_mode == (mode_t)VNOVAL)
1784 vap->va_mode = 0;
1785 if (vap->va_type == VREG || vap->va_type == VSOCK) {
1786 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1787 if (error)
1788 NDFREE(&nd, NDF_ONLY_PNBUF);
1789 else {
/*
 * Exclusive create of a new file: store the verifier as the access
 * time.  Clearing exclusive_flag skips the verifier comparison below.
 */
1790 if (exclusive_flag) {
1791 exclusive_flag = 0;
1792 VATTR_NULL(vap);
1793 vap->va_atime = cverf;
1794 error = VOP_SETATTR(nd.ni_vp, vap, cred,
1795 td);
1796 }
1797 }
1798 } else if (vap->va_type == VCHR || vap->va_type == VBLK ||
1799 vap->va_type == VFIFO) {
1800 /*
1801 * NFSv2-specific code for creating device nodes
1802 * and fifos.
1803 *
1804 * Handle SysV FIFO node special cases. All other
1805 * devices require super user to access.
1806 */
/* A character device with device number 0xffffffff is turned into a fifo. */
1807 if (vap->va_type == VCHR && rdev == 0xffffffff)
1808 vap->va_type = VFIFO;
1809 if (vap->va_type != VFIFO &&
1810 (error = suser_cred(cred, 0))) {
1811 goto ereply;
1812 }
1813 vap->va_rdev = rdev;
1814 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1815 if (error) {
1816 NDFREE(&nd, NDF_ONLY_PNBUF);
1817 goto ereply;
1818 }
/* The vnode from VOP_MKNOD() is released; the node is looked up again below. */
1819 vput(nd.ni_vp);
1820 nd.ni_vp = NULL;
1821
1822 /*
1823 * release dvp prior to lookup
1824 */
1825 vput(nd.ni_dvp);
1826 nd.ni_dvp = NULL;
1827 /*
1828 * Setup for lookup.
1829 *
1830 * Even though LOCKPARENT was cleared, ni_dvp may
1831 * be garbage.
1832 */
1833 nd.ni_cnd.cn_nameiop = LOOKUP;
1834 nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
1835 nd.ni_cnd.cn_thread = td;
1836 nd.ni_cnd.cn_cred = cred;
1837 tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
1838 if (tvfslocked)
1839 nd.ni_cnd.cn_flags |= GIANTHELD;
1840 error = lookup(&nd);
/* ni_dvp is cleared after the lookup so the cleanup at nfsmout does not release it. */
1841 nd.ni_dvp = NULL;
1842 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
1843 nd.ni_cnd.cn_flags &= ~GIANTHELD;
1844 if (error)
1845 goto ereply;
1846
1847 if (nd.ni_cnd.cn_flags & ISSYMLINK) {
1848 error = EINVAL;
1849 goto ereply;
1850 }
1851 } else {
/* Any other file type cannot be created through this call. */
1852 error = ENXIO;
1853 }
1854 } else {
/*
 * The name already exists.  If a size was supplied, check write access
 * and set only the size; every other attribute is left untouched.
 */
1855 if (vap->va_size != -1) {
1856 error = nfsrv_access(nd.ni_vp, VWRITE,
1857 cred, (nd.ni_cnd.cn_flags & RDONLY), td, 0);
1858 if (!error) {
1859 tempsize = vap->va_size;
1860 VATTR_NULL(vap);
1861 vap->va_size = tempsize;
1862 error = VOP_SETATTR(nd.ni_vp, vap, cred,
1863 td);
1864 }
1865 }
1866 }
1867
/* Build the file handle and fetch the attributes that go into the reply. */
1868 if (!error) {
1869 bzero((caddr_t)fhp, sizeof(nfh));
1870 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
1871 error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
1872 if (!error)
1873 error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
1874 }
1875 if (v3) {
/*
 * exclusive_flag is still set only when the file was not created above.
 * In that case the request succeeds only if the file's access time
 * equals the verifier in the request.
 */
1876 if (exclusive_flag && !error &&
1877 bcmp(&cverf, &vap->va_atime, sizeof (cverf)))
1878 error = EEXIST;
/*
 * Fetch the directory attributes after the operation.  When dirp is not
 * the parent vnode held in nd, nd's vnodes are released before dirp is
 * locked.
 */
1879 if (dirp == nd.ni_dvp)
1880 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
1881 else {
1882 /* Drop the other locks to avoid deadlock. */
1883 if (nd.ni_dvp) {
1884 if (nd.ni_dvp == nd.ni_vp)
1885 vrele(nd.ni_dvp);
1886 else
1887 vput(nd.ni_dvp);
1888 }
1889 if (nd.ni_vp)
1890 vput(nd.ni_vp);
1891 nd.ni_dvp = NULL;
1892 nd.ni_vp = NULL;
1893
1894 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
1895 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
1896 VOP_UNLOCK(dirp, 0, td);
1897 }
1898 }
/*
 * Build the reply.  v3 always gets the directory wcc data and, on
 * success, the file handle and attributes; v2 gets handle and attributes
 * on success only.
 */
1899 ereply:
1900 nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
1901 if (v3) {
1902 if (!error) {
1903 nfsm_srvpostop_fh(fhp);
1904 nfsm_srvpostop_attr(0, vap);
1905 }
1906 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1907 } else if (!error) {
1908 /* v2 non-error case. */
1909 nfsm_srvfhtom(fhp, v3);
1910 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
1911 nfsm_srvfillattr(vap, fp);
1912 }
1913 error = 0;
1914
/* Common exit: release every vnode still recorded in nd and dirp. */
1915 nfsmout:
1916 if (nd.ni_dvp) {
1917 if (nd.ni_dvp == nd.ni_vp)
1918 vrele(nd.ni_dvp);
1919 else
1920 vput(nd.ni_dvp);
1921 }
1922 if (nd.ni_vp)
1923 vput(nd.ni_vp);
1924 if (nd.ni_startdir) {
1925 vrele(nd.ni_startdir);
1926 nd.ni_startdir = NULL;
1927 }
1928 if (dirp)
1929 vrele(dirp);
1930 NDFREE(&nd, NDF_ONLY_PNBUF);
1931 vn_finished_write(mp);
1932 VFS_UNLOCK_GIANT(vfslocked);
1933 return (error);
1934 }
1935
1936 /*
1937 * nfs v3 mknod service
1938 */
1939 int
1940 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1941 struct thread *td, struct mbuf **mrq)
1942 {
1943 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1944 struct sockaddr *nam = nfsd->nd_nam;
1945 caddr_t dpos = nfsd->nd_dpos;
1946 struct ucred *cred = nfsd->nd_cr;
1947 struct vattr va, dirfor, diraft;
1948 struct vattr *vap = &va;
1949 u_int32_t *tl;
1950 struct nameidata nd;
1951 caddr_t bpos;
1952 int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1953 u_int32_t major, minor;
1954 enum vtype vtyp;
1955 struct mbuf *mb, *mreq;
1956 struct vnode *vp, *dirp = NULL;
1957 nfsfh_t nfh;
1958 fhandle_t *fhp;
1959 struct mount *mp = NULL;
1960 int v3 = (nfsd->nd_flag & ND_NFSV3);
1961 int tvfslocked;
1962 int vfslocked;
1963
1964 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1965 vfslocked = 0;
1966 if (!v3)
1967 panic("nfsrv_mknod: v3 proc called on a v2 connection");
1968 ndclear(&nd);
1969
1970 fhp = &nfh.fh_generic;
1971 nfsm_srvmtofh(fhp);
1972 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
1973 error = ESTALE;
1974 goto ereply;
1975 }
1976 vfslocked = VFS_LOCK_GIANT(mp);
1977 (void) vn_start_write(NULL, &mp, V_WAIT);
1978 vfs_rel(mp); /* The write holds a ref. */
1979 nfsm_srvnamesiz(len);
1980
1981 nd.ni_cnd.cn_cred = cred;
1982 nd.ni_cnd.cn_nameiop = CREATE;
1983 nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE;
1984
1985 /*
1986 * Handle nfs_namei() call. If an error occurs, the nd structure
1987 * is not valid. However, nfsm_*() routines may still jump to
1988 * nfsmout.
1989 */
1990
1991 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
1992 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
1993 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
1994 if (error) {
1995 nfsm_reply(NFSX_WCCDATA(1));
1996 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1997 error = 0;
1998 goto nfsmout;
1999 }
2000 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
2001 vtyp = nfsv3tov_type(*tl);
2002 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
2003 error = NFSERR_BADTYPE;
2004 goto out;
2005 }
2006 VATTR_NULL(vap);
2007 nfsm_srvsattr(vap);
2008 if (vtyp == VCHR || vtyp == VBLK) {
2009 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
2010 major = fxdr_unsigned(u_int32_t, *tl++);
2011 minor = fxdr_unsigned(u_int32_t, *tl);
2012 vap->va_rdev = makedev(major, minor);
2013 }
2014
2015 /*
2016 * Iff doesn't exist, create it.
2017 */
2018 if (nd.ni_vp) {
2019 error = EEXIST;
2020 goto out;
2021 }
2022 vap->va_type = vtyp;
2023 if (vap->va_mode == (mode_t)VNOVAL)
2024 vap->va_mode = 0;
2025 if (vtyp == VSOCK) {
2026 vrele(nd.ni_startdir);
2027 nd.ni_startdir = NULL;
2028 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
2029 if (error)
2030 NDFREE(&nd, NDF_ONLY_PNBUF);
2031 } else {
2032 if (vtyp != VFIFO && (error = suser_cred(cred, 0)))
2033 goto out;
2034 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
2035 if (error) {
2036 NDFREE(&nd, NDF_ONLY_PNBUF);
2037 goto out;
2038 }
2039 vput(nd.ni_vp);
2040 nd.ni_vp = NULL;
2041
2042 /*
2043 * Release dvp prior to lookup
2044 */
2045 vput(nd.ni_dvp);
2046 nd.ni_dvp = NULL;
2047
2048 nd.ni_cnd.cn_nameiop = LOOKUP;
2049 nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
2050 nd.ni_cnd.cn_thread = td;
2051 nd.ni_cnd.cn_cred = td->td_ucred;
2052 tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
2053 if (tvfslocked)
2054 nd.ni_cnd.cn_flags |= GIANTHELD;
2055 error = lookup(&nd);
2056 nd.ni_dvp = NULL;
2057 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
2058 nd.ni_cnd.cn_flags &= ~GIANTHELD;
2059
2060 if (error)
2061 goto out;
2062 if (nd.ni_cnd.cn_flags & ISSYMLINK)
2063 error = EINVAL;
2064 }
2065
2066 /*
2067 * send response, cleanup, return.
2068 */
2069 out:
2070 vp = nd.ni_vp;
2071 if (!error) {
2072 bzero((caddr_t)fhp, sizeof(nfh));
2073 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
2074 error = VOP_VPTOFH(vp, &fhp->fh_fid);
2075 if (!error)
2076 error = VOP_GETATTR(vp, vap, cred, td);
2077 }
2078 if (nd.ni_dvp) {
2079 if (nd.ni_dvp == nd.ni_vp)
2080 vrele(nd.ni_dvp);
2081 else
2082 vput(nd.ni_dvp);
2083 nd.ni_dvp = NULL;
2084 }
2085 if (vp) {
2086 vput(vp);
2087 vp = NULL;
2088 nd.ni_vp = NULL;
2089 }
2090 if (nd.ni_startdir) {
2091 vrele(nd.ni_startdir);
2092 nd.ni_startdir = NULL;
2093 }
2094 NDFREE(&nd, NDF_ONLY_PNBUF);
2095 if (dirp) {
2096 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
2097 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2098 VOP_UNLOCK(dirp, 0, td);
2099 }
2100 ereply:
2101 nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
2102 if (v3) {
2103 if (!error) {
2104 nfsm_srvpostop_fh(fhp);
2105 nfsm_srvpostop_attr(0, vap);
2106 }
2107 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2108 }
2109 vn_finished_write(mp);
2110 VFS_UNLOCK_GIANT(vfslocked);
2111 return (0);
2112 nfsmout:
2113 if (nd.ni_dvp) {
2114 if (nd.ni_dvp == nd.ni_vp)
2115 vrele(nd.ni_dvp);
2116 else
2117 vput(nd.ni_dvp);
2118 }
2119 if (nd.ni_vp)
2120 vput(nd.ni_vp);
2121 if (dirp)
2122 vrele(dirp);
2123 if (nd.ni_startdir)
2124 vrele(nd.ni_startdir);
2125 NDFREE(&nd, NDF_ONLY_PNBUF);
2126 vn_finished_write(mp);
2127 VFS_UNLOCK_GIANT(vfslocked);
2128 return (error);
2129 }
2130
2131 /*
2132 * nfs remove service
 *
 * Looks up the name from the request in the directory identified by the
 * request's file handle and removes it with VOP_REMOVE().  A directory
 * is refused with EPERM and the root of a mounted filesystem with EBUSY.
 * For v3 the reply carries the directory's wcc data; error is cleared
 * after the reply has been built at ereply.
2133 */
2134 int
2135 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2136 struct thread *td, struct mbuf **mrq)
2137 {
2138 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2139 struct sockaddr *nam = nfsd->nd_nam;
2140 caddr_t dpos = nfsd->nd_dpos;
2141 struct ucred *cred = nfsd->nd_cr;
2142 struct nameidata nd;
2143 caddr_t bpos;
2144 int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2145 int v3 = (nfsd->nd_flag & ND_NFSV3);
2146 struct mbuf *mb, *mreq;
2147 struct vnode *dirp;
2148 struct vattr dirfor, diraft;
2149 nfsfh_t nfh;
2150 fhandle_t *fhp;
2151 struct mount *mp = NULL;
2152 int vfslocked;
2153
2154 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2155 ndclear(&nd);
2156 vfslocked = 0;
2157
/*
 * Pull the directory's file handle out of the request and find the mount
 * it belongs to; no such mount means the handle is stale.
 */
2158 fhp = &nfh.fh_generic;
2159 nfsm_srvmtofh(fhp);
2160 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2161 error = ESTALE;
2162 goto ereply;
2163 }
2164 vfslocked = VFS_LOCK_GIANT(mp);
2165 (void) vn_start_write(NULL, &mp, V_WAIT);
2166 vfs_rel(mp); /* The write holds a ref. */
2167 nfsm_srvnamesiz(len);
2168
2169 nd.ni_cnd.cn_cred = cred;
2170 nd.ni_cnd.cn_nameiop = DELETE;
2171 nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | MPSAFE;
2172 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
2173 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
2174 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
/* dirp is only used for the v3 directory attributes below. */
2175 if (dirp && !v3) {
2176 vrele(dirp);
2177 dirp = NULL;
2178 }
2179 if (error == 0) {
2180 if (nd.ni_vp->v_type == VDIR) {
2181 error = EPERM; /* POSIX */
2182 goto out;
2183 }
2184 /*
2185 * The root of a mounted filesystem cannot be deleted.
2186 */
2187 if (nd.ni_vp->v_vflag & VV_ROOT) {
2188 error = EBUSY;
2189 goto out;
2190 }
/*
 * Both checks above arrive here with error set, which skips the
 * removal; otherwise the name is removed and the path buffer freed.
 */
2191 out:
2192 if (!error) {
2193 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2194 NDFREE(&nd, NDF_ONLY_PNBUF);
2195 }
2196 }
/*
 * v3: fetch the directory attributes after the operation.  When dirp is
 * not the parent vnode held in nd, nd's vnodes are released before dirp
 * is locked.  The reference on dirp is dropped afterwards.
 */
2197 if (dirp && v3) {
2198 if (dirp == nd.ni_dvp)
2199 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2200 else {
2201 /* Drop the other locks to avoid deadlock. */
2202 if (nd.ni_dvp) {
2203 if (nd.ni_dvp == nd.ni_vp)
2204 vrele(nd.ni_dvp);
2205 else
2206 vput(nd.ni_dvp);
2207 }
2208 if (nd.ni_vp)
2209 vput(nd.ni_vp);
2210 nd.ni_dvp = NULL;
2211 nd.ni_vp = NULL;
2212
2213 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
2214 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2215 VOP_UNLOCK(dirp, 0, td);
2216 }
2217 vrele(dirp);
2218 dirp = NULL;
2219 }
/* Build the reply; only v3 replies carry a body (the wcc data). */
2220 ereply:
2221 nfsm_reply(NFSX_WCCDATA(v3));
2222 if (v3)
2223 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2224 error = 0;
/* Common exit: release the vnodes still recorded in nd. */
2225 nfsmout:
2226 NDFREE(&nd, NDF_ONLY_PNBUF);
2227 if (nd.ni_dvp) {
2228 if (nd.ni_dvp == nd.ni_vp)
2229 vrele(nd.ni_dvp);
2230 else
2231 vput(nd.ni_dvp);
2232 }
2233 if (nd.ni_vp)
2234 vput(nd.ni_vp);
2235 vn_finished_write(mp);
2236 VFS_UNLOCK_GIANT(vfslocked);
2237 return(error);
2238 }
2239
2240 /*
2241 * nfs rename service
2242 */
2243 int
2244 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2245 struct thread *td, struct mbuf **mrq)
2246 {
2247 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2248 struct sockaddr *nam = nfsd->nd_nam;
2249 caddr_t dpos = nfsd->nd_dpos;
2250 struct ucred *cred = nfsd->nd_cr;
2251 caddr_t bpos;
2252 int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2253 int tdirfor_ret = 1, tdiraft_ret = 1;
2254 int v3 = (nfsd->nd_flag & ND_NFSV3);
2255 struct mbuf *mb, *mreq;
2256 struct nameidata fromnd, tond;
2257 struct vnode *fvp, *tvp, *tdvp, *fdirp = NULL;
2258 struct vnode *tdirp = NULL;
2259 struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2260 nfsfh_t fnfh, tnfh;
2261 fhandle_t *ffhp, *tfhp;
2262 uid_t saved_uid;
2263 struct mount *mp = NULL;
2264 int vfslocked;
2265
2266 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2267 vfslocked = 0;
2268 #ifndef nolint
2269 fvp = NULL;
2270 #endif
2271 ffhp = &fnfh.fh_generic;
2272 tfhp = &tnfh.fh_generic;
2273
2274 /*
2275 * Clear fields in case goto nfsmout occurs from macro.
2276 */
2277
2278 ndclear(&fromnd);
2279 ndclear(&tond);
2280
2281 nfsm_srvmtofh(ffhp);
2282 if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL) {
2283 error = ESTALE;
2284 goto out1;
2285 }
2286 vfslocked = VFS_LOCK_GIANT(mp);
2287 (void) vn_start_write(NULL, &mp, V_WAIT);
2288 vfs_rel(mp); /* The write holds a ref. */
2289 nfsm_srvnamesiz(len);
2290 /*
2291 * Remember our original uid so that we can reset cr_uid before
2292 * the second nfs_namei() call, in case it is remapped.
2293 */
2294 saved_uid = cred->cr_uid;
2295 fromnd.ni_cnd.cn_cred = cred;
2296 fromnd.ni_cnd.cn_nameiop = DELETE;
2297 fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART | MPSAFE;
2298 error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md,
2299 &dpos, &fdirp, v3, &fdirfor, &fdirfor_ret, td, FALSE);
2300 vfslocked = nfsrv_lockedpair_nd(vfslocked, &fromnd);
2301 if (fdirp && !v3) {
2302 vrele(fdirp);
2303 fdirp = NULL;
2304 }
2305 if (error) {
2306 nfsm_reply(2 * NFSX_WCCDATA(v3));
2307 if (v3) {
2308 nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2309 nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2310 }
2311 error = 0;
2312 goto nfsmout;
2313 }
2314 fvp = fromnd.ni_vp;
2315 nfsm_srvmtofh(tfhp);
2316 nfsm_srvnamesiz(len2);
2317 cred->cr_uid = saved_uid;
2318 tond.ni_cnd.cn_cred = cred;
2319 tond.ni_cnd.cn_nameiop = RENAME;
2320 tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | MPSAFE;
2321 error = nfs_namei(&tond, tfhp, len2, slp, nam, &md,
2322 &dpos, &tdirp, v3, &tdirfor, &tdirfor_ret, td, FALSE);
2323 vfslocked = nfsrv_lockedpair_nd(vfslocked, &tond);
2324 if (tdirp && !v3) {
2325 vrele(tdirp);
2326 tdirp = NULL;
2327 }
2328 if (error)
2329 goto out1;
2330
2331 tdvp = tond.ni_dvp;
2332 tvp = tond.ni_vp;
2333 if (tvp != NULL) {
2334 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2335 if (v3)
2336 error = EEXIST;
2337 else
2338 error = EISDIR;
2339 goto out;
2340 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2341 if (v3)
2342 error = EEXIST;
2343 else
2344 error = ENOTDIR;
2345 goto out;
2346 }
2347 if (tvp->v_type == VDIR && tvp->v_mountedhere) {
2348 if (v3)
2349 error = EXDEV;
2350 else
2351 error = ENOTEMPTY;
2352 goto out;
2353 }
2354 }
2355 if (fvp->v_type == VDIR && fvp->v_mountedhere) {
2356 if (v3)
2357 error = EXDEV;
2358 else
2359 error = ENOTEMPTY;
2360 goto out;
2361 }
2362 if (fvp->v_mount != tdvp->v_mount) {
2363 if (v3)
2364 error = EXDEV;
2365 else
2366 error = ENOTEMPTY;
2367 goto out;
2368 }
2369 if (fvp == tdvp) {
2370 if (v3)
2371 error = EINVAL;
2372 else
2373 error = ENOTEMPTY;
2374 }
2375 /*
2376 * If source is the same as the destination (that is the
2377 * same vnode with the same name in the same directory),
2378 * then there is nothing to do.
2379 */
2380 if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2381 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2382 !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2383 fromnd.ni_cnd.cn_namelen))
2384 error = -1;
2385 out:
2386 if (!error) {
2387 /*
2388 * The VOP_RENAME function releases all vnode references &
2389 * locks prior to returning so we need to clear the pointers
2390 * to bypass cleanup code later on.
2391 */
2392 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2393 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2394 fromnd.ni_dvp = NULL;
2395 fromnd.ni_vp = NULL;
2396 tond.ni_dvp = NULL;
2397 tond.ni_vp = NULL;
2398 if (error) {
2399 NDFREE(&fromnd, NDF_ONLY_PNBUF);
2400 NDFREE(&tond, NDF_ONLY_PNBUF);
2401 }
2402 } else {
2403 if (error == -1)
2404 error = 0;
2405 }
2406 /* fall through */
2407 out1:
2408 nfsm_reply(2 * NFSX_WCCDATA(v3));
2409 if (v3) {
2410 /* Release existing locks to prevent deadlock. */
2411 if (tond.ni_dvp) {
2412 if (tond.ni_dvp == tond.ni_vp)
2413 vrele(tond.ni_dvp);
2414 else
2415 vput(tond.ni_dvp);
2416 }
2417 if (tond.ni_vp)
2418 vput(tond.ni_vp);
2419 tond.ni_dvp = NULL;
2420 tond.ni_vp = NULL;
2421
2422 if (fdirp) {
2423 vn_lock(fdirp, LK_EXCLUSIVE | LK_RETRY, td);
2424 fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, td);
2425 VOP_UNLOCK(fdirp, 0, td);
2426 }
2427 if (tdirp) {
2428 vn_lock(tdirp, LK_EXCLUSIVE | LK_RETRY, td);
2429 tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, td);
2430 VOP_UNLOCK(tdirp, 0, td);
2431 }
2432 nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2433 nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2434 }
2435 error = 0;
2436 /* fall through */
2437
2438 nfsmout:
2439 /*
2440 * Clear out tond related fields
2441 */
2442 if (tond.ni_dvp) {
2443 if (tond.ni_dvp == tond.ni_vp)
2444 vrele(tond.ni_dvp);
2445 else
2446 vput(tond.ni_dvp);
2447 }
2448 if (tond.ni_vp)
2449 vput(tond.ni_vp);
2450 if (tdirp)
2451 vrele(tdirp);
2452 if (tond.ni_startdir)
2453 vrele(tond.ni_startdir);
2454 NDFREE(&tond, NDF_ONLY_PNBUF);
2455 /*
2456 * Clear out fromnd related fields
2457 */
2458 if (fdirp)
2459 vrele(fdirp);
2460 if (fromnd.ni_startdir)
2461 vrele(fromnd.ni_startdir);
2462 NDFREE(&fromnd, NDF_ONLY_PNBUF);
2463 if (fromnd.ni_dvp)
2464 vrele(fromnd.ni_dvp);
2465 if (fromnd.ni_vp)
2466 vrele(fromnd.ni_vp);
2467
2468 vn_finished_write(mp);
2469 VFS_UNLOCK_GIANT(vfslocked);
2470 return (error);
2471 }
2472
2473 /*
2474 * nfs link service
2475 */
2476 int
2477 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2478 struct thread *td, struct mbuf **mrq)
2479 {
2480 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2481 struct sockaddr *nam = nfsd->nd_nam;
2482 caddr_t dpos = nfsd->nd_dpos;
2483 struct ucred *cred = nfsd->nd_cr;
2484 struct nameidata nd;
2485 caddr_t bpos;
2486 int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2487 int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
2488 struct mbuf *mb, *mreq;
2489 struct vnode *vp = NULL, *xp, *dirp = NULL;
2490 struct vattr dirfor, diraft, at;
2491 nfsfh_t nfh, dnfh;
2492 fhandle_t *fhp, *dfhp;
2493 struct mount *mp = NULL;
2494 int tvfslocked;
2495 int vfslocked;
2496
2497 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2498 ndclear(&nd);
2499 vfslocked = 0;
2500
2501 fhp = &nfh.fh_generic;
2502 dfhp = &dnfh.fh_generic;
2503 nfsm_srvmtofh(fhp);
2504 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2505 error = ESTALE;
2506 goto ereply;
2507 }
2508 vfslocked = VFS_LOCK_GIANT(mp);
2509 (void) vn_start_write(NULL, &mp, V_WAIT);
2510 vfs_rel(mp); /* The write holds a ref. */
2511 nfsm_srvmtofh(dfhp);
2512 nfsm_srvnamesiz(len);
2513
2514 error = nfsrv_fhtovp(fhp, TRUE, &vp, &tvfslocked, cred, slp,
2515 nam, &rdonly, TRUE);
2516 vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
2517 if (error) {
2518 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2519 if (v3) {
2520 nfsm_srvpostop_attr(getret, &at);
2521 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2522 }
2523 vp = NULL;
2524 error = 0;
2525 goto nfsmout;
2526 }
2527 if (v3)
2528 getret = VOP_GETATTR(vp, &at, cred, td);
2529 if (vp->v_type == VDIR) {
2530 error = EPERM; /* POSIX */
2531 goto out1;
2532 }
2533 VOP_UNLOCK(vp, 0, td);
2534 nd.ni_cnd.cn_cred = cred;
2535 nd.ni_cnd.cn_nameiop = CREATE;
2536 nd.ni_cnd.cn_flags = LOCKPARENT | MPSAFE | MPSAFE;
2537 error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos,
2538 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
2539 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
2540 if (dirp && !v3) {
2541 vrele(dirp);
2542 dirp = NULL;
2543 }
2544 if (error) {
2545 vrele(vp);
2546 vp = NULL;
2547 goto out2;
2548 }
2549 xp = nd.ni_vp;
2550 if (xp != NULL) {
2551 error = EEXIST;
2552 vrele(vp);
2553 vp = NULL;
2554 goto out2;
2555 }
2556 xp = nd.ni_dvp;
2557 if (vp->v_mount != xp->v_mount) {
2558 error = EXDEV;
2559 vrele(vp);
2560 vp = NULL;
2561 goto out2;
2562 }
2563 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2564 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2565 NDFREE(&nd, NDF_ONLY_PNBUF);
2566 /* fall through */
2567
2568 out1:
2569 if (v3)
2570 getret = VOP_GETATTR(vp, &at, cred, td);
2571 out2:
2572 if (dirp) {
2573 if (dirp == nd.ni_dvp)
2574 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2575 else {
2576 /* Release existing locks to prevent deadlock. */
2577 if (nd.ni_dvp) {
2578 if (nd.ni_dvp == nd.ni_vp)
2579 vrele(nd.ni_dvp);
2580 else
2581 vput(nd.ni_dvp);
2582 }
2583 if (nd.ni_vp)
2584 vrele(nd.ni_vp);
2585 nd.ni_dvp = NULL;
2586 nd.ni_vp = NULL;
2587
2588 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
2589 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2590 VOP_UNLOCK(dirp, 0, td);
2591 }
2592 }
2593 ereply:
2594 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2595 if (v3) {
2596 nfsm_srvpostop_attr(getret, &at);
2597 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2598 }
2599 error = 0;
2600 /* fall through */
2601
2602 nfsmout:
2603 NDFREE(&nd, NDF_ONLY_PNBUF);
2604 if (vp)
2605 vput(vp);
2606 if (nd.ni_dvp) {
2607 if (nd.ni_dvp == nd.ni_vp)
2608 vrele(nd.ni_dvp);
2609 else
2610 vput(nd.ni_dvp);
2611 }
2612 if (dirp)
2613 vrele(dirp);
2614 if (nd.ni_vp)
2615 vrele(nd.ni_vp);
2616 vn_finished_write(mp);
2617 VFS_UNLOCK_GIANT(vfslocked);
2618 return(error);
2619 }
2620
2621 /*
2622 * nfs symbolic link service
2623 */
2624 int
2625 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2626 struct thread *td, struct mbuf **mrq)
2627 {
2628 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2629 struct sockaddr *nam = nfsd->nd_nam;
2630 caddr_t dpos = nfsd->nd_dpos;
2631 struct ucred *cred = nfsd->nd_cr;
2632 struct vattr va, dirfor, diraft;
2633 struct nameidata nd;
2634 struct vattr *vap = &va;
2635 struct nfsv2_sattr *sp;
2636 char *bpos, *pathcp = NULL;
2637 struct uio io;
2638 struct iovec iv;
2639 int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2640 int v3 = (nfsd->nd_flag & ND_NFSV3);
2641 struct mbuf *mb, *mreq;
2642 struct vnode *dirp = NULL;
2643 nfsfh_t nfh;
2644 fhandle_t *fhp;
2645 struct mount *mp = NULL;
2646 int tvfslocked;
2647 int vfslocked;
2648
2649 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2650 ndclear(&nd);
2651 vfslocked = 0;
2652
2653 fhp = &nfh.fh_generic;
2654 nfsm_srvmtofh(fhp);
2655 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2656 error = ESTALE;
2657 goto out;
2658 }
2659 vfslocked = VFS_LOCK_GIANT(mp);
2660 (void) vn_start_write(NULL, &mp, V_WAIT);
2661 vfs_rel(mp); /* The write holds a ref. */
2662 nfsm_srvnamesiz(len);
2663 nd.ni_cnd.cn_cred = cred;
2664 nd.ni_cnd.cn_nameiop = CREATE;
2665 nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART | MPSAFE;
2666 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
2667 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
2668 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
2669 if (error == 0) {
2670 VATTR_NULL(vap);
2671 if (v3)
2672 nfsm_srvsattr(vap);
2673 nfsm_srvpathsiz(len2);
2674 }
2675 if (dirp && !v3) {
2676 vrele(dirp);
2677 dirp = NULL;
2678 }
2679 if (error)
2680 goto out;
2681 MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2682 iv.iov_base = pathcp;
2683 iv.iov_len = len2;
2684 io.uio_resid = len2;
2685 io.uio_offset = 0;
2686 io.uio_iov = &iv;
2687 io.uio_iovcnt = 1;
2688 io.uio_segflg = UIO_SYSSPACE;
2689 io.uio_rw = UIO_READ;
2690 io.uio_td = NULL;
2691 nfsm_mtouio(&io, len2);
2692 if (!v3) {
2693 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
2694 vap->va_mode = nfstov_mode(sp->sa_mode);
2695 }
2696 *(pathcp + len2) = '\0';
2697 if (nd.ni_vp) {
2698 error = EEXIST;
2699 goto out;
2700 }
2701
2702 /*
2703 * issue symlink op. SAVESTART is set so the underlying path component
2704 * is only freed by the VOP if an error occurs.
2705 */
2706 if (vap->va_mode == (mode_t)VNOVAL)
2707 vap->va_mode = 0;
2708 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
2709 if (error)
2710 NDFREE(&nd, NDF_ONLY_PNBUF);
2711 else
2712 vput(nd.ni_vp);
2713 nd.ni_vp = NULL;
2714 /*
2715 * releases directory prior to potential lookup op.
2716 */
2717 vput(nd.ni_dvp);
2718 nd.ni_dvp = NULL;
2719
2720 if (error == 0) {
2721 if (v3) {
2722 /*
2723 * Issue lookup. Leave SAVESTART set so we can easily free
2724 * the name buffer later on.
2725 *
2726 * since LOCKPARENT is not set, ni_dvp will be garbage on
2727 * return whether an error occurs or not.
2728 */
2729 nd.ni_cnd.cn_nameiop = LOOKUP;
2730 nd.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW);
2731 nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF);
2732 nd.ni_cnd.cn_thread = td;
2733 nd.ni_cnd.cn_cred = cred;
2734 tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
2735 if (tvfslocked)
2736 nd.ni_cnd.cn_flags |= GIANTHELD;
2737 error = lookup(&nd);
2738 nd.ni_dvp = NULL;
2739 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
2740 nd.ni_cnd.cn_flags &= ~GIANTHELD;
2741
2742 if (error == 0) {
2743 bzero((caddr_t)fhp, sizeof(nfh));
2744 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
2745 error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
2746 if (!error)
2747 error = VOP_GETATTR(nd.ni_vp, vap, cred,
2748 td);
2749 vput(nd.ni_vp);
2750 nd.ni_vp = NULL;
2751 }
2752 }
2753 }
2754 out:
2755 /*
2756 * These releases aren't strictly required, does even doing them
2757 * make any sense? XXX can nfsm_reply() block?
2758 */
2759 if (pathcp) {
2760 FREE(pathcp, M_TEMP);
2761 pathcp = NULL;
2762 }
2763 if (dirp) {
2764 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
2765 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2766 VOP_UNLOCK(dirp, 0, td);
2767 }
2768 if (nd.ni_startdir) {
2769 vrele(nd.ni_startdir);
2770 nd.ni_startdir = NULL;
2771 }
2772 nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2773 if (v3) {
2774 if (!error) {
2775 nfsm_srvpostop_fh(fhp);
2776 nfsm_srvpostop_attr(0, vap);
2777 }
2778 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2779 }
2780 error = 0;
2781 /* fall through */
2782
2783 nfsmout:
2784 NDFREE(&nd, NDF_ONLY_PNBUF);
2785 if (nd.ni_dvp) {
2786 if (nd.ni_dvp == nd.ni_vp)
2787 vrele(nd.ni_dvp);
2788 else
2789 vput(nd.ni_dvp);
2790 }
2791 if (nd.ni_vp)
2792 vrele(nd.ni_vp);
2793 if (nd.ni_startdir)
2794 vrele(nd.ni_startdir);
2795 if (dirp)
2796 vrele(dirp);
2797 if (pathcp)
2798 FREE(pathcp, M_TEMP);
2799
2800 vn_finished_write(mp);
2801 VFS_UNLOCK_GIANT(vfslocked);
2802 return (error);
2803 }
2804
2805 /*
2806 * nfs mkdir service
2807 */
2808 int
2809 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2810 struct thread *td, struct mbuf **mrq)
2811 {
2812 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2813 struct sockaddr *nam = nfsd->nd_nam;
2814 caddr_t dpos = nfsd->nd_dpos;
2815 struct ucred *cred = nfsd->nd_cr;
2816 struct vattr va, dirfor, diraft;
2817 struct vattr *vap = &va;
2818 struct nfs_fattr *fp;
2819 struct nameidata nd;
2820 u_int32_t *tl;
2821 caddr_t bpos;
2822 int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2823 int v3 = (nfsd->nd_flag & ND_NFSV3);
2824 struct mbuf *mb, *mreq;
2825 struct vnode *dirp = NULL;
2826 int vpexcl = 0;
2827 nfsfh_t nfh;
2828 fhandle_t *fhp;
2829 struct mount *mp = NULL;
2830 int vfslocked;
2831
2832 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2833 ndclear(&nd);
2834 vfslocked = 0;
2835
2836 fhp = &nfh.fh_generic;
2837 nfsm_srvmtofh(fhp);
2838 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2839 error = ESTALE;
2840 goto out;
2841 }
2842 vfslocked = VFS_LOCK_GIANT(mp);
2843 (void) vn_start_write(NULL, &mp, V_WAIT);
2844 vfs_rel(mp); /* The write holds a ref. */
2845 nfsm_srvnamesiz(len);
2846 nd.ni_cnd.cn_cred = cred;
2847 nd.ni_cnd.cn_nameiop = CREATE;
2848 nd.ni_cnd.cn_flags = LOCKPARENT | MPSAFE;
2849
2850 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
2851 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
2852 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
2853 if (dirp && !v3) {
2854 vrele(dirp);
2855 dirp = NULL;
2856 }
2857 if (error) {
2858 nfsm_reply(NFSX_WCCDATA(v3));
2859 if (v3)
2860 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2861 error = 0;
2862 goto nfsmout;
2863 }
2864 VATTR_NULL(vap);
2865 if (v3) {
2866 nfsm_srvsattr(vap);
2867 } else {
2868 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
2869 vap->va_mode = nfstov_mode(*tl++);
2870 }
2871
2872 /*
2873 * At this point nd.ni_dvp is referenced and exclusively locked and
2874 * nd.ni_vp, if it exists, is referenced but not locked.
2875 */
2876
2877 vap->va_type = VDIR;
2878 if (nd.ni_vp != NULL) {
2879 NDFREE(&nd, NDF_ONLY_PNBUF);
2880 error = EEXIST;
2881 goto out;
2882 }
2883
2884 /*
2885 * Issue mkdir op. Since SAVESTART is not set, the pathname
2886 * component is freed by the VOP call. This will fill-in
2887 * nd.ni_vp, reference, and exclusively lock it.
2888 */
2889 if (vap->va_mode == (mode_t)VNOVAL)
2890 vap->va_mode = 0;
2891 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
2892 NDFREE(&nd, NDF_ONLY_PNBUF);
2893 vpexcl = 1;
2894
2895 vput(nd.ni_dvp);
2896 nd.ni_dvp = NULL;
2897
2898 if (!error) {
2899 bzero((caddr_t)fhp, sizeof(nfh));
2900 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
2901 error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
2902 if (!error)
2903 error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
2904 }
2905 out:
2906 if (dirp) {
2907 if (dirp == nd.ni_dvp) {
2908 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2909 } else {
2910 /* Release existing locks to prevent deadlock. */
2911 if (nd.ni_dvp) {
2912 NDFREE(&nd, NDF_ONLY_PNBUF);
2913 if (nd.ni_dvp == nd.ni_vp && vpexcl)
2914 vrele(nd.ni_dvp);
2915 else
2916 vput(nd.ni_dvp);
2917 }
2918 if (nd.ni_vp) {
2919 if (vpexcl)
2920 vput(nd.ni_vp);
2921 else
2922 vrele(nd.ni_vp);
2923 }
2924 nd.ni_dvp = NULL;
2925 nd.ni_vp = NULL;
2926 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
2927 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2928 VOP_UNLOCK(dirp, 0, td);
2929 }
2930 }
2931 nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2932 if (v3) {
2933 if (!error) {
2934 nfsm_srvpostop_fh(fhp);
2935 nfsm_srvpostop_attr(0, vap);
2936 }
2937 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2938 } else if (!error) {
2939 /* v2 non-error case. */
2940 nfsm_srvfhtom(fhp, v3);
2941 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
2942 nfsm_srvfillattr(vap, fp);
2943 }
2944 error = 0;
2945 /* fall through */
2946
2947 nfsmout:
2948 if (nd.ni_dvp) {
2949 NDFREE(&nd, NDF_ONLY_PNBUF);
2950 if (nd.ni_dvp == nd.ni_vp && vpexcl)
2951 vrele(nd.ni_dvp);
2952 else
2953 vput(nd.ni_dvp);
2954 }
2955 if (nd.ni_vp) {
2956 if (vpexcl)
2957 vput(nd.ni_vp);
2958 else
2959 vrele(nd.ni_vp);
2960 }
2961 if (dirp)
2962 vrele(dirp);
2963 vn_finished_write(mp);
2964 VFS_UNLOCK_GIANT(vfslocked);
2965 return (error);
2966 }
2967
2968 /*
2969 * nfs rmdir service
2970 */
2971 int
2972 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2973 struct thread *td, struct mbuf **mrq)
2974 {
2975 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2976 struct sockaddr *nam = nfsd->nd_nam;
2977 caddr_t dpos = nfsd->nd_dpos;
2978 struct ucred *cred = nfsd->nd_cr;
2979 caddr_t bpos;
2980 int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2981 int v3 = (nfsd->nd_flag & ND_NFSV3);
2982 struct mbuf *mb, *mreq;
2983 struct vnode *vp, *dirp = NULL;
2984 struct vattr dirfor, diraft;
2985 nfsfh_t nfh;
2986 fhandle_t *fhp;
2987 struct nameidata nd;
2988 struct mount *mp = NULL;
2989 int vfslocked;
2990
2991 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2992 ndclear(&nd);
2993 vfslocked = 0;
2994
2995 fhp = &nfh.fh_generic;
2996 nfsm_srvmtofh(fhp);
2997 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2998 error = ESTALE;
2999 goto out;
3000 }
3001 vfslocked = VFS_LOCK_GIANT(mp);
3002 (void) vn_start_write(NULL, &mp, V_WAIT);
3003 vfs_rel(mp); /* The write holds a ref. */
3004 nfsm_srvnamesiz(len);
3005 nd.ni_cnd.cn_cred = cred;
3006 nd.ni_cnd.cn_nameiop = DELETE;
3007 nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | MPSAFE;
3008 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
3009 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
3010 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
3011 if (dirp && !v3) {
3012 vrele(dirp);
3013 dirp = NULL;
3014 }
3015 if (error) {
3016 nfsm_reply(NFSX_WCCDATA(v3));
3017 if (v3)
3018 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
3019 error = 0;
3020 goto nfsmout;
3021 }
3022 vp = nd.ni_vp;
3023 if (vp->v_type != VDIR) {
3024 error = ENOTDIR;
3025 goto out;
3026 }
3027 /*
3028 * No rmdir "." please.
3029 */
3030 if (nd.ni_dvp == vp) {
3031 error = EINVAL;
3032 goto out;
3033 }
3034 /*
3035 * The root of a mounted filesystem cannot be deleted.
3036 */
3037 if (vp->v_vflag & VV_ROOT)
3038 error = EBUSY;
3039 out:
3040 /*
3041 * Issue or abort op. Since SAVESTART is not set, path name
3042 * component is freed by the VOP after either.
3043 */
3044 if (!error)
3045 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3046 NDFREE(&nd, NDF_ONLY_PNBUF);
3047
3048 if (dirp) {
3049 if (dirp == nd.ni_dvp)
3050 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
3051 else {
3052 /* Release existing locks to prevent deadlock. */
3053 if (nd.ni_dvp) {
3054 if (nd.ni_dvp == nd.ni_vp)
3055 vrele(nd.ni_dvp);
3056 else
3057 vput(nd.ni_dvp);
3058 }
3059 if (nd.ni_vp)
3060 vput(nd.ni_vp);
3061 nd.ni_dvp = NULL;
3062 nd.ni_vp = NULL;
3063 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
3064 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
3065 VOP_UNLOCK(dirp, 0, td);
3066 }
3067 }
3068 nfsm_reply(NFSX_WCCDATA(v3));
3069 error = 0;
3070 if (v3)
3071 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
3072 /* fall through */
3073
3074 nfsmout:
3075 NDFREE(&nd, NDF_ONLY_PNBUF);
3076 if (nd.ni_dvp) {
3077 if (nd.ni_dvp == nd.ni_vp)
3078 vrele(nd.ni_dvp);
3079 else
3080 vput(nd.ni_dvp);
3081 }
3082 if (nd.ni_vp)
3083 vput(nd.ni_vp);
3084 if (dirp)
3085 vrele(dirp);
3086
3087 vn_finished_write(mp);
3088 VFS_UNLOCK_GIANT(vfslocked);
3089 return(error);
3090 }
3091
3092 /*
3093 * nfs readdir service
3094 * - mallocs what it thinks is enough to read
3095 * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
3096 * - calls VOP_READDIR()
3097 * - loops around building the reply
3098 * if the output generated exceeds count break out of loop
3099 * The nfsm_clget macro is used here so that the reply will be packed
3100 * tightly in mbuf clusters.
3101 * - it only knows that it has encountered eof when the VOP_READDIR()
3102 * reads nothing
3103 * - as such one readdir rpc will return eof false although you are there
3104 * and then the next will return eof
3105 * - it trims out records with d_fileno == 0
3106 * this doesn't matter for Unix clients, but they might confuse clients
3107 * for other os'.
3108 * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
3109 * than requested, but this may not apply to all filesystems. For
3110 * example, client NFS does not { although it is never remote mounted
3111 * anyhow }
3112 * The alternate call nfsrv_readdirplus() does lookups as well.
3113 * PS: The NFS protocol spec. does not clarify what the "count" byte
3114 * argument is a count of.. just name strings and file id's or the
3115 * entire reply rpc or ...
3116 * I tried just file name and id sizes and it confused the Sun client,
3117 * so I am using the full rpc size now. The "paranoia.." comment refers
3118 * to including the status longwords that are not a part of the dir.
3119 * "entry" structures, but are in the rpc.
3120 */
3121 struct flrep {
3122 nfsuint64 fl_off;
3123 u_int32_t fl_postopok;
3124 u_int32_t fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
3125 u_int32_t fl_fhok;
3126 u_int32_t fl_fhsize;
3127 u_int32_t fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
3128 };
3129
3130 int
3131 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3132 struct thread *td, struct mbuf **mrq)
3133 {
3134 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3135 struct sockaddr *nam = nfsd->nd_nam;
3136 caddr_t dpos = nfsd->nd_dpos;
3137 struct ucred *cred = nfsd->nd_cr;
3138 char *bp, *be;
3139 struct mbuf *mp;
3140 struct dirent *dp;
3141 caddr_t cp;
3142 u_int32_t *tl;
3143 caddr_t bpos;
3144 struct mbuf *mb, *mreq;
3145 char *cpos, *cend, *rbuf;
3146 struct vnode *vp = NULL;
3147 struct vattr at;
3148 nfsfh_t nfh;
3149 fhandle_t *fhp;
3150 struct uio io;
3151 struct iovec iv;
3152 int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3153 int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
3154 int v3 = (nfsd->nd_flag & ND_NFSV3);
3155 u_quad_t off, toff, verf;
3156 u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3157 int vfslocked;
3158
3159 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3160 vfslocked = 0;
3161 fhp = &nfh.fh_generic;
3162 nfsm_srvmtofh(fhp);
3163 if (v3) {
3164 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
3165 toff = fxdr_hyper(tl);
3166 tl += 2;
3167 verf = fxdr_hyper(tl);
3168 tl += 2;
3169 } else {
3170 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
3171 toff = fxdr_unsigned(u_quad_t, *tl++);
3172 verf = 0; /* shut up gcc */
3173 }
3174 off = toff;
3175 cnt = fxdr_unsigned(int, *tl);
3176 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3177 xfer = NFS_SRVMAXDATA(nfsd);
3178 if (cnt > xfer)
3179 cnt = xfer;
3180 if (siz > xfer)
3181 siz = xfer;
3182 fullsiz = siz;
3183 error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
3184 nam, &rdonly, TRUE);
3185 if (!error && vp->v_type != VDIR) {
3186 error = ENOTDIR;
3187 vput(vp);
3188 vp = NULL;
3189 }
3190 if (error) {
3191 nfsm_reply(NFSX_UNSIGNED);
3192 if (v3)
3193 nfsm_srvpostop_attr(getret, &at);
3194 error = 0;
3195 goto nfsmout;
3196 }
3197
3198 /*
3199 * Obtain lock on vnode for this section of the code
3200 */
3201 if (v3) {
3202 error = getret = VOP_GETATTR(vp, &at, cred, td);
3203 #if 0
3204 /*
3205 * XXX This check may be too strict for Solaris 2.5 clients.
3206 */
3207 if (!error && toff && verf && verf != at.va_filerev)
3208 error = NFSERR_BAD_COOKIE;
3209 #endif
3210 }
3211 if (!error)
3212 error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
3213 if (error) {
3214 vput(vp);
3215 vp = NULL;
3216 nfsm_reply(NFSX_POSTOPATTR(v3));
3217 if (v3)
3218 nfsm_srvpostop_attr(getret, &at);
3219 error = 0;
3220 goto nfsmout;
3221 }
3222 VOP_UNLOCK(vp, 0, td);
3223
3224 /*
3225 * end section. Allocate rbuf and continue
3226 */
3227 MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3228 again:
3229 iv.iov_base = rbuf;
3230 iv.iov_len = fullsiz;
3231 io.uio_iov = &iv;
3232 io.uio_iovcnt = 1;
3233 io.uio_offset = (off_t)off;
3234 io.uio_resid = fullsiz;
3235 io.uio_segflg = UIO_SYSSPACE;
3236 io.uio_rw = UIO_READ;
3237 io.uio_td = NULL;
3238 eofflag = 0;
3239 vn_lock(vp, LK_SHARED | LK_RETRY, td);
3240 if (cookies) {
3241 free((caddr_t)cookies, M_TEMP);
3242 cookies = NULL;
3243 }
3244 error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3245 off = (off_t)io.uio_offset;
3246 if (!cookies && !error)
3247 error = NFSERR_PERM;
3248 if (v3) {
3249 getret = VOP_GETATTR(vp, &at, cred, td);
3250 if (!error)
3251 error = getret;
3252 }
3253 VOP_UNLOCK(vp, 0, td);
3254 if (error) {
3255 vrele(vp);
3256 vp = NULL;
3257 free((caddr_t)rbuf, M_TEMP);
3258 if (cookies)
3259 free((caddr_t)cookies, M_TEMP);
3260 nfsm_reply(NFSX_POSTOPATTR(v3));
3261 if (v3)
3262 nfsm_srvpostop_attr(getret, &at);
3263 error = 0;
3264 goto nfsmout;
3265 }
3266 if (io.uio_resid) {
3267 siz -= io.uio_resid;
3268
3269 /*
3270 * If nothing read, return eof
3271 * rpc reply
3272 */
3273 if (siz == 0) {
3274 vrele(vp);
3275 vp = NULL;
3276 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
3277 2 * NFSX_UNSIGNED);
3278 if (v3) {
3279 nfsm_srvpostop_attr(getret, &at);
3280 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
3281 txdr_hyper(at.va_filerev, tl);
3282 tl += 2;
3283 } else
3284 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
3285 *tl++ = nfsrv_nfs_false;
3286 *tl = nfsrv_nfs_true;
3287 FREE((caddr_t)rbuf, M_TEMP);
3288 FREE((caddr_t)cookies, M_TEMP);
3289 error = 0;
3290 goto nfsmout;
3291 }
3292 }
3293
3294 /*
3295 * Check for degenerate cases of nothing useful read.
3296 * If so go try again
3297 */
3298 cpos = rbuf;
3299 cend = rbuf + siz;
3300 dp = (struct dirent *)cpos;
3301 cookiep = cookies;
3302 /*
3303 * For some reason FreeBSD's ufs_readdir() chooses to back the
3304 * directory offset up to a block boundary, so it is necessary to
3305 * skip over the records that precede the requested offset. This
3306 * requires the assumption that file offset cookies monotonically
3307 * increase.
3308 */
3309 while (cpos < cend && ncookies > 0 &&
3310 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
3311 ((u_quad_t)(*cookiep)) <= toff)) {
3312 cpos += dp->d_reclen;
3313 dp = (struct dirent *)cpos;
3314 cookiep++;
3315 ncookies--;
3316 }
3317 if (cpos >= cend || ncookies == 0) {
3318 toff = off;
3319 siz = fullsiz;
3320 goto again;
3321 }
3322
3323 len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */
3324 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
3325 if (v3) {
3326 nfsm_srvpostop_attr(getret, &at);
3327 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
3328 txdr_hyper(at.va_filerev, tl);
3329 }
3330 mp = mb;
3331 bp = bpos;
3332 be = bp + M_TRAILINGSPACE(mp);
3333
3334 /* Loop through the records and build reply */
3335 while (cpos < cend && ncookies > 0) {
3336 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
3337 nlen = dp->d_namlen;
3338 rem = nfsm_rndup(nlen) - nlen;
3339 len += (4 * NFSX_UNSIGNED + nlen + rem);
3340 if (v3)
3341 len += 2 * NFSX_UNSIGNED;
3342 if (len > cnt) {
3343 eofflag = 0;
3344 break;
3345 }
3346 /*
3347 * Build the directory record xdr from
3348 * the dirent entry.
3349 */
3350 nfsm_clget;
3351 *tl = nfsrv_nfs_true;
3352 bp += NFSX_UNSIGNED;
3353 if (v3) {
3354 nfsm_clget;
3355 *tl = 0;
3356 bp += NFSX_UNSIGNED;
3357 }
3358 nfsm_clget;
3359 *tl = txdr_unsigned(dp->d_fileno);
3360 bp += NFSX_UNSIGNED;
3361 nfsm_clget;
3362 *tl = txdr_unsigned(nlen);
3363 bp += NFSX_UNSIGNED;
3364
3365 /* And loop around copying the name */
3366 xfer = nlen;
3367 cp = dp->d_name;
3368 while (xfer > 0) {
3369 nfsm_clget;
3370 if ((bp+xfer) > be)
3371 tsiz = be-bp;
3372 else
3373 tsiz = xfer;
3374 bcopy(cp, bp, tsiz);
3375 bp += tsiz;
3376 xfer -= tsiz;
3377 if (xfer > 0)
3378 cp += tsiz;
3379 }
3380 /* And null pad to an int32_t boundary. */
3381 for (i = 0; i < rem; i++)
3382 *bp++ = '\0';
3383 nfsm_clget;
3384
3385 /* Finish off the record */
3386 if (v3) {
3387 *tl = 0;
3388 bp += NFSX_UNSIGNED;
3389 nfsm_clget;
3390 }
3391 *tl = txdr_unsigned(*cookiep);
3392 bp += NFSX_UNSIGNED;
3393 }
3394 cpos += dp->d_reclen;
3395 dp = (struct dirent *)cpos;
3396 cookiep++;
3397 ncookies--;
3398 }
3399 vrele(vp);
3400 vp = NULL;
3401 nfsm_clget;
3402 *tl = nfsrv_nfs_false;
3403 bp += NFSX_UNSIGNED;
3404 nfsm_clget;
3405 if (eofflag)
3406 *tl = nfsrv_nfs_true;
3407 else
3408 *tl = nfsrv_nfs_false;
3409 bp += NFSX_UNSIGNED;
3410 if (mp != mb) {
3411 if (bp < be)
3412 mp->m_len = bp - mtod(mp, caddr_t);
3413 } else
3414 mp->m_len += bp - bpos;
3415 FREE((caddr_t)rbuf, M_TEMP);
3416 FREE((caddr_t)cookies, M_TEMP);
3417
3418 nfsmout:
3419 if (vp)
3420 vrele(vp);
3421 VFS_UNLOCK_GIANT(vfslocked);
3422 return(error);
3423 }
3424
3425 int
3426 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3427 struct thread *td, struct mbuf **mrq)
3428 {
3429 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3430 struct sockaddr *nam = nfsd->nd_nam;
3431 caddr_t dpos = nfsd->nd_dpos;
3432 struct ucred *cred = nfsd->nd_cr;
3433 char *bp, *be;
3434 struct mbuf *mp;
3435 struct dirent *dp;
3436 caddr_t cp;
3437 u_int32_t *tl;
3438 caddr_t bpos;
3439 struct mbuf *mb, *mreq;
3440 char *cpos, *cend, *rbuf;
3441 struct vnode *vp = NULL, *nvp;
3442 struct flrep fl;
3443 nfsfh_t nfh;
3444 fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3445 struct uio io;
3446 struct iovec iv;
3447 struct vattr va, at, *vap = &va;
3448 struct nfs_fattr *fp;
3449 int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3450 int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3451 u_quad_t off, toff, verf;
3452 u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3453 int v3 = (nfsd->nd_flag & ND_NFSV3);
3454 int vfslocked;
3455
3456 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3457 vfslocked = 0;
3458 if (!v3)
3459 panic("nfsrv_readdirplus: v3 proc called on a v2 connection");
3460 fhp = &nfh.fh_generic;
3461 nfsm_srvmtofh(fhp);
3462 tl = nfsm_dissect_nonblock(u_int32_t *, 6 * NFSX_UNSIGNED);
3463 toff = fxdr_hyper(tl);
3464 tl += 2;
3465 verf = fxdr_hyper(tl);
3466 tl += 2;
3467 siz = fxdr_unsigned(int, *tl++);
3468 cnt = fxdr_unsigned(int, *tl);
3469 off = toff;
3470 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3471 xfer = NFS_SRVMAXDATA(nfsd);
3472 if (cnt > xfer)
3473 cnt = xfer;
3474 if (siz > xfer)
3475 siz = xfer;
3476 fullsiz = siz;
3477 error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
3478 nam, &rdonly, TRUE);
3479 if (!error && vp->v_type != VDIR) {
3480 error = ENOTDIR;
3481 vput(vp);
3482 vp = NULL;
3483 }
3484 if (error) {
3485 nfsm_reply(NFSX_UNSIGNED);
3486 nfsm_srvpostop_attr(getret, &at);
3487 error = 0;
3488 goto nfsmout;
3489 }
3490 error = getret = VOP_GETATTR(vp, &at, cred, td);
3491 #if 0
3492 /*
3493 * XXX This check may be too strict for Solaris 2.5 clients.
3494 */
3495 if (!error && toff && verf && verf != at.va_filerev)
3496 error = NFSERR_BAD_COOKIE;
3497 #endif
3498 if (!error)
3499 error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
3500 if (error) {
3501 vput(vp);
3502 vp = NULL;
3503 nfsm_reply(NFSX_V3POSTOPATTR);
3504 nfsm_srvpostop_attr(getret, &at);
3505 error = 0;
3506 goto nfsmout;
3507 }
3508 VOP_UNLOCK(vp, 0, td);
3509 MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3510 again:
3511 iv.iov_base = rbuf;
3512 iv.iov_len = fullsiz;
3513 io.uio_iov = &iv;
3514 io.uio_iovcnt = 1;
3515 io.uio_offset = (off_t)off;
3516 io.uio_resid = fullsiz;
3517 io.uio_segflg = UIO_SYSSPACE;
3518 io.uio_rw = UIO_READ;
3519 io.uio_td = NULL;
3520 eofflag = 0;
3521 vn_lock(vp, LK_SHARED | LK_RETRY, td);
3522 if (cookies) {
3523 free((caddr_t)cookies, M_TEMP);
3524 cookies = NULL;
3525 }
3526 error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3527 off = (u_quad_t)io.uio_offset;
3528 getret = VOP_GETATTR(vp, &at, cred, td);
3529 VOP_UNLOCK(vp, 0, td);
3530 if (!cookies && !error)
3531 error = NFSERR_PERM;
3532 if (!error)
3533 error = getret;
3534 if (error) {
3535 vrele(vp);
3536 vp = NULL;
3537 if (cookies)
3538 free((caddr_t)cookies, M_TEMP);
3539 free((caddr_t)rbuf, M_TEMP);
3540 nfsm_reply(NFSX_V3POSTOPATTR);
3541 nfsm_srvpostop_attr(getret, &at);
3542 error = 0;
3543 goto nfsmout;
3544 }
3545 if (io.uio_resid) {
3546 siz -= io.uio_resid;
3547
3548 /*
3549 * If nothing read, return eof
3550 * rpc reply
3551 */
3552 if (siz == 0) {
3553 vrele(vp);
3554 vp = NULL;
3555 nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3556 2 * NFSX_UNSIGNED);
3557 nfsm_srvpostop_attr(getret, &at);
3558 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
3559 txdr_hyper(at.va_filerev, tl);
3560 tl += 2;
3561 *tl++ = nfsrv_nfs_false;
3562 *tl = nfsrv_nfs_true;
3563 FREE((caddr_t)cookies, M_TEMP);
3564 FREE((caddr_t)rbuf, M_TEMP);
3565 error = 0;
3566 goto nfsmout;
3567 }
3568 }
3569
3570 /*
3571 * Check for degenerate cases of nothing useful read.
3572 * If so go try again
3573 */
3574 cpos = rbuf;
3575 cend = rbuf + siz;
3576 dp = (struct dirent *)cpos;
3577 cookiep = cookies;
3578 /*
3579 * For some reason FreeBSD's ufs_readdir() chooses to back the
3580 * directory offset up to a block boundary, so it is necessary to
3581 * skip over the records that precede the requested offset. This
3582 * requires the assumption that file offset cookies monotonically
3583 * increase.
3584 */
3585 while (cpos < cend && ncookies > 0 &&
3586 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
3587 ((u_quad_t)(*cookiep)) <= toff)) {
3588 cpos += dp->d_reclen;
3589 dp = (struct dirent *)cpos;
3590 cookiep++;
3591 ncookies--;
3592 }
3593 if (cpos >= cend || ncookies == 0) {
3594 toff = off;
3595 siz = fullsiz;
3596 goto again;
3597 }
3598
3599 /*
3600 * Probe one of the directory entries to see if the filesystem
3601 * supports VGET.
3602 */
3603 error = VFS_VGET(vp->v_mount, dp->d_fileno, LK_EXCLUSIVE, &nvp);
3604 if (error) {
3605 if (error == EOPNOTSUPP)
3606 error = NFSERR_NOTSUPP;
3607 else
3608 error = NFSERR_SERVERFAULT;
3609 vrele(vp);
3610 vp = NULL;
3611 free((caddr_t)cookies, M_TEMP);
3612 free((caddr_t)rbuf, M_TEMP);
3613 nfsm_reply(NFSX_V3POSTOPATTR);
3614 nfsm_srvpostop_attr(getret, &at);
3615 error = 0;
3616 goto nfsmout;
3617 }
3618 vput(nvp);
3619 nvp = NULL;
3620
3621 dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3622 2 * NFSX_UNSIGNED;
3623 nfsm_reply(cnt);
3624 nfsm_srvpostop_attr(getret, &at);
3625 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
3626 txdr_hyper(at.va_filerev, tl);
3627 mp = mb;
3628 bp = bpos;
3629 be = bp + M_TRAILINGSPACE(mp);
3630
3631 /* Loop through the records and build reply */
3632 while (cpos < cend && ncookies > 0) {
3633 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
3634 nlen = dp->d_namlen;
3635 rem = nfsm_rndup(nlen)-nlen;
3636
3637 /*
3638 * For readdir_and_lookup get the vnode using
3639 * the file number.
3640 */
3641 if (VFS_VGET(vp->v_mount, dp->d_fileno, LK_EXCLUSIVE,
3642 &nvp))
3643 goto invalid;
3644 bzero((caddr_t)nfhp, NFSX_V3FH);
3645 nfhp->fh_fsid =
3646 nvp->v_mount->mnt_stat.f_fsid;
3647 /*
3648 * XXXRW: Assert the mountpoints are the same so that
3649 * we know that acquiring Giant based on the
3650 * directory is the right thing for the child.
3651 */
3652 KASSERT(nvp->v_mount == vp->v_mount,
3653 ("nfsrv_readdirplus: nvp mount != vp mount"));
3654 if (VOP_VPTOFH(nvp, &nfhp->fh_fid)) {
3655 vput(nvp);
3656 nvp = NULL;
3657 goto invalid;
3658 }
3659 if (VOP_GETATTR(nvp, vap, cred, td)) {
3660 vput(nvp);
3661 nvp = NULL;
3662 goto invalid;
3663 }
3664 vput(nvp);
3665 nvp = NULL;
3666
3667 /*
3668 * If either the dircount or maxcount will be
3669 * exceeded, get out now. Both of these lengths
3670 * are calculated conservatively, including all
3671 * XDR overheads.
3672 */
3673 len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3674 NFSX_V3POSTOPATTR);
3675 dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3676 if (len > cnt || dirlen > fullsiz) {
3677 eofflag = 0;
3678 break;
3679 }
3680
3681 /*
3682 * Build the directory record xdr from
3683 * the dirent entry.
3684 */
3685 fp = (struct nfs_fattr *)&fl.fl_fattr;
3686 nfsm_srvfillattr(vap, fp);
3687 fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3688 fl.fl_fhok = nfsrv_nfs_true;
3689 fl.fl_postopok = nfsrv_nfs_true;
3690 fl.fl_off.nfsuquad[0] = 0;
3691 fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3692
3693 nfsm_clget;
3694 *tl = nfsrv_nfs_true;
3695 bp += NFSX_UNSIGNED;
3696 nfsm_clget;
3697 *tl = 0;
3698 bp += NFSX_UNSIGNED;
3699 nfsm_clget;
3700 *tl = txdr_unsigned(dp->d_fileno);
3701 bp += NFSX_UNSIGNED;
3702 nfsm_clget;
3703 *tl = txdr_unsigned(nlen);
3704 bp += NFSX_UNSIGNED;
3705
3706 /* And loop around copying the name */
3707 xfer = nlen;
3708 cp = dp->d_name;
3709 while (xfer > 0) {
3710 nfsm_clget;
3711 if ((bp + xfer) > be)
3712 tsiz = be - bp;
3713 else
3714 tsiz = xfer;
3715 bcopy(cp, bp, tsiz);
3716 bp += tsiz;
3717 xfer -= tsiz;
3718 if (xfer > 0)
3719 cp += tsiz;
3720 }
3721 /* And null pad to an int32_t boundary. */
3722 for (i = 0; i < rem; i++)
3723 *bp++ = '\0';
3724
3725 /*
3726 * Now copy the flrep structure out.
3727 */
3728 xfer = sizeof (struct flrep);
3729 cp = (caddr_t)&fl;
3730 while (xfer > 0) {
3731 nfsm_clget;
3732 if ((bp + xfer) > be)
3733 tsiz = be - bp;
3734 else
3735 tsiz = xfer;
3736 bcopy(cp, bp, tsiz);
3737 bp += tsiz;
3738 xfer -= tsiz;
3739 if (xfer > 0)
3740 cp += tsiz;
3741 }
3742 }
3743 invalid:
3744 cpos += dp->d_reclen;
3745 dp = (struct dirent *)cpos;
3746 cookiep++;
3747 ncookies--;
3748 }
3749 vrele(vp);
3750 vp = NULL;
3751 nfsm_clget;
3752 *tl = nfsrv_nfs_false;
3753 bp += NFSX_UNSIGNED;
3754 nfsm_clget;
3755 if (eofflag)
3756 *tl = nfsrv_nfs_true;
3757 else
3758 *tl = nfsrv_nfs_false;
3759 bp += NFSX_UNSIGNED;
3760 if (mp != mb) {
3761 if (bp < be)
3762 mp->m_len = bp - mtod(mp, caddr_t);
3763 } else
3764 mp->m_len += bp - bpos;
3765 FREE((caddr_t)cookies, M_TEMP);
3766 FREE((caddr_t)rbuf, M_TEMP);
3767 nfsmout:
3768 if (vp)
3769 vrele(vp);
3770 VFS_UNLOCK_GIANT(vfslocked);
3771 return(error);
3772 }
3773
3774 /*
3775 * nfs commit service
3776 */
3777 int
3778 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3779 struct thread *td, struct mbuf **mrq)
3780 {
3781 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3782 struct sockaddr *nam = nfsd->nd_nam;
3783 caddr_t dpos = nfsd->nd_dpos;
3784 struct ucred *cred = nfsd->nd_cr;
3785 struct vattr bfor, aft;
3786 struct vnode *vp = NULL;
3787 nfsfh_t nfh;
3788 fhandle_t *fhp;
3789 u_int32_t *tl;
3790 caddr_t bpos;
3791 int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3792 struct mbuf *mb, *mreq;
3793 u_quad_t off;
3794 struct mount *mp = NULL;
3795 int v3 = (nfsd->nd_flag & ND_NFSV3);
3796 int tvfslocked;
3797 int vfslocked;
3798
3799 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3800 vfslocked = 0;
3801 if (!v3)
3802 panic("nfsrv_commit: v3 proc called on a v2 connection");
3803 fhp = &nfh.fh_generic;
3804 nfsm_srvmtofh(fhp);
3805 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
3806 error = ESTALE;
3807 goto ereply;
3808 }
3809 vfslocked = VFS_LOCK_GIANT(mp);
3810 (void) vn_start_write(NULL, &mp, V_WAIT);
3811 vfs_rel(mp); /* The write holds a ref. */
3812 tl = nfsm_dissect_nonblock(u_int32_t *, 3 * NFSX_UNSIGNED);
3813
3814 /*
3815 * XXX At this time VOP_FSYNC() does not accept offset and byte
3816 * count parameters, so these arguments are useless (someday maybe).
3817 */
3818 off = fxdr_hyper(tl);
3819 tl += 2;
3820 cnt = fxdr_unsigned(int, *tl);
3821 error = nfsrv_fhtovp(fhp, 1, &vp, &tvfslocked, cred, slp,
3822 nam, &rdonly, TRUE);
3823 vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
3824 if (error) {
3825 nfsm_reply(2 * NFSX_UNSIGNED);
3826 nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3827 error = 0;
3828 goto nfsmout;
3829 }
3830 for_ret = VOP_GETATTR(vp, &bfor, cred, td);
3831
3832 if (cnt > MAX_COMMIT_COUNT) {
3833 /*
3834 * Give up and do the whole thing
3835 */
3836 if (vp->v_object &&
3837 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3838 VM_OBJECT_LOCK(vp->v_object);
3839 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3840 VM_OBJECT_UNLOCK(vp->v_object);
3841 }
3842 error = VOP_FSYNC(vp, MNT_WAIT, td);
3843 } else {
3844 /*
3845 * Locate and synchronously write any buffers that fall
3846 * into the requested range. Note: we are assuming that
3847 * f_iosize is a power of 2.
3848 */
3849 int iosize = vp->v_mount->mnt_stat.f_iosize;
3850 int iomask = iosize - 1;
3851 int s;
3852 daddr_t lblkno;
3853
3854 /*
3855 * Align to iosize boundry, super-align to page boundry.
3856 */
3857 if (off & iomask) {
3858 cnt += off & iomask;
3859 off &= ~(u_quad_t)iomask;
3860 }
3861 if (off & PAGE_MASK) {
3862 cnt += off & PAGE_MASK;
3863 off &= ~(u_quad_t)PAGE_MASK;
3864 }
3865 lblkno = off / iosize;
3866
3867 if (vp->v_object &&
3868 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3869 VM_OBJECT_LOCK(vp->v_object);
3870 vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3871 VM_OBJECT_UNLOCK(vp->v_object);
3872 }
3873
3874 s = splbio();
3875 VI_LOCK(vp);
3876 while (cnt > 0) {
3877 struct buf *bp;
3878
3879 /*
3880 * If we have a buffer and it is marked B_DELWRI we
3881 * have to lock and write it. Otherwise the prior
3882 * write is assumed to have already been committed.
3883 *
3884 * gbincore() can return invalid buffers now so we
3885 * have to check that bit as well (though B_DELWRI
3886 * should not be set if B_INVAL is set there could be
3887 * a race here since we haven't locked the buffer).
3888 */
3889 if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
3890 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
3891 LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) {
3892 VI_LOCK(vp);
3893 continue; /* retry */
3894 }
3895 if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
3896 B_DELWRI) {
3897 bremfree(bp);
3898 bp->b_flags &= ~B_ASYNC;
3899 bwrite(bp);
3900 ++nfs_commit_miss;
3901 } else
3902 BUF_UNLOCK(bp);
3903 VI_LOCK(vp);
3904 }
3905 ++nfs_commit_blks;
3906 if (cnt < iosize)
3907 break;
3908 cnt -= iosize;
3909 ++lblkno;
3910 }
3911 VI_UNLOCK(vp);
3912 splx(s);
3913 }
3914
3915 aft_ret = VOP_GETATTR(vp, &aft, cred, td);
3916 vput(vp);
3917 vp = NULL;
3918 ereply:
3919 nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
3920 nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3921 if (!error) {
3922 tl = nfsm_build(u_int32_t *, NFSX_V3WRITEVERF);
3923 if (nfsver.tv_sec == 0)
3924 nfsver = boottime;
3925 *tl++ = txdr_unsigned(nfsver.tv_sec);
3926 *tl = txdr_unsigned(nfsver.tv_usec);
3927 } else {
3928 error = 0;
3929 }
3930 nfsmout:
3931 if (vp)
3932 vput(vp);
3933 vn_finished_write(mp);
3934 VFS_UNLOCK_GIANT(vfslocked);
3935 return(error);
3936 }
3937
3938 /*
3939 * nfs statfs service
3940 */
3941 int
3942 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3943 struct thread *td, struct mbuf **mrq)
3944 {
3945 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3946 struct sockaddr *nam = nfsd->nd_nam;
3947 caddr_t dpos = nfsd->nd_dpos;
3948 struct ucred *cred = nfsd->nd_cr;
3949 struct statfs *sf;
3950 struct nfs_statfs *sfp;
3951 caddr_t bpos;
3952 int error = 0, rdonly, getret = 1;
3953 int v3 = (nfsd->nd_flag & ND_NFSV3);
3954 struct mbuf *mb, *mreq;
3955 struct vnode *vp = NULL;
3956 struct vattr at;
3957 nfsfh_t nfh;
3958 fhandle_t *fhp;
3959 struct statfs statfs;
3960 u_quad_t tval;
3961 int vfslocked;
3962
3963 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3964 vfslocked = 0;
3965 fhp = &nfh.fh_generic;
3966 nfsm_srvmtofh(fhp);
3967 error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
3968 nam, &rdonly, TRUE);
3969 if (error) {
3970 nfsm_reply(NFSX_UNSIGNED);
3971 if (v3)
3972 nfsm_srvpostop_attr(getret, &at);
3973 error = 0;
3974 goto nfsmout;
3975 }
3976 sf = &statfs;
3977 error = VFS_STATFS(vp->v_mount, sf, td);
3978 getret = VOP_GETATTR(vp, &at, cred, td);
3979 vput(vp);
3980 vp = NULL;
3981 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
3982 if (v3)
3983 nfsm_srvpostop_attr(getret, &at);
3984 if (error) {
3985 error = 0;
3986 goto nfsmout;
3987 }
3988 sfp = nfsm_build(struct nfs_statfs *, NFSX_STATFS(v3));
3989 if (v3) {
3990 tval = (u_quad_t)sf->f_blocks;
3991 tval *= (u_quad_t)sf->f_bsize;
3992 txdr_hyper(tval, &sfp->sf_tbytes);
3993 tval = (u_quad_t)sf->f_bfree;
3994 tval *= (u_quad_t)sf->f_bsize;
3995 txdr_hyper(tval, &sfp->sf_fbytes);
3996 /*
3997 * Don't send negative values for available space,
3998 * since this field is unsigned in the NFS protocol.
3999 * Otherwise, the client would see absurdly high
4000 * numbers for free space.
4001 */
4002 if (sf->f_bavail < 0)
4003 tval = 0;
4004 else
4005 tval = (u_quad_t)sf->f_bavail;
4006 tval *= (u_quad_t)sf->f_bsize;
4007 txdr_hyper(tval, &sfp->sf_abytes);
4008 sfp->sf_tfiles.nfsuquad[0] = 0;
4009 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
4010 sfp->sf_ffiles.nfsuquad[0] = 0;
4011 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
4012 sfp->sf_afiles.nfsuquad[0] = 0;
4013 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
4014 sfp->sf_invarsec = 0;
4015 } else {
4016 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
4017 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
4018 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
4019 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
4020 if (sf->f_bavail < 0)
4021 sfp->sf_bavail = 0;
4022 else
4023 sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
4024 }
4025 nfsmout:
4026 if (vp)
4027 vput(vp);
4028 VFS_UNLOCK_GIANT(vfslocked);
4029 return(error);
4030 }
4031
4032 /*
4033 * nfs fsinfo service
4034 */
4035 int
4036 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
4037 struct thread *td, struct mbuf **mrq)
4038 {
4039 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
4040 struct sockaddr *nam = nfsd->nd_nam;
4041 caddr_t dpos = nfsd->nd_dpos;
4042 struct ucred *cred = nfsd->nd_cr;
4043 struct nfsv3_fsinfo *sip;
4044 caddr_t bpos;
4045 int error = 0, rdonly, getret = 1, pref;
4046 struct mbuf *mb, *mreq;
4047 struct vnode *vp = NULL;
4048 struct vattr at;
4049 nfsfh_t nfh;
4050 fhandle_t *fhp;
4051 u_quad_t maxfsize;
4052 struct statfs sb;
4053 int v3 = (nfsd->nd_flag & ND_NFSV3);
4054 int vfslocked;
4055
4056 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
4057 if (!v3)
4058 panic("nfsrv_fsinfo: v3 proc called on a v2 connection");
4059 fhp = &nfh.fh_generic;
4060 vfslocked = 0;
4061 nfsm_srvmtofh(fhp);
4062 error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
4063 nam, &rdonly, TRUE);
4064 if (error) {
4065 nfsm_reply(NFSX_UNSIGNED);
4066 nfsm_srvpostop_attr(getret, &at);
4067 error = 0;
4068 goto nfsmout;
4069 }
4070
4071 /* XXX Try to make a guess on the max file size. */
4072 VFS_STATFS(vp->v_mount, &sb, td);
4073 maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
4074
4075 getret = VOP_GETATTR(vp, &at, cred, td);
4076 vput(vp);
4077 vp = NULL;
4078 nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
4079 nfsm_srvpostop_attr(getret, &at);
4080 sip = nfsm_build(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
4081
4082 /*
4083 * XXX
4084 * There should be filesystem VFS OP(s) to get this information.
4085 * For now, assume ufs.
4086 */
4087 if (slp->ns_so->so_type == SOCK_DGRAM)
4088 pref = NFS_MAXDGRAMDATA;
4089 else
4090 pref = NFS_MAXDATA;
4091 sip->fs_rtmax = txdr_unsigned(pref);
4092 sip->fs_rtpref = txdr_unsigned(pref);
4093 sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
4094 sip->fs_wtmax = txdr_unsigned(pref);
4095 sip->fs_wtpref = txdr_unsigned(pref);
4096 sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
4097 sip->fs_dtpref = txdr_unsigned(pref);
4098 txdr_hyper(maxfsize, &sip->fs_maxfilesize);
4099 sip->fs_timedelta.nfsv3_sec = 0;
4100 sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
4101 sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
4102 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
4103 NFSV3FSINFO_CANSETTIME);
4104 nfsmout:
4105 if (vp)
4106 vput(vp);
4107 VFS_UNLOCK_GIANT(vfslocked);
4108 return(error);
4109 }
4110
4111 /*
4112 * nfs pathconf service
4113 */
4114 int
4115 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
4116 struct thread *td, struct mbuf **mrq)
4117 {
4118 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
4119 struct sockaddr *nam = nfsd->nd_nam;
4120 caddr_t dpos = nfsd->nd_dpos;
4121 struct ucred *cred = nfsd->nd_cr;
4122 struct nfsv3_pathconf *pc;
4123 caddr_t bpos;
4124 int error = 0, rdonly, getret = 1;
4125 register_t linkmax, namemax, chownres, notrunc;
4126 struct mbuf *mb, *mreq;
4127 struct vnode *vp = NULL;
4128 struct vattr at;
4129 nfsfh_t nfh;
4130 fhandle_t *fhp;
4131 int v3 = (nfsd->nd_flag & ND_NFSV3);
4132 int vfslocked;
4133
4134 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
4135 if (!v3)
4136 panic("nfsrv_pathconf: v3 proc called on a v2 connection");
4137 vfslocked = 0;
4138 fhp = &nfh.fh_generic;
4139 nfsm_srvmtofh(fhp);
4140 error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
4141 nam, &rdonly, TRUE);
4142 if (error) {
4143 nfsm_reply(NFSX_UNSIGNED);
4144 nfsm_srvpostop_attr(getret, &at);
4145 error = 0;
4146 goto nfsmout;
4147 }
4148 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
4149 if (!error)
4150 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
4151 if (!error)
4152 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
4153 if (!error)
4154 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, ¬runc);
4155 getret = VOP_GETATTR(vp, &at, cred, td);
4156 vput(vp);
4157 vp = NULL;
4158 nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
4159 nfsm_srvpostop_attr(getret, &at);
4160 if (error) {
4161 error = 0;
4162 goto nfsmout;
4163 }
4164 pc = nfsm_build(struct nfsv3_pathconf *, NFSX_V3PATHCONF);
4165
4166 pc->pc_linkmax = txdr_unsigned(linkmax);
4167 pc->pc_namemax = txdr_unsigned(namemax);
4168 pc->pc_notrunc = txdr_unsigned(notrunc);
4169 pc->pc_chownrestricted = txdr_unsigned(chownres);
4170
4171 /*
4172 * These should probably be supported by VOP_PATHCONF(), but
4173 * until msdosfs is exportable (why would you want to?), the
4174 * Unix defaults should be ok.
4175 */
4176 pc->pc_caseinsensitive = nfsrv_nfs_false;
4177 pc->pc_casepreserving = nfsrv_nfs_true;
4178 nfsmout:
4179 if (vp)
4180 vput(vp);
4181 VFS_UNLOCK_GIANT(vfslocked);
4182 return(error);
4183 }
4184
4185 /*
4186 * Null operation, used by clients to ping server
4187 */
4188 /* ARGSUSED */
4189 int
4190 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
4191 struct thread *td, struct mbuf **mrq)
4192 {
4193 struct mbuf *mrep = nfsd->nd_mrep;
4194 caddr_t bpos;
4195 int error = NFSERR_RETVOID;
4196 struct mbuf *mb, *mreq;
4197
4198 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
4199 nfsm_reply(0);
4200 nfsmout:
4201 return (error);
4202 }
4203
4204 /*
4205 * No operation, used for obsolete procedures
4206 */
4207 /* ARGSUSED */
4208 int
4209 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
4210 struct thread *td, struct mbuf **mrq)
4211 {
4212 struct mbuf *mrep = nfsd->nd_mrep;
4213 caddr_t bpos;
4214 int error;
4215 struct mbuf *mb, *mreq;
4216
4217 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
4218 if (nfsd->nd_repstat)
4219 error = nfsd->nd_repstat;
4220 else
4221 error = EPROCUNAVAIL;
4222 nfsm_reply(0);
4223 error = 0;
4224 nfsmout:
4225 return (error);
4226 }
4227
4228 /*
4229 * Perform access checking for vnodes obtained from file handles that would
4230 * refer to files already opened by a Unix client. You cannot just use
4231 * vn_writechk() and VOP_ACCESS() for two reasons.
4232 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
4233 * case.
4234 * 2 - The owner is to be given access irrespective of mode bits for some
4235 * operations, so that processes that chmod after opening a file don't
4236 * break. I don't like this because it opens a security hole, but since
4237 * the nfs server opens a security hole the size of a barn door anyhow,
4238 * what the heck.
4239 *
4240 * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
4241 * will return EPERM instead of EACCESS. EPERM is always an error.
4242 */
4243 static int
4244 nfsrv_access(struct vnode *vp, int flags, struct ucred *cred,
4245 int rdonly, struct thread *td, int override)
4246 {
4247 struct vattr vattr;
4248 int error;
4249
4250 VFS_ASSERT_GIANT(vp->v_mount);
4251
4252 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
4253
4254 if (flags & VWRITE) {
4255 /* Just vn_writechk() changed to check rdonly */
4256 /*
4257 * Disallow write attempts on read-only filesystems;
4258 * unless the file is a socket or a block or character
4259 * device resident on the filesystem.
4260 */
4261 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
4262 switch (vp->v_type) {
4263 case VREG:
4264 case VDIR:
4265 case VLNK:
4266 return (EROFS);
4267 default:
4268 break;
4269 }
4270 }
4271 /*
4272 * If there's shared text associated with
4273 * the inode, we can't allow writing.
4274 */
4275 if (vp->v_vflag & VV_TEXT)
4276 return (ETXTBSY);
4277 }
4278
4279 error = VOP_GETATTR(vp, &vattr, cred, td);
4280 if (error)
4281 return (error);
4282 error = VOP_ACCESS(vp, flags, cred, td);
4283 /*
4284 * Allow certain operations for the owner (reads and writes
4285 * on files that are already open).
4286 */
4287 if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
4288 error = 0;
4289 return (error);
4290 }
Cache object: ad134796c3410adfccab4958776e3bfd
|