1 /*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)nfs_serv.c 8.8 (Berkeley) 7/31/95
37 */
38
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD: releng/5.1/sys/nfsserver/nfs_serv.c 115301 2003-05-25 06:17:33Z truckman $");
41
42 /*
43 * nfs version 2 and 3 server calls to vnode ops
44 * - these routines generally have 3 phases
45 * 1 - break down and validate rpc request in mbuf list
46 * 2 - do the vnode ops for the request
47 * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
48 * 3 - build the rpc reply in an mbuf list
49 * nb:
50 * - do not mix the phases, since the nfsm_?? macros can return failures
51 * on a bad rpc or similar and do not do any vrele() or vput()'s
52 *
53 * - the nfsm_reply() macro generates an nfs rpc reply with the nfs
54 * error number iff error != 0 whereas
55 * returning an error from the server function implies a fatal error
56 * such as a badly constructed rpc request that should be dropped without
57 * a reply.
58 * For nfsm_reply(), the case where error == EBADRPC is treated
59 * specially; after constructing a reply, it does an immediate
60 * `goto nfsmout' to avoid getting any V3 post-op status appended.
61 *
62 * Other notes:
63 * Warning: always pay careful attention to resource cleanup on return
64 * and note that nfsm_*() macros can terminate a procedure on certain
65 * errors.
66 *
67 * lookup() and namei()
68 * may return garbage in various structural fields/return elements
69 * if an error is returned, and may garbage up nd.ni_dvp even if no
70 * error is returned and you did not request LOCKPARENT or WANTPARENT.
71 *
72 * We use the ni_cnd.cn_flags 'HASBUF' flag to track whether the name
73 * buffer has been freed or not.
74 */
75
76 #include <sys/param.h>
77 #include <sys/systm.h>
78 #include <sys/proc.h>
79 #include <sys/namei.h>
80 #include <sys/unistd.h>
81 #include <sys/vnode.h>
82 #include <sys/mount.h>
83 #include <sys/socket.h>
84 #include <sys/socketvar.h>
85 #include <sys/malloc.h>
86 #include <sys/mbuf.h>
87 #include <sys/dirent.h>
88 #include <sys/stat.h>
89 #include <sys/kernel.h>
90 #include <sys/sysctl.h>
91 #include <sys/bio.h>
92 #include <sys/buf.h>
93
94 #include <vm/vm.h>
95 #include <vm/vm_extern.h>
96 #include <vm/vm_object.h>
97
98 #include <nfs/nfsproto.h>
99 #include <nfs/rpcv2.h>
100 #include <nfsserver/nfs.h>
101 #include <nfs/xdr_subs.h>
102 #include <nfsserver/nfsm_subs.h>
103
/*
 * Debug trace hook.  With NFSRV_DEBUG defined the argument is pasted directly
 * after `printf', so callers wrap the whole argument list in an extra pair of
 * parentheses, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__)); without
 * NFSRV_DEBUG the call expands to nothing.
 */
104 #ifdef NFSRV_DEBUG
105 #define nfsdbprintf(info) printf info
106 #else
107 #define nfsdbprintf(info)
108 #endif
109
/*
 * NOTE(review): MAX_COMMIT_COUNT is not referenced in the part of the file
 * reviewed here; presumably an upper bound on a commit byte count -- confirm
 * against its users.
 */
110 #define MAX_COMMIT_COUNT (1024 * 1024)
111
/*
 * Tuning constants for the read heuristic table used by nfsrv_read().
 * NUM_HEURISTIC is the table size; it is also used as a bit mask
 * (NUM_HEURISTIC - 1) when indexing, so it has to remain a power of two.
 * NHUSE_INIT is the use count given to a freshly claimed slot, NHUSE_INC is
 * added each time a read uses a slot, and NHUSE_MAX caps the count.
 */
112 #define NUM_HEURISTIC 64
113 #define NHUSE_INIT 64
114 #define NHUSE_INC 16
115 #define NHUSE_MAX 2048
116
/*
 * One slot of sequential-read tracking state.  nfsrv_read() derives a start
 * index from the vnode pointer, probes a few consecutive slots for a match
 * and otherwise takes over the probed slot with the lowest nh_use.
 */
117 static struct nfsheur {
118 struct vnode *nh_vp; /* vp to match (unreferenced pointer) */
119 off_t nh_nextr; /* next offset for sequential detection */
120 int nh_use; /* use count for selection */
121 int nh_seqcount; /* sequential-read score, shifted into the read ioflag */
122 } nfsheur[NUM_HEURISTIC];
123
124 /* Global vars */
125
/*
 * Write-gathering delays.  The nfsrv_writegather() header comment in this
 * file states that routine is called when nfsrvw_procrastinate > 0.
 * NOTE(review): the units of NFS_GATHERDELAY * 1000 and the consumers of the
 * _v3 variant are defined outside the part of the file reviewed here --
 * confirm.
 */
126 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
127 int nfsrvw_procrastinate_v3 = 0;
128
/*
 * Value placed in the last two words of the v3 write reply built by
 * nfsrv_write().  Starts out zeroed and is set to boottime by nfsrv_write()
 * the first time its tv_sec is found to be 0.
 */
129 static struct timeval nfsver = { 0 };
130
/* Parent sysctl node; the knobs below hang off _vfs_nfsrv. */
131 SYSCTL_NODE(_vfs, OID_AUTO, nfsrv, CTLFLAG_RW, 0, "NFS server");
132
/*
 * nfs_async: when nonzero, nfsrv_write() treats v2 writes as
 * NFSV3WRITE_UNSTABLE and reports v3 unstable writes as FILESYNC.
 * NOTE(review): nfs_commit_blks and nfs_commit_miss are not touched in the
 * part of the file reviewed here; presumably commit statistics -- confirm.
 */
133 static int nfs_async;
134 static int nfs_commit_blks;
135 static int nfs_commit_miss;
136 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
137 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
138 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
139
/*
 * Server statistics, exported read-only through sysctl.  nfsrv_write()
 * increments srvvop_writes after each VOP_WRITE call.
 */
140 struct nfsrvstats nfsrvstats;
141 SYSCTL_STRUCT(_vfs_nfsrv, NFS_NFSRVSTATS, nfsrvstats, CTLFLAG_RD,
142 &nfsrvstats, nfsrvstats, "S,nfsrvstats");
143
/* Prototypes for file-local helpers (static, so defined in this file). */
144 static int nfsrv_access(struct vnode *, int, struct ucred *, int,
145 struct thread *, int);
146 static void nfsrvw_coalesce(struct nfsrv_descript *,
147 struct nfsrv_descript *);
148
149 /*
150 * Clear nameidata fields that are tested in nsfmout cleanup code prior
151 * to using first nfsm macro (that might jump to the cleanup code).
152 */
153
154 static __inline void
155 ndclear(struct nameidata *nd)
156 {
157
158 nd->ni_cnd.cn_flags = 0;
159 nd->ni_vp = NULL;
160 nd->ni_dvp = NULL;
161 nd->ni_startdir = NULL;
162 }
163
164 /*
165 * nfs v3 access service
 *
 * Decodes a file handle and a requested access bit mask from the request,
 * clears every requested bit for which nfsrv_access() returns nonzero, and
 * replies with post-op attributes followed by the remaining mask.
 * Panics if the request is not flagged ND_NFSV3.
 *
 * Returns `error' as it stands at nfsmout.  It is 0 on both paths below
 * that reach nfsmout explicitly or by falling through.
 * NOTE(review): the nfsm_*() macros are defined elsewhere; per the notes at
 * the top of this file they can jump to nfsmout on a bad request -- confirm
 * what they leave in `error'.
166 */
167 int
168 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
169 struct thread *td, struct mbuf **mrq)
170 {
171 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
172 struct sockaddr *nam = nfsd->nd_nam;
173 caddr_t dpos = nfsd->nd_dpos;
174 struct ucred *cred = &nfsd->nd_cr;
175 struct vnode *vp = NULL;
176 nfsfh_t nfh;
177 fhandle_t *fhp;
178 u_int32_t *tl;
179 caddr_t bpos;
180 int error = 0, rdonly, getret;
181 struct mbuf *mb, *mreq;
182 struct vattr vattr, *vap = &vattr;
183 u_long testmode, nfsmode;
184 int v3 = (nfsd->nd_flag & ND_NFSV3);
185
186 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
187 if (!v3)
188 panic("nfsrv3_access: v3 proc called on a v2 connection");
189 fhp = &nfh.fh_generic;
190 nfsm_srvmtofh(fhp);
/* tl points at the requested mask; it is decoded once the handle resolves. */
191 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
192 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
193 if (error) {
/*
 * Handle did not resolve: reply with the error.
 * NOTE(review): (1, NULL) presumably encodes "no attributes available" --
 * confirm against the macro definition.
 */
194 nfsm_reply(NFSX_UNSIGNED);
195 nfsm_srvpostop_attr(1, NULL);
196 error = 0;
197 goto nfsmout;
198 }
199 nfsmode = fxdr_unsigned(u_int32_t, *tl);
/* Read: drop the bit if the VREAD check fails. */
200 if ((nfsmode & NFSV3ACCESS_READ) &&
201 nfsrv_access(vp, VREAD, cred, rdonly, td, 0))
202 nfsmode &= ~NFSV3ACCESS_READ;
/* Write-class bits share one VWRITE check; DELETE is only tested on directories. */
203 if (vp->v_type == VDIR)
204 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
205 NFSV3ACCESS_DELETE);
206 else
207 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
208 if ((nfsmode & testmode) &&
209 nfsrv_access(vp, VWRITE, cred, rdonly, td, 0))
210 nfsmode &= ~testmode;
/* VEXEC is tested as LOOKUP on directories and as EXECUTE on everything else. */
211 if (vp->v_type == VDIR)
212 testmode = NFSV3ACCESS_LOOKUP;
213 else
214 testmode = NFSV3ACCESS_EXECUTE;
215 if ((nfsmode & testmode) &&
216 nfsrv_access(vp, VEXEC, cred, rdonly, td, 0))
217 nfsmode &= ~testmode;
/*
 * Fetch attributes, then release vp and NULL it before the reply macros run
 * so the nfsmout cleanup cannot vput() it a second time.
 */
218 getret = VOP_GETATTR(vp, vap, cred, td);
219 vput(vp);
220 vp = NULL;
221 nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
222 nfsm_srvpostop_attr(getret, vap);
223 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
224 *tl = txdr_unsigned(nfsmode);
225 nfsmout:
226 if (vp)
227 vput(vp);
228 return(error);
229 }
230
231 /*
232 * nfs getattr service
 *
 * Decodes a file handle, fetches the vnode's attributes with VOP_GETATTR
 * and, on success, appends them to the reply as an nfs_fattr sized for the
 * request's protocol version.
 *
 * Returns `error' as it stands at nfsmout; it is 0 on every path below that
 * reaches nfsmout explicitly or by falling through.
 * NOTE(review): the nfsm_*() macros are defined elsewhere and, per the notes
 * at the top of this file, may jump to nfsmout themselves -- confirm what
 * they leave in `error'.
233 */
234 int
235 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
236 struct thread *td, struct mbuf **mrq)
237 {
238 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
239 struct sockaddr *nam = nfsd->nd_nam;
240 caddr_t dpos = nfsd->nd_dpos;
241 struct ucred *cred = &nfsd->nd_cr;
242 struct nfs_fattr *fp;
243 struct vattr va;
244 struct vattr *vap = &va;
245 struct vnode *vp = NULL;
246 nfsfh_t nfh;
247 fhandle_t *fhp;
248 caddr_t bpos;
249 int error = 0, rdonly;
250 struct mbuf *mb, *mreq;
251
252 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
253 fhp = &nfh.fh_generic;
254 nfsm_srvmtofh(fhp);
255 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
256 if (error) {
/* Handle did not resolve: error-only reply, no attribute payload. */
257 nfsm_reply(0);
258 error = 0;
259 goto nfsmout;
260 }
261 error = VOP_GETATTR(vp, vap, cred, td);
/* Release vp and NULL it before the reply macros so nfsmout will not vput() it again. */
262 vput(vp);
263 vp = NULL;
264 nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
265 if (error) {
/* The reply already carries the VOP_GETATTR error; nothing more to append. */
266 error = 0;
267 goto nfsmout;
268 }
269 fp = nfsm_build(struct nfs_fattr *,
270 NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
271 nfsm_srvfillattr(vap, fp);
272 /* fall through */
273
274 nfsmout:
275 if (vp)
276 vput(vp);
277 return(error);
278 }
279
280 /*
281 * nfs setattr service
 *
 * Decodes a file handle and the new attributes (a v3 sattr with an optional
 * ctime guard, or a v2 sattr), applies them with VOP_SETATTR and replies
 * with wcc data (v3) or, for v2 on success only, the resulting fattr.
 *
 * The handle is resolved twice: once up front only to call vn_start_write()
 * (paired with vn_finished_write() at nfsmout), and again through
 * nfsrv_fhtovp() once all arguments have been decoded.
 *
 * Returns `error' as it stands at nfsmout; it is 0 on every path below that
 * reaches nfsmout explicitly or by falling through.
 * NOTE(review): the nfsm_*() macros are defined elsewhere and, per the notes
 * at the top of this file, may jump to nfsmout themselves -- confirm what
 * they leave in `error'.
282 */
283 int
284 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
285 struct thread *td, struct mbuf **mrq)
286 {
287 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
288 struct sockaddr *nam = nfsd->nd_nam;
289 caddr_t dpos = nfsd->nd_dpos;
290 struct ucred *cred = &nfsd->nd_cr;
291 struct vattr va, preat;
292 struct vattr *vap = &va;
293 struct nfsv2_sattr *sp;
294 struct nfs_fattr *fp;
295 struct vnode *vp = NULL;
296 nfsfh_t nfh;
297 fhandle_t *fhp;
298 u_int32_t *tl;
299 caddr_t bpos;
/* preat_ret/postat_ret start at 1 and only become 0 when VOP_GETATTR succeeds. */
300 int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
301 int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
302 struct mbuf *mb, *mreq;
303 struct timespec guard;
304 struct mount *mp = NULL;
305
306 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
307 fhp = &nfh.fh_generic;
308 nfsm_srvmtofh(fhp);
/* Unknown filesystem id: mp stays NULL for vn_finished_write() at nfsmout. */
309 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
310 error = ESTALE;
311 goto out;
312 }
313 if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0) {
/* vn_start_write() was never called, so do not hand mp to vn_finished_write(). */
314 mp = NULL;
315 goto out;
316 }
/* Return value deliberately ignored; the vnode is only needed for this call. */
317 (void) vn_start_write(vp, &mp, V_WAIT);
318 vput(vp);
319 vp = NULL;
320 VATTR_NULL(vap);
321 if (v3) {
322 nfsm_srvsattr(vap);
/* Optional guard: a flag word, followed by a ctime when the flag is set. */
323 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
324 gcheck = fxdr_unsigned(int, *tl);
325 if (gcheck) {
326 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
327 fxdr_nfsv3time(tl, &guard);
328 }
329 } else {
330 sp = nfsm_dissect(struct nfsv2_sattr *, NFSX_V2SATTR);
331 /*
332 * Nah nah nah nah na nah
333 * There is a bug in the Sun client that puts 0xffff in the mode
334 * field of sattr when it should put in 0xffffffff. The u_short
335 * doesn't sign extend.
336 * --> check the low order 2 bytes for 0xffff
337 */
338 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
339 vap->va_mode = nfstov_mode(sp->sa_mode);
/* A field equal to nfsrv_nfs_xdrneg1 is skipped, leaving the VATTR_NULL value. */
340 if (sp->sa_uid != nfsrv_nfs_xdrneg1)
341 vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
342 if (sp->sa_gid != nfsrv_nfs_xdrneg1)
343 vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
344 if (sp->sa_size != nfsrv_nfs_xdrneg1)
345 vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
346 if (sp->sa_atime.nfsv2_sec != nfsrv_nfs_xdrneg1) {
347 #ifdef notyet
348 fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
349 #else
/* Only the seconds of atime are taken; nanoseconds are forced to 0. */
350 vap->va_atime.tv_sec =
351 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
352 vap->va_atime.tv_nsec = 0;
353 #endif
354 }
355 if (sp->sa_mtime.nfsv2_sec != nfsrv_nfs_xdrneg1)
356 fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
357
358 }
359
360 /*
361 * Now that we have all the fields, lets do it.
362 */
363 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
364 if (error) {
365 nfsm_reply(2 * NFSX_UNSIGNED);
366 if (v3)
367 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
368 error = 0;
369 goto nfsmout;
370 }
371
372 /*
373 * vp now an active resource, pay careful attention to cleanup
374 */
375 if (v3) {
376 error = preat_ret = VOP_GETATTR(vp, &preat, cred, td);
/* Guarded setattr: refuse unless the current ctime equals the client's guard. */
377 if (!error && gcheck &&
378 (preat.va_ctime.tv_sec != guard.tv_sec ||
379 preat.va_ctime.tv_nsec != guard.tv_nsec))
380 error = NFSERR_NOT_SYNC;
381 if (error) {
382 vput(vp);
383 vp = NULL;
384 nfsm_reply(NFSX_WCCDATA(v3));
385 if (v3)
386 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
387 error = 0;
388 goto nfsmout;
389 }
390 }
391
392 /*
393 * If the size is being changed write access is required, otherwise
394 * just check for a read only filesystem.
 * NOTE(review): the comparison against -1 presumably matches the value
 * VATTR_NULL() leaves in va_size, i.e. "size not supplied" -- confirm.
395 */
396 if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
397 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
398 error = EROFS;
399 goto out;
400 }
401 } else {
402 if (vp->v_type == VDIR) {
403 error = EISDIR;
404 goto out;
405 } else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
406 td, 0)) != 0)
407 goto out;
408 }
409 error = VOP_SETATTR(vp, vap, cred, td);
/* vap is reused: it now receives the post-operation attributes. */
410 postat_ret = VOP_GETATTR(vp, vap, cred, td);
411 if (!error)
412 error = postat_ret;
/* Common reply path; vp may still be NULL when reached from the early failures. */
413 out:
414 if (vp != NULL)
415 vput(vp);
416 vp = NULL;
417 nfsm_reply(NFSX_WCCORFATTR(v3));
418 if (v3) {
419 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
420 } else if (!error) {
421 /* v2 non-error case. */
422 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
423 nfsm_srvfillattr(vap, fp);
424 }
425 error = 0;
426 /* fall through */
427
428 nfsmout:
429 if (vp)
430 vput(vp);
/* mp is NULL here unless vn_start_write() was called above. */
431 vn_finished_write(mp);
432 return(error);
433 }
434
435 /*
436 * nfs lookup rpc
437 */
438 int
439 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
440 struct thread *td, struct mbuf **mrq)
441 {
442 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
443 struct sockaddr *nam = nfsd->nd_nam;
444 caddr_t dpos = nfsd->nd_dpos;
445 struct ucred *cred = &nfsd->nd_cr;
446 struct nfs_fattr *fp;
447 struct nameidata nd, ind, *ndp = &nd;
448 struct vnode *vp, *dirp = NULL;
449 nfsfh_t nfh;
450 fhandle_t *fhp;
451 caddr_t bpos;
452 int error = 0, len, dirattr_ret = 1;
453 int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
454 struct mbuf *mb, *mreq;
455 struct vattr va, dirattr, *vap = &va;
456
457 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
458 ndclear(&nd);
459
460 fhp = &nfh.fh_generic;
461 nfsm_srvmtofh(fhp);
462 nfsm_srvnamesiz(len);
463
464 pubflag = nfs_ispublicfh(fhp);
465
466 nd.ni_cnd.cn_cred = cred;
467 nd.ni_cnd.cn_nameiop = LOOKUP;
468 nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART;
469 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
470 &dirp, v3, &dirattr, &dirattr_ret, td, pubflag);
471
472 /*
473 * namei failure, only dirp to cleanup. Clear out garbarge from
474 * structure in case macros jump to nfsmout.
475 */
476
477 if (error) {
478 if (dirp) {
479 vrele(dirp);
480 dirp = NULL;
481 }
482 nfsm_reply(NFSX_POSTOPATTR(v3));
483 if (v3)
484 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
485 error = 0;
486 goto nfsmout;
487 }
488
489 /*
490 * Locate index file for public filehandle
491 *
492 * error is 0 on entry and 0 on exit from this block.
493 */
494
495 if (pubflag) {
496 if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) {
497 /*
498 * Setup call to lookup() to see if we can find
499 * the index file. Arguably, this doesn't belong
500 * in a kernel.. Ugh. If an error occurs, do not
501 * try to install an index file and then clear the
502 * error.
503 *
504 * When we replace nd with ind and redirect ndp,
505 * maintenance of ni_startdir and ni_vp shift to
506 * ind and we have to clean them up in the old nd.
507 * However, the cnd resource continues to be maintained
508 * via the original nd. Confused? You aren't alone!
509 */
510 ind = nd;
511 VOP_UNLOCK(nd.ni_vp, 0, td);
512 ind.ni_pathlen = strlen(nfs_pub.np_index);
513 ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf =
514 nfs_pub.np_index;
515 ind.ni_startdir = nd.ni_vp;
516 VREF(ind.ni_startdir);
517
518 error = lookup(&ind);
519 ind.ni_dvp = NULL;
520
521 if (error == 0) {
522 /*
523 * Found an index file. Get rid of
524 * the old references. transfer nd.ni_vp'
525 */
526 if (dirp)
527 vrele(dirp);
528 dirp = nd.ni_vp;
529 nd.ni_vp = NULL;
530 vrele(nd.ni_startdir);
531 nd.ni_startdir = NULL;
532 ndp = &ind;
533 }
534 error = 0;
535 }
536 /*
537 * If the public filehandle was used, check that this lookup
538 * didn't result in a filehandle outside the publicly exported
539 * filesystem. We clear the poor vp here to avoid lockups due
540 * to NFS I/O.
541 */
542
543 if (ndp->ni_vp->v_mount != nfs_pub.np_mount) {
544 vput(nd.ni_vp);
545 nd.ni_vp = NULL;
546 error = EPERM;
547 }
548 }
549
550 if (dirp) {
551 vrele(dirp);
552 dirp = NULL;
553 }
554
555 /*
556 * Resources at this point:
557 * ndp->ni_vp may not be NULL
558 *
559 */
560
561 if (error) {
562 nfsm_reply(NFSX_POSTOPATTR(v3));
563 if (v3)
564 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
565 error = 0;
566 goto nfsmout;
567 }
568
569 /*
570 * Clear out some resources prior to potentially blocking. This
571 * is not as critical as ni_dvp resources in other routines, but
572 * it helps.
573 */
574 vrele(ndp->ni_startdir);
575 ndp->ni_startdir = NULL;
576 NDFREE(&nd, NDF_ONLY_PNBUF);
577
578 /*
579 * Get underlying attribute, then release remaining resources ( for
580 * the same potential blocking reason ) and reply.
581 */
582 vp = ndp->ni_vp;
583 bzero((caddr_t)fhp, sizeof(nfh));
584 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
585 error = VFS_VPTOFH(vp, &fhp->fh_fid);
586 if (!error)
587 error = VOP_GETATTR(vp, vap, cred, td);
588
589 vput(vp);
590 ndp->ni_vp = NULL;
591 nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
592 if (error) {
593 if (v3)
594 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
595 error = 0;
596 goto nfsmout;
597 }
598 nfsm_srvfhtom(fhp, v3);
599 if (v3) {
600 nfsm_srvpostop_attr(0, vap);
601 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
602 } else {
603 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
604 nfsm_srvfillattr(vap, fp);
605 }
606
607 nfsmout:
608 if (dirp)
609 vrele(dirp);
610 NDFREE(&nd, NDF_ONLY_PNBUF);
611 if (ndp->ni_startdir)
612 vrele(ndp->ni_startdir);
613 if (ndp->ni_vp)
614 vput(ndp->ni_vp);
615 return (error);
616 }
617
618 /*
619 * nfs readlink service
 *
 * Pre-allocates an mbuf chain able to hold NFS_MAXPATHLEN bytes, reads the
 * symbolic link's target into it with VOP_READLINK, trims the chain to the
 * length actually read and links it onto the reply after a length word.
 * A vnode that is not a VLNK is answered with EINVAL (v3) or ENXIO (v2).
 *
 * Returns `error' as it stands at nfsmout; it is 0 on every path below that
 * reaches nfsmout explicitly or by falling through.
 * NOTE(review): the nfsm_*() macros are defined elsewhere and, per the notes
 * at the top of this file, may jump to nfsmout themselves -- confirm what
 * they leave in `error'.
620 */
621 int
622 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
623 struct thread *td, struct mbuf **mrq)
624 {
625 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
626 struct sockaddr *nam = nfsd->nd_nam;
627 caddr_t dpos = nfsd->nd_dpos;
628 struct ucred *cred = &nfsd->nd_cr;
629 struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
630 struct iovec *ivp = iv;
631 struct mbuf *mp;
632 u_int32_t *tl;
633 caddr_t bpos;
634 int error = 0, rdonly, i, tlen, len, getret;
635 int v3 = (nfsd->nd_flag & ND_NFSV3);
636 struct mbuf *mb, *mp3, *nmp, *mreq;
637 struct vnode *vp = NULL;
638 struct vattr attr;
639 nfsfh_t nfh;
640 fhandle_t *fhp;
641 struct uio io, *uiop = &io;
642
643 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
644 #ifndef nolint
645 mp = NULL;
646 #endif
/* mp3 is the head of the data chain; nfsmout frees it unless it was handed to the reply. */
647 mp3 = NULL;
648 fhp = &nfh.fh_generic;
649 nfsm_srvmtofh(fhp);
650 len = 0;
651 i = 0;
/*
 * Build the chain one mbuf at a time, with one iovec entry per mbuf, until
 * it covers NFS_MAXPATHLEN bytes; the last mbuf is shortened to fit.
 * NOTE(review): the MGET/MCLGET results are used without a NULL check --
 * confirm that M_TRYWAIT cannot fail here.
 */
652 while (len < NFS_MAXPATHLEN) {
653 MGET(nmp, M_TRYWAIT, MT_DATA);
654 MCLGET(nmp, M_TRYWAIT);
655 nmp->m_len = NFSMSIZ(nmp);
656 if (len == 0)
657 mp3 = mp = nmp;
658 else {
659 mp->m_next = nmp;
660 mp = nmp;
661 }
662 if ((len + mp->m_len) > NFS_MAXPATHLEN) {
663 mp->m_len = NFS_MAXPATHLEN - len;
664 len = NFS_MAXPATHLEN;
665 } else
666 len += mp->m_len;
667 ivp->iov_base = mtod(mp, caddr_t);
668 ivp->iov_len = mp->m_len;
669 i++;
670 ivp++;
671 }
672 uiop->uio_iov = iv;
673 uiop->uio_iovcnt = i;
674 uiop->uio_offset = 0;
675 uiop->uio_resid = len;
676 uiop->uio_rw = UIO_READ;
677 uiop->uio_segflg = UIO_SYSSPACE;
678 uiop->uio_td = NULL;
679 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
680 if (error) {
681 nfsm_reply(2 * NFSX_UNSIGNED);
682 if (v3)
683 nfsm_srvpostop_attr(1, NULL);
684 error = 0;
685 goto nfsmout;
686 }
687 if (vp->v_type != VLNK) {
688 if (v3)
689 error = EINVAL;
690 else
691 error = ENXIO;
692 goto out;
693 }
694 error = VOP_READLINK(vp, uiop, cred);
/* Reached with vp held, both after the read and on the wrong-type error. */
695 out:
696 getret = VOP_GETATTR(vp, &attr, cred, td);
/* Release vp and NULL it before the reply macros so nfsmout will not vput() it again. */
697 vput(vp);
698 vp = NULL;
699 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
700 if (v3)
701 nfsm_srvpostop_attr(getret, &attr);
702 if (error) {
703 error = 0;
704 goto nfsmout;
705 }
/* Link shorter than the buffer: cut the chain down to the rounded-up length read. */
706 if (uiop->uio_resid > 0) {
707 len -= uiop->uio_resid;
708 tlen = nfsm_rndup(len);
709 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
710 }
711 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
712 *tl = txdr_unsigned(len);
/* Hand the data chain to the reply and forget it so nfsmout does not free it. */
713 mb->m_next = mp3;
714 mp3 = NULL;
715 nfsmout:
716 if (mp3)
717 m_freem(mp3);
718 if (vp)
719 vput(vp);
720 return(error);
721 }
722
723 /*
724 * nfs read service
 *
 * Decodes a file handle, an offset (64-bit for v3, 32-bit for v2) and a
 * byte count, clamps the count to the file size, updates the per-vnode
 * sequential-read heuristic in nfsheur[], reads straight into mbufs
 * appended to the reply, and fills in the attributes and count words.
 * Only VREG vnodes are readable; other types get EINVAL (v3) or
 * EISDIR/EACCES (v2).
 *
 * Returns `error' as it stands at nfsmout; it is 0 on every path below that
 * reaches nfsmout explicitly or by falling through.
 * NOTE(review): the nfsm_*() macros are defined elsewhere and, per the notes
 * at the top of this file, may jump to nfsmout themselves -- confirm what
 * they leave in `error'.
725 */
726 int
727 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
728 struct thread *td, struct mbuf **mrq)
729 {
730 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
731 struct sockaddr *nam = nfsd->nd_nam;
732 caddr_t dpos = nfsd->nd_dpos;
733 struct ucred *cred = &nfsd->nd_cr;
734 struct iovec *iv;
735 struct iovec *iv2;
736 struct mbuf *m;
737 struct nfs_fattr *fp;
738 u_int32_t *tl;
739 int i;
740 caddr_t bpos;
741 int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
742 int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
743 struct mbuf *mb, *mreq;
744 struct mbuf *m2;
745 struct vnode *vp = NULL;
746 nfsfh_t nfh;
747 fhandle_t *fhp;
748 struct uio io, *uiop = &io;
749 struct vattr va, *vap = &va;
750 struct nfsheur *nh;
751 off_t off;
752 int ioflag = 0;
753
754 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
755 fhp = &nfh.fh_generic;
756 nfsm_srvmtofh(fhp);
757 if (v3) {
758 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
759 off = fxdr_hyper(tl);
760 } else {
761 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
762 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
763 }
/* NOTE(review): presumably decodes reqlen and rejects values above NFS_SRVMAXDATA(nfsd) -- confirm. */
764 nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));
765
766 /*
767 * Reference vp. If an error occurs, vp will be invalid, but we
768 * have to NULL it just in case. The macros might goto nfsmout
769 * as well.
770 */
771
772 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
773 if (error) {
774 vp = NULL;
775 nfsm_reply(2 * NFSX_UNSIGNED);
776 if (v3)
777 nfsm_srvpostop_attr(1, NULL);
778 error = 0;
779 goto nfsmout;
780 }
781
782 if (vp->v_type != VREG) {
783 if (v3)
784 error = EINVAL;
785 else
786 error = (vp->v_type == VDIR) ? EISDIR : EACCES;
787 }
/* If the VREAD check fails, the result of a VEXEC check is used instead. */
788 if (!error) {
789 if ((error = nfsrv_access(vp, VREAD, cred, rdonly, td, 1)) != 0)
790 error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 1);
791 }
792 getret = VOP_GETATTR(vp, vap, cred, td);
793 if (!error)
794 error = getret;
795 if (error) {
796 vput(vp);
797 vp = NULL;
798 nfsm_reply(NFSX_POSTOPATTR(v3));
799 if (v3)
800 nfsm_srvpostop_attr(getret, vap);
801 error = 0;
802 goto nfsmout;
803 }
804
805 /*
806 * Calculate byte count to read
 * (0 at or past end of file, otherwise clamped to what remains)
807 */
808
809 if (off >= vap->va_size)
810 cnt = 0;
811 else if ((off + reqlen) > vap->va_size)
812 cnt = vap->va_size - off;
813 else
814 cnt = reqlen;
815
816 /*
817 * Calculate seqcount for heuristic
818 */
819
820 {
821 int hi;
822 int try = 4;
823
824 /*
825 * Locate best candidate
 * Start at a slot derived from the vnode address and probe up to
 * `try' slots: stop at an exact match, otherwise age each probed
 * slot and remember the one with the lowest use count.
826 */
827
828 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) & (NUM_HEURISTIC - 1);
829 nh = &nfsheur[hi];
830
831 while (try--) {
832 if (nfsheur[hi].nh_vp == vp) {
833 nh = &nfsheur[hi];
834 break;
835 }
836 if (nfsheur[hi].nh_use > 0)
837 --nfsheur[hi].nh_use;
838 hi = (hi + 1) & (NUM_HEURISTIC - 1);
839 if (nfsheur[hi].nh_use < nh->nh_use)
840 nh = &nfsheur[hi];
841 }
842
/* No match: take over the chosen slot for this vnode. */
843 if (nh->nh_vp != vp) {
844 nh->nh_vp = vp;
845 nh->nh_nextr = off;
846 nh->nh_use = NHUSE_INIT;
847 if (off == 0)
848 nh->nh_seqcount = 4;
849 else
850 nh->nh_seqcount = 1;
851 }
852
853 /*
854 * Calculate heuristic
 * A read at the expected next offset (or at offset 0 with a positive
 * score) raises the score up to IO_SEQMAX; any other read drops a
 * score above 1 to 1, and a score of 1 or less to 0.
855 */
856
857 if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
858 if (++nh->nh_seqcount > IO_SEQMAX)
859 nh->nh_seqcount = IO_SEQMAX;
860 } else if (nh->nh_seqcount > 1) {
861 nh->nh_seqcount = 1;
862 } else {
863 nh->nh_seqcount = 0;
864 }
865 nh->nh_use += NHUSE_INC;
866 if (nh->nh_use > NHUSE_MAX)
867 nh->nh_use = NHUSE_MAX;
868 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
869 }
870
871 nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
/*
 * Reserve the fixed part of the reply now; fp marks the attribute area and
 * tl the count words, both filled in after the read.
 */
872 if (v3) {
873 tl = nfsm_build(u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
874 *tl++ = nfsrv_nfs_true;
875 fp = (struct nfs_fattr *)tl;
876 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
877 } else {
878 tl = nfsm_build(u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
879 fp = (struct nfs_fattr *)tl;
880 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
881 }
882 len = left = nfsm_rndup(cnt);
883 if (cnt > 0) {
884 /*
885 * Generate the mbuf list with the uio_iov ref. to it.
 * First pass: count the iovec entries needed and append fresh
 * mbufs until `left' bytes of trailing space are available.
886 */
887 i = 0;
888 m = m2 = mb;
889 while (left > 0) {
890 siz = min(M_TRAILINGSPACE(m), left);
891 if (siz > 0) {
892 left -= siz;
893 i++;
894 }
895 if (left > 0) {
896 MGET(m, M_TRYWAIT, MT_DATA);
897 MCLGET(m, M_TRYWAIT);
898 m->m_len = 0;
899 m2->m_next = m;
900 m2 = m;
901 }
902 }
903 MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
904 M_TEMP, M_WAITOK);
/* iv2 keeps the base of the array for FREE(); iv is advanced while filling. */
905 uiop->uio_iov = iv2 = iv;
906 m = mb;
907 left = len;
908 i = 0;
/* Second pass: point one iovec at each mbuf's free space and claim that space. */
909 while (left > 0) {
910 if (m == NULL)
911 panic("nfsrv_read iov");
912 siz = min(M_TRAILINGSPACE(m), left);
913 if (siz > 0) {
914 iv->iov_base = mtod(m, caddr_t) + m->m_len;
915 iv->iov_len = siz;
916 m->m_len += siz;
917 left -= siz;
918 iv++;
919 i++;
920 }
921 m = m->m_next;
922 }
923 uiop->uio_iovcnt = i;
924 uiop->uio_offset = off;
925 uiop->uio_resid = len;
926 uiop->uio_rw = UIO_READ;
927 uiop->uio_segflg = UIO_SYSSPACE;
928 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
/* Remember where this read ended for the next sequential check. */
929 off = uiop->uio_offset;
930 nh->nh_nextr = off;
931 FREE((caddr_t)iv2, M_TEMP);
932 if (error || (getret = VOP_GETATTR(vp, vap, cred, td))) {
933 if (!error)
934 error = getret;
/* Discard the partially built reply; nfsm_reply() below starts a new one. */
935 m_freem(mreq);
936 vput(vp);
937 vp = NULL;
938 nfsm_reply(NFSX_POSTOPATTR(v3));
939 if (v3)
940 nfsm_srvpostop_attr(getret, vap);
941 error = 0;
942 goto nfsmout;
943 }
944 } else {
945 uiop->uio_resid = 0;
946 }
947 vput(vp);
948 vp = NULL;
949 nfsm_srvfillattr(vap, fp);
/* Shrink cnt to what was actually read and trim the unused tail of the chain. */
950 tlen = len - uiop->uio_resid;
951 cnt = cnt < tlen ? cnt : tlen;
952 tlen = nfsm_rndup(cnt);
953 if (len != tlen || tlen != cnt)
954 nfsm_adj(mb, len - tlen, tlen - cnt);
955 if (v3) {
956 *tl++ = txdr_unsigned(cnt);
/* EOF flag: true when the rounded-up length prepared is below the requested length. */
957 if (len < reqlen)
958 *tl++ = nfsrv_nfs_true;
959 else
960 *tl++ = nfsrv_nfs_false;
961 }
962 *tl = txdr_unsigned(cnt);
963 nfsmout:
964 if (vp)
965 vput(vp);
966 return(error);
967 }
968
969 /*
970 * nfs write service
 *
 * Decodes a file handle, offset, stability level (v3) and length, trims the
 * request mbuf chain in place so that exactly `len' data bytes remain,
 * writes them with VOP_WRITE directly from the request mbufs, and replies
 * with wcc data, count, commit level and verifier (v3) or the new fattr
 * (v2, success only).
 *
 * The handle is resolved twice: once up front only to call vn_start_write()
 * (paired with vn_finished_write() at nfsmout), and again through
 * nfsrv_fhtovp() once the arguments have been validated.
 *
 * A NULL nfsd->nd_mrep makes the routine set *mrq to NULL and return 0
 * without doing any work.  Otherwise returns `error' as it stands at
 * nfsmout; it is 0 on every path below that reaches nfsmout explicitly or
 * by falling through.
 * NOTE(review): the nfsm_*() macros are defined elsewhere and, per the notes
 * at the top of this file, may jump to nfsmout themselves -- confirm what
 * they leave in `error'.
971 */
972 int
973 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
974 struct thread *td, struct mbuf **mrq)
975 {
976 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
977 struct sockaddr *nam = nfsd->nd_nam;
978 caddr_t dpos = nfsd->nd_dpos;
979 struct ucred *cred = &nfsd->nd_cr;
980 struct iovec *ivp;
981 int i, cnt;
982 struct mbuf *mp;
983 struct nfs_fattr *fp;
984 struct iovec *iv;
985 struct vattr va, forat;
986 struct vattr *vap = &va;
987 u_int32_t *tl;
988 caddr_t bpos;
/* forat_ret/aftat_ret start at 1 and only become 0 when VOP_GETATTR succeeds. */
989 int error = 0, rdonly, len, forat_ret = 1;
990 int ioflags, aftat_ret = 1, retlen = 0, zeroing, adjust;
991 int stable = NFSV3WRITE_FILESYNC;
992 int v3 = (nfsd->nd_flag & ND_NFSV3);
993 struct mbuf *mb, *mreq;
994 struct vnode *vp = NULL;
995 nfsfh_t nfh;
996 fhandle_t *fhp;
997 struct uio io, *uiop = &io;
998 off_t off;
999 struct mount *mntp = NULL;
1000
1001 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1002 if (mrep == NULL) {
1003 *mrq = NULL;
1004 error = 0;
1005 goto nfsmout;
1006 }
1007 fhp = &nfh.fh_generic;
1008 nfsm_srvmtofh(fhp);
/* Unknown filesystem id: mntp stays NULL for vn_finished_write() at nfsmout. */
1009 if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
1010 error = ESTALE;
1011 goto ereply;
1012 }
1013 if ((error = VFS_FHTOVP(mntp, &fhp->fh_fid, &vp)) != 0) {
/* vn_start_write() was never called, so do not hand mntp to vn_finished_write(). */
1014 mntp = NULL;
1015 goto ereply;
1016 }
/* Return value deliberately ignored; the vnode is only needed for this call. */
1017 (void) vn_start_write(vp, &mntp, V_WAIT);
1018 vput(vp);
1019 vp = NULL;
1020 if (v3) {
/* v3: 64-bit offset in words 0-1, word 2 skipped, stability in word 3, length in word 4. */
1021 tl = nfsm_dissect(u_int32_t *, 5 * NFSX_UNSIGNED);
1022 off = fxdr_hyper(tl);
1023 tl += 3;
1024 stable = fxdr_unsigned(int, *tl++);
1025 } else {
/* v2: offset is taken from word 1 and the length from word 3; words 0 and 2 are skipped. */
1026 tl = nfsm_dissect(u_int32_t *, 4 * NFSX_UNSIGNED);
1027 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1028 tl += 2;
1029 if (nfs_async)
1030 stable = NFSV3WRITE_UNSTABLE;
1031 }
1032 retlen = len = fxdr_unsigned(int32_t, *tl);
1033 cnt = i = 0;
1034
1035 /*
1036 * For NFS Version 2, it is not obvious what a write of zero length
1037 * should do, but I might as well be consistent with Version 3,
1038 * which is to return ok so long as there are no permission problems.
1039 */
/*
 * Trim the request chain in place: mbufs before md get length 0, md is
 * advanced past the already-parsed bytes (dpos), everything beyond `len'
 * data bytes is cut off.  i accumulates the data bytes seen and cnt the
 * number of non-empty mbufs, which becomes the iovec count.
 */
1040 if (len > 0) {
1041 zeroing = 1;
1042 mp = mrep;
1043 while (mp) {
1044 if (mp == md) {
1045 zeroing = 0;
1046 adjust = dpos - mtod(mp, caddr_t);
1047 mp->m_len -= adjust;
1048 if (mp->m_len > 0 && adjust > 0)
1049 mp->m_data += adjust;
1050 }
1051 if (zeroing)
1052 mp->m_len = 0;
1053 else if (mp->m_len > 0) {
1054 i += mp->m_len;
1055 if (i > len) {
1056 mp->m_len -= (i - len);
1057 zeroing = 1;
1058 }
1059 if (mp->m_len > 0)
1060 cnt++;
1061 }
1062 mp = mp->m_next;
1063 }
1064 }
/* Reject oversized or negative lengths and requests carrying fewer bytes than claimed. */
1065 if (len > NFS_MAXDATA || len < 0 || i < len) {
1066 error = EIO;
1067 nfsm_reply(2 * NFSX_UNSIGNED);
1068 if (v3)
1069 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1070 error = 0;
1071 goto nfsmout;
1072 }
1073 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
1074 if (error) {
1075 vp = NULL;
1076 nfsm_reply(2 * NFSX_UNSIGNED);
1077 if (v3)
1078 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1079 error = 0;
1080 goto nfsmout;
1081 }
1082 if (v3)
1083 forat_ret = VOP_GETATTR(vp, &forat, cred, td);
1084 if (vp->v_type != VREG) {
1085 if (v3)
1086 error = EINVAL;
1087 else
1088 error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1089 }
1090 if (!error)
1091 error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
1092 if (error) {
1093 vput(vp);
1094 vp = NULL;
1095 nfsm_reply(NFSX_WCCDATA(v3));
1096 if (v3)
1097 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1098 error = 0;
1099 goto nfsmout;
1100 }
1101
1102 if (len > 0) {
/* One iovec per non-empty request mbuf; iv keeps the base for FREE(). */
1103 MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1104 M_WAITOK);
1105 uiop->uio_iov = iv = ivp;
1106 uiop->uio_iovcnt = cnt;
1107 mp = mrep;
1108 while (mp) {
1109 if (mp->m_len > 0) {
1110 ivp->iov_base = mtod(mp, caddr_t);
1111 ivp->iov_len = mp->m_len;
1112 ivp++;
1113 }
1114 mp = mp->m_next;
1115 }
1116
1117 /*
1118 * XXX
1119 * The IO_METASYNC flag indicates that all metadata (and not just
1120 * enough to ensure data integrity) must be written to stable storage
1121 * synchronously.
1122 * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1123 */
1124 if (stable == NFSV3WRITE_UNSTABLE)
1125 ioflags = IO_NODELOCKED;
1126 else if (stable == NFSV3WRITE_DATASYNC)
1127 ioflags = (IO_SYNC | IO_NODELOCKED);
1128 else
1129 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1130 uiop->uio_resid = len;
1131 uiop->uio_rw = UIO_WRITE;
1132 uiop->uio_segflg = UIO_SYSSPACE;
1133 uiop->uio_td = NULL;
1134 uiop->uio_offset = off;
1135 error = VOP_WRITE(vp, uiop, ioflags, cred);
1136 nfsrvstats.srvvop_writes++;
1137 FREE((caddr_t)iv, M_TEMP);
1138 }
1139 aftat_ret = VOP_GETATTR(vp, vap, cred, td);
/* Release vp and NULL it before the reply macros so nfsmout will not vput() it again. */
1140 vput(vp);
1141 vp = NULL;
1142 if (!error)
1143 error = aftat_ret;
/* Common reply path, also reached directly from the early handle failures. */
1144 ereply:
1145 nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
1146 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
1147 if (v3) {
1148 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1149 if (error) {
1150 error = 0;
1151 goto nfsmout;
1152 }
/* Four words: byte count, commit level, then the two-word verifier. */
1153 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
1154 *tl++ = txdr_unsigned(retlen);
1155 /*
1156 * If nfs_async is set, then pretend the write was FILESYNC.
1157 */
1158 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1159 *tl++ = txdr_unsigned(stable);
1160 else
1161 *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1162 /*
1163 * Actually, there is no need to txdr these fields,
1164 * but it may make the values more human readable,
1165 * for debugging purposes.
1166 */
/* Lazily initialise the verifier from boottime on first use. */
1167 if (nfsver.tv_sec == 0)
1168 nfsver = boottime;
1169 *tl++ = txdr_unsigned(nfsver.tv_sec);
1170 *tl = txdr_unsigned(nfsver.tv_usec);
1171 } else if (!error) {
1172 /* v2 non-error case. */
1173 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
1174 nfsm_srvfillattr(vap, fp);
1175 }
1176 error = 0;
1177 nfsmout:
1178 if (vp)
1179 vput(vp);
/* mntp is NULL here unless vn_start_write() was called above. */
1180 vn_finished_write(mntp);
1181 return(error);
1182 }
1183
1184 /*
1185 * NFS write service with write gathering support. Called when
1186 * nfsrvw_procrastinate > 0.
1187 * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1188 * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco,
1189 * Jan. 1994.
1190 */
1191 int
1192 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1193 struct thread *td, struct mbuf **mrq)
1194 {
1195 struct iovec *ivp;
1196 struct mbuf *mp;
1197 struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1198 struct nfs_fattr *fp;
1199 int i;
1200 struct iovec *iov;
1201 struct nfsrvw_delayhash *wpp;
1202 struct ucred *cred;
1203 struct vattr va, forat;
1204 u_int32_t *tl;
1205 caddr_t bpos, dpos;
1206 int error = 0, rdonly, len, forat_ret = 1;
1207 int ioflags, aftat_ret = 1, s, adjust, v3, zeroing;
1208 struct mbuf *mb, *mreq, *mrep, *md;
1209 struct vnode *vp = NULL;
1210 struct uio io, *uiop = &io;
1211 u_quad_t cur_usec;
1212 struct mount *mntp = NULL;
1213
1214 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1215 #ifndef nolint
1216 i = 0;
1217 len = 0;
1218 #endif
1219 *mrq = NULL;
1220 if (*ndp) {
1221 nfsd = *ndp;
1222 *ndp = NULL;
1223 mrep = nfsd->nd_mrep;
1224 md = nfsd->nd_md;
1225 dpos = nfsd->nd_dpos;
1226 cred = &nfsd->nd_cr;
1227 v3 = (nfsd->nd_flag & ND_NFSV3);
1228 LIST_INIT(&nfsd->nd_coalesce);
1229 nfsd->nd_mreq = NULL;
1230 nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1231 cur_usec = nfs_curusec();
1232 nfsd->nd_time = cur_usec +
1233 (v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1234
1235 /*
1236 * Now, get the write header..
1237 */
1238 nfsm_srvmtofh(&nfsd->nd_fh);
1239 if (v3) {
1240 tl = nfsm_dissect(u_int32_t *, 5 * NFSX_UNSIGNED);
1241 nfsd->nd_off = fxdr_hyper(tl);
1242 tl += 3;
1243 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1244 } else {
1245 tl = nfsm_dissect(u_int32_t *, 4 * NFSX_UNSIGNED);
1246 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1247 tl += 2;
1248 if (nfs_async)
1249 nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1250 }
1251 len = fxdr_unsigned(int32_t, *tl);
1252 nfsd->nd_len = len;
1253 nfsd->nd_eoff = nfsd->nd_off + len;
1254
1255 /*
1256 * Trim the header out of the mbuf list and trim off any trailing
1257 * junk so that the mbuf list has only the write data.
1258 */
1259 zeroing = 1;
1260 i = 0;
1261 mp = mrep;
1262 while (mp) {
1263 if (mp == md) {
1264 zeroing = 0;
1265 adjust = dpos - mtod(mp, caddr_t);
1266 mp->m_len -= adjust;
1267 if (mp->m_len > 0 && adjust > 0)
1268 mp->m_data += adjust;
1269 }
1270 if (zeroing)
1271 mp->m_len = 0;
1272 else {
1273 i += mp->m_len;
1274 if (i > len) {
1275 mp->m_len -= (i - len);
1276 zeroing = 1;
1277 }
1278 }
1279 mp = mp->m_next;
1280 }
1281 if (len > NFS_MAXDATA || len < 0 || i < len) {
1282 nfsmout:
1283 m_freem(mrep);
1284 error = EIO;
1285 nfsm_writereply(2 * NFSX_UNSIGNED);
1286 if (v3)
1287 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1288 nfsd->nd_mreq = mreq;
1289 nfsd->nd_mrep = NULL;
1290 nfsd->nd_time = 0;
1291 }
1292
1293 /*
1294 * Add this entry to the hash and time queues.
1295 */
1296 s = splsoftclock();
1297 owp = NULL;
1298 wp = LIST_FIRST(&slp->ns_tq);
1299 while (wp && wp->nd_time < nfsd->nd_time) {
1300 owp = wp;
1301 wp = LIST_NEXT(wp, nd_tq);
1302 }
1303 NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1304 if (owp) {
1305 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1306 } else {
1307 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1308 }
1309 if (nfsd->nd_mrep) {
1310 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1311 owp = NULL;
1312 wp = LIST_FIRST(wpp);
1313 while (wp &&
1314 bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh, NFSX_V3FH)){
1315 owp = wp;
1316 wp = LIST_NEXT(wp, nd_hash);
1317 }
1318 while (wp && wp->nd_off < nfsd->nd_off &&
1319 !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh, NFSX_V3FH)) {
1320 owp = wp;
1321 wp = LIST_NEXT(wp, nd_hash);
1322 }
1323 if (owp) {
1324 LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1325
1326 /*
1327 * Search the hash list for overlapping entries and
1328 * coalesce.
1329 */
1330 for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1331 wp = LIST_NEXT(nfsd, nd_hash);
1332 if (NFSW_SAMECRED(owp, nfsd))
1333 nfsrvw_coalesce(owp, nfsd);
1334 }
1335 } else {
1336 LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1337 }
1338 }
1339 splx(s);
1340 }
1341
1342 /*
1343 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1344 * and generate the associated reply mbuf list(s).
1345 */
1346 loop1:
1347 cur_usec = nfs_curusec();
1348 s = splsoftclock();
1349 for (nfsd = LIST_FIRST(&slp->ns_tq); nfsd; nfsd = owp) {
1350 owp = LIST_NEXT(nfsd, nd_tq);
1351 if (nfsd->nd_time > cur_usec)
1352 break;
1353 if (nfsd->nd_mreq)
1354 continue;
1355 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1356 LIST_REMOVE(nfsd, nd_tq);
1357 LIST_REMOVE(nfsd, nd_hash);
1358 splx(s);
1359 mrep = nfsd->nd_mrep;
1360 nfsd->nd_mrep = NULL;
1361 cred = &nfsd->nd_cr;
1362 v3 = (nfsd->nd_flag & ND_NFSV3);
1363 forat_ret = aftat_ret = 1;
1364 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp,
1365 nfsd->nd_nam, &rdonly, TRUE);
1366 if (!error) {
1367 if (v3)
1368 forat_ret = VOP_GETATTR(vp, &forat, cred, td);
1369 if (vp->v_type != VREG) {
1370 if (v3)
1371 error = EINVAL;
1372 else
1373 error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1374 }
1375 } else {
1376 vp = NULL;
1377 }
1378 if (!error)
1379 error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
1380 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1381 ioflags = IO_NODELOCKED;
1382 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1383 ioflags = (IO_SYNC | IO_NODELOCKED);
1384 else
1385 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1386 uiop->uio_rw = UIO_WRITE;
1387 uiop->uio_segflg = UIO_SYSSPACE;
1388 uiop->uio_td = NULL;
1389 uiop->uio_offset = nfsd->nd_off;
1390 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1391 if (uiop->uio_resid > 0) {
1392 mp = mrep;
1393 i = 0;
1394 while (mp) {
1395 if (mp->m_len > 0)
1396 i++;
1397 mp = mp->m_next;
1398 }
1399 uiop->uio_iovcnt = i;
1400 MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
1401 M_TEMP, M_WAITOK);
1402 uiop->uio_iov = ivp = iov;
1403 mp = mrep;
1404 while (mp) {
1405 if (mp->m_len > 0) {
1406 ivp->iov_base = mtod(mp, caddr_t);
1407 ivp->iov_len = mp->m_len;
1408 ivp++;
1409 }
1410 mp = mp->m_next;
1411 }
1412 if (!error) {
1413 if (vn_start_write(vp, &mntp, V_NOWAIT) != 0) {
1414 VOP_UNLOCK(vp, 0, td);
1415 error = vn_start_write(NULL, &mntp, V_WAIT);
1416 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1417 }
1418 }
1419 if (!error) {
1420 error = VOP_WRITE(vp, uiop, ioflags, cred);
1421 nfsrvstats.srvvop_writes++;
1422 vn_finished_write(mntp);
1423 }
1424 FREE((caddr_t)iov, M_TEMP);
1425 }
1426 m_freem(mrep);
1427 if (vp) {
1428 aftat_ret = VOP_GETATTR(vp, &va, cred, td);
1429 vput(vp);
1430 vp = NULL;
1431 }
1432
1433 /*
1434 * Loop around generating replies for all write rpcs that have
1435 * now been completed.
1436 */
1437 swp = nfsd;
1438 do {
1439 NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1440 if (error) {
1441 nfsm_writereply(NFSX_WCCDATA(v3));
1442 if (v3) {
1443 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1444 }
1445 } else {
1446 nfsm_writereply(NFSX_PREOPATTR(v3) +
1447 NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED +
1448 NFSX_WRITEVERF(v3));
1449 if (v3) {
1450 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1451 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
1452 *tl++ = txdr_unsigned(nfsd->nd_len);
1453 *tl++ = txdr_unsigned(swp->nd_stable);
1454 /*
1455 * Actually, there is no need to txdr these fields,
1456 * but it may make the values more human readable,
1457 * for debugging purposes.
1458 */
1459 if (nfsver.tv_sec == 0)
1460 nfsver = boottime;
1461 *tl++ = txdr_unsigned(nfsver.tv_sec);
1462 *tl = txdr_unsigned(nfsver.tv_usec);
1463 } else {
1464 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
1465 nfsm_srvfillattr(&va, fp);
1466 }
1467 }
1468 nfsd->nd_mreq = mreq;
1469 if (nfsd->nd_mrep)
1470 panic("nfsrv_write: nd_mrep not free");
1471
1472 /*
1473 * Done. Put it at the head of the timer queue so that
1474 * the final phase can return the reply.
1475 */
1476 s = splsoftclock();
1477 if (nfsd != swp) {
1478 nfsd->nd_time = 0;
1479 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1480 }
1481 nfsd = LIST_FIRST(&swp->nd_coalesce);
1482 if (nfsd) {
1483 LIST_REMOVE(nfsd, nd_tq);
1484 }
1485 splx(s);
1486 } while (nfsd);
1487 s = splsoftclock();
1488 swp->nd_time = 0;
1489 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1490 splx(s);
1491 goto loop1;
1492 }
1493 splx(s);
1494
1495 /*
1496 * Search for a reply to return.
1497 */
1498 s = splsoftclock();
1499 LIST_FOREACH(nfsd, &slp->ns_tq, nd_tq)
1500 if (nfsd->nd_mreq) {
1501 NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1502 LIST_REMOVE(nfsd, nd_tq);
1503 *mrq = nfsd->nd_mreq;
1504 *ndp = nfsd;
1505 break;
1506 }
1507 splx(s);
1508 return (0);
1509 }
1510
1511 /*
1512 * Coalesce the write request nfsd into owp. To do this we must:
1513 * - remove nfsd from the queues
1514 * - merge nfsd->nd_mrep into owp->nd_mrep
1515 * - update the nd_eoff and nd_stable for owp
1516 * - put nfsd on owp's nd_coalesce list
1517 * NB: Must be called at splsoftclock().
1518 */
1519 static void
1520 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1521 {
1522 int overlap;
1523 struct mbuf *mp;
1524 struct nfsrv_descript *p;
1525
1526 NFS_DPF(WG, ("C%03x-%03x",
1527 nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1528 LIST_REMOVE(nfsd, nd_hash);
1529 LIST_REMOVE(nfsd, nd_tq);
1530 if (owp->nd_eoff < nfsd->nd_eoff) {
1531 overlap = owp->nd_eoff - nfsd->nd_off;
1532 if (overlap < 0)
1533 panic("nfsrv_coalesce: bad off");
1534 if (overlap > 0)
1535 m_adj(nfsd->nd_mrep, overlap);
1536 mp = owp->nd_mrep;
1537 while (mp->m_next)
1538 mp = mp->m_next;
1539 mp->m_next = nfsd->nd_mrep;
1540 owp->nd_eoff = nfsd->nd_eoff;
1541 } else
1542 m_freem(nfsd->nd_mrep);
1543 nfsd->nd_mrep = NULL;
1544 if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1545 owp->nd_stable = NFSV3WRITE_FILESYNC;
1546 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1547 owp->nd_stable == NFSV3WRITE_UNSTABLE)
1548 owp->nd_stable = NFSV3WRITE_DATASYNC;
1549 LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1550
1551 /*
1552 * If nfsd had anything else coalesced into it, transfer them
1553 * to owp, otherwise their replies will never get sent.
1554 */
1555 for (p = LIST_FIRST(&nfsd->nd_coalesce); p;
1556 p = LIST_FIRST(&nfsd->nd_coalesce)) {
1557 LIST_REMOVE(p, nd_tq);
1558 LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1559 }
1560 }
1561
1562 /*
1563 * nfs create service
1564 * now does a truncate to 0 length via. setattr if it already exists
1565 */
1566 int
1567 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1568 struct thread *td, struct mbuf **mrq)
1569 {
1570 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1571 struct sockaddr *nam = nfsd->nd_nam;
1572 caddr_t dpos = nfsd->nd_dpos;
1573 struct ucred *cred = &nfsd->nd_cr;
1574 struct nfs_fattr *fp;
1575 struct vattr va, dirfor, diraft;
1576 struct vattr *vap = &va;
1577 struct nfsv2_sattr *sp;
1578 u_int32_t *tl;
1579 struct nameidata nd;
1580 caddr_t bpos;
1581 int error = 0, rdev, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1582 int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
1583 caddr_t cp;
1584 struct mbuf *mb, *mreq;
1585 struct vnode *dirp = NULL;
1586 nfsfh_t nfh;
1587 fhandle_t *fhp;
1588 u_quad_t tempsize;
1589 u_char cverf[NFSX_V3CREATEVERF];
1590 struct mount *mp = NULL;
1591 struct vnode *vp;
1592
1593 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1594 #ifndef nolint
1595 rdev = 0;
1596 #endif
1597 ndclear(&nd);
1598
1599 fhp = &nfh.fh_generic;
1600 nfsm_srvmtofh(fhp);
1601 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
1602 error = ESTALE;
1603 goto ereply;
1604 }
1605 if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0) {
1606 mp = NULL;
1607 goto ereply;
1608 }
1609 (void) vn_start_write(vp, &mp, V_WAIT);
1610 vput(vp);
1611 nfsm_srvnamesiz(len);
1612
1613 nd.ni_cnd.cn_cred = cred;
1614 nd.ni_cnd.cn_nameiop = CREATE;
1615 nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
1616
1617 /*
1618 * Call namei and do initial cleanup to get a few things
1619 * out of the way. If we get an initial error we cleanup
1620 * and return here to avoid special-casing the invalid nd
1621 * structure through the rest of the case. dirp may be
1622 * set even if an error occurs, but the nd structure will not
1623 * be valid at all if an error occurs so we have to invalidate it
1624 * prior to calling nfsm_reply ( which might goto nfsmout ).
1625 */
1626 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
1627 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
1628 if (dirp && !v3) {
1629 vrele(dirp);
1630 dirp = NULL;
1631 }
1632 if (error) {
1633 nfsm_reply(NFSX_WCCDATA(v3));
1634 if (v3)
1635 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1636 error = 0;
1637 goto nfsmout;
1638 }
1639
1640 /*
1641 * No error. Continue. State:
1642 *
1643 * startdir is valid ( we release this immediately )
1644 * dirp may be valid
1645 * nd.ni_vp may be valid
1646 * nd.ni_dvp is valid
1647 *
1648 * The error state is set through the code and we may also do some
1649 * opportunistic releasing of vnodes to avoid holding locks through
1650 * NFS I/O. The cleanup at the end is a catch-all
1651 */
1652
1653 VATTR_NULL(vap);
1654 if (v3) {
1655 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
1656 how = fxdr_unsigned(int, *tl);
1657 switch (how) {
1658 case NFSV3CREATE_GUARDED:
1659 if (nd.ni_vp) {
1660 error = EEXIST;
1661 break;
1662 }
1663 /* fall through */
1664 case NFSV3CREATE_UNCHECKED:
1665 nfsm_srvsattr(vap);
1666 break;
1667 case NFSV3CREATE_EXCLUSIVE:
1668 cp = nfsm_dissect(caddr_t, NFSX_V3CREATEVERF);
1669 bcopy(cp, cverf, NFSX_V3CREATEVERF);
1670 exclusive_flag = 1;
1671 break;
1672 };
1673 vap->va_type = VREG;
1674 } else {
1675 sp = nfsm_dissect(struct nfsv2_sattr *, NFSX_V2SATTR);
1676 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1677 if (vap->va_type == VNON)
1678 vap->va_type = VREG;
1679 vap->va_mode = nfstov_mode(sp->sa_mode);
1680 switch (vap->va_type) {
1681 case VREG:
1682 tsize = fxdr_unsigned(int32_t, sp->sa_size);
1683 if (tsize != -1)
1684 vap->va_size = (u_quad_t)tsize;
1685 break;
1686 case VCHR:
1687 case VBLK:
1688 case VFIFO:
1689 rdev = fxdr_unsigned(long, sp->sa_size);
1690 break;
1691 default:
1692 break;
1693 };
1694 }
1695
1696 /*
1697 * Iff doesn't exist, create it
1698 * otherwise just truncate to 0 length
1699 * should I set the mode too ?
1700 *
1701 * The only possible error we can have at this point is EEXIST.
1702 * nd.ni_vp will also be non-NULL in that case.
1703 */
1704 if (nd.ni_vp == NULL) {
1705 if (vap->va_mode == (mode_t)VNOVAL)
1706 vap->va_mode = 0;
1707 if (vap->va_type == VREG || vap->va_type == VSOCK) {
1708 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1709 if (error)
1710 NDFREE(&nd, NDF_ONLY_PNBUF);
1711 else {
1712 nfsrv_object_create(nd.ni_vp);
1713 if (exclusive_flag) {
1714 exclusive_flag = 0;
1715 VATTR_NULL(vap);
1716 bcopy(cverf, (caddr_t)&vap->va_atime,
1717 NFSX_V3CREATEVERF);
1718 error = VOP_SETATTR(nd.ni_vp, vap, cred,
1719 td);
1720 }
1721 }
1722 } else if (vap->va_type == VCHR || vap->va_type == VBLK ||
1723 vap->va_type == VFIFO) {
1724 /*
1725 * NFSv2-specific code for creating device nodes
1726 * and fifos.
1727 *
1728 * Handle SysV FIFO node special cases. All other
1729 * devices require super user to access.
1730 */
1731 if (vap->va_type == VCHR && rdev == 0xffffffff)
1732 vap->va_type = VFIFO;
1733 if (vap->va_type != VFIFO &&
1734 (error = suser_cred(cred, 0))) {
1735 goto ereply;
1736 }
1737 vap->va_rdev = rdev;
1738 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1739 if (error) {
1740 NDFREE(&nd, NDF_ONLY_PNBUF);
1741 goto ereply;
1742 }
1743 vput(nd.ni_vp);
1744 nd.ni_vp = NULL;
1745
1746 /*
1747 * release dvp prior to lookup
1748 */
1749 vput(nd.ni_dvp);
1750 nd.ni_dvp = NULL;
1751
1752 /*
1753 * Setup for lookup.
1754 *
1755 * Even though LOCKPARENT was cleared, ni_dvp may
1756 * be garbage.
1757 */
1758 nd.ni_cnd.cn_nameiop = LOOKUP;
1759 nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
1760 nd.ni_cnd.cn_thread = td;
1761 nd.ni_cnd.cn_cred = cred;
1762
1763 error = lookup(&nd);
1764 nd.ni_dvp = NULL;
1765 if (error)
1766 goto ereply;
1767
1768 nfsrv_object_create(nd.ni_vp);
1769 if (nd.ni_cnd.cn_flags & ISSYMLINK) {
1770 error = EINVAL;
1771 goto ereply;
1772 }
1773 } else {
1774 error = ENXIO;
1775 }
1776 } else {
1777 if (vap->va_size != -1) {
1778 error = nfsrv_access(nd.ni_vp, VWRITE, cred,
1779 (nd.ni_cnd.cn_flags & RDONLY), td, 0);
1780 if (!error) {
1781 tempsize = vap->va_size;
1782 VATTR_NULL(vap);
1783 vap->va_size = tempsize;
1784 error = VOP_SETATTR(nd.ni_vp, vap, cred,
1785 td);
1786 }
1787 }
1788 }
1789
1790 if (!error) {
1791 bzero((caddr_t)fhp, sizeof(nfh));
1792 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
1793 error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
1794 if (!error)
1795 error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
1796 }
1797 if (v3) {
1798 if (exclusive_flag && !error &&
1799 bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1800 error = EEXIST;
1801 if (dirp == nd.ni_dvp)
1802 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
1803 else {
1804 /* Drop the other locks to avoid deadlock. */
1805 if (nd.ni_dvp) {
1806 if (nd.ni_dvp == nd.ni_vp)
1807 vrele(nd.ni_dvp);
1808 else
1809 vput(nd.ni_dvp);
1810 }
1811 if (nd.ni_vp)
1812 vput(nd.ni_vp);
1813 nd.ni_dvp = NULL;
1814 nd.ni_vp = NULL;
1815
1816 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
1817 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
1818 VOP_UNLOCK(dirp, 0, td);
1819 }
1820 }
1821 ereply:
1822 nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
1823 if (v3) {
1824 if (!error) {
1825 nfsm_srvpostop_fh(fhp);
1826 nfsm_srvpostop_attr(0, vap);
1827 }
1828 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1829 } else if (!error) {
1830 /* v2 non-error case. */
1831 nfsm_srvfhtom(fhp, v3);
1832 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
1833 nfsm_srvfillattr(vap, fp);
1834 }
1835 error = 0;
1836
1837 nfsmout:
1838 if (nd.ni_startdir) {
1839 vrele(nd.ni_startdir);
1840 nd.ni_startdir = NULL;
1841 }
1842 if (dirp)
1843 vrele(dirp);
1844 NDFREE(&nd, NDF_ONLY_PNBUF);
1845 if (nd.ni_dvp) {
1846 if (nd.ni_dvp == nd.ni_vp)
1847 vrele(nd.ni_dvp);
1848 else
1849 vput(nd.ni_dvp);
1850 }
1851 if (nd.ni_vp)
1852 vput(nd.ni_vp);
1853 vn_finished_write(mp);
1854 return (error);
1855 }
1856
1857 /*
1858 * nfs v3 mknod service
1859 */
1860 int
1861 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1862 struct thread *td, struct mbuf **mrq)
1863 {
1864 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1865 struct sockaddr *nam = nfsd->nd_nam;
1866 caddr_t dpos = nfsd->nd_dpos;
1867 struct ucred *cred = &nfsd->nd_cr;
1868 struct vattr va, dirfor, diraft;
1869 struct vattr *vap = &va;
1870 u_int32_t *tl;
1871 struct nameidata nd;
1872 caddr_t bpos;
1873 int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1874 u_int32_t major, minor;
1875 enum vtype vtyp;
1876 struct mbuf *mb, *mreq;
1877 struct vnode *vp, *dirp = NULL;
1878 nfsfh_t nfh;
1879 fhandle_t *fhp;
1880 struct mount *mp = NULL;
1881 int v3 = (nfsd->nd_flag & ND_NFSV3);
1882
1883 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1884 if (!v3)
1885 panic("nfsrv_mknod: v3 proc called on a v2 connection");
1886 ndclear(&nd);
1887
1888 fhp = &nfh.fh_generic;
1889 nfsm_srvmtofh(fhp);
1890 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
1891 error = ESTALE;
1892 goto ereply;
1893 }
1894 if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0) {
1895 mp = NULL;
1896 goto ereply;
1897 }
1898 (void) vn_start_write(vp, &mp, V_WAIT);
1899 vput(vp);
1900 vp = NULL;
1901 nfsm_srvnamesiz(len);
1902
1903 nd.ni_cnd.cn_cred = cred;
1904 nd.ni_cnd.cn_nameiop = CREATE;
1905 nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
1906
1907 /*
1908 * Handle nfs_namei() call. If an error occurs, the nd structure
1909 * is not valid. However, nfsm_*() routines may still jump to
1910 * nfsmout.
1911 */
1912
1913 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
1914 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
1915 if (error) {
1916 nfsm_reply(NFSX_WCCDATA(1));
1917 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1918 error = 0;
1919 goto nfsmout;
1920 }
1921 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
1922 vtyp = nfsv3tov_type(*tl);
1923 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1924 error = NFSERR_BADTYPE;
1925 goto out;
1926 }
1927 VATTR_NULL(vap);
1928 nfsm_srvsattr(vap);
1929 if (vtyp == VCHR || vtyp == VBLK) {
1930 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
1931 major = fxdr_unsigned(u_int32_t, *tl++);
1932 minor = fxdr_unsigned(u_int32_t, *tl);
1933 vap->va_rdev = makeudev(major, minor);
1934 }
1935
1936 /*
1937 * Iff doesn't exist, create it.
1938 */
1939 if (nd.ni_vp) {
1940 error = EEXIST;
1941 goto out;
1942 }
1943 vap->va_type = vtyp;
1944 if (vap->va_mode == (mode_t)VNOVAL)
1945 vap->va_mode = 0;
1946 if (vtyp == VSOCK) {
1947 vrele(nd.ni_startdir);
1948 nd.ni_startdir = NULL;
1949 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1950 if (error)
1951 NDFREE(&nd, NDF_ONLY_PNBUF);
1952 } else {
1953 if (vtyp != VFIFO && (error = suser_cred(cred, 0)))
1954 goto out;
1955 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1956 if (error) {
1957 NDFREE(&nd, NDF_ONLY_PNBUF);
1958 goto out;
1959 }
1960 vput(nd.ni_vp);
1961 nd.ni_vp = NULL;
1962
1963 /*
1964 * Release dvp prior to lookup
1965 */
1966 vput(nd.ni_dvp);
1967 nd.ni_dvp = NULL;
1968
1969 nd.ni_cnd.cn_nameiop = LOOKUP;
1970 nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
1971 nd.ni_cnd.cn_thread = td;
1972 nd.ni_cnd.cn_cred = td->td_ucred;
1973
1974 error = lookup(&nd);
1975 nd.ni_dvp = NULL;
1976
1977 if (error)
1978 goto out;
1979 if (nd.ni_cnd.cn_flags & ISSYMLINK)
1980 error = EINVAL;
1981 }
1982
1983 /*
1984 * send response, cleanup, return.
1985 */
1986 out:
1987 if (nd.ni_startdir) {
1988 vrele(nd.ni_startdir);
1989 nd.ni_startdir = NULL;
1990 }
1991 NDFREE(&nd, NDF_ONLY_PNBUF);
1992 if (nd.ni_dvp) {
1993 if (nd.ni_dvp == nd.ni_vp)
1994 vrele(nd.ni_dvp);
1995 else
1996 vput(nd.ni_dvp);
1997 nd.ni_dvp = NULL;
1998 }
1999 vp = nd.ni_vp;
2000 if (!error) {
2001 bzero((caddr_t)fhp, sizeof(nfh));
2002 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
2003 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2004 if (!error)
2005 error = VOP_GETATTR(vp, vap, cred, td);
2006 }
2007 if (vp) {
2008 vput(vp);
2009 vp = NULL;
2010 nd.ni_vp = NULL;
2011 }
2012 if (dirp) {
2013 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
2014 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2015 VOP_UNLOCK(dirp, 0, td);
2016 }
2017 ereply:
2018 nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
2019 if (v3) {
2020 if (!error) {
2021 nfsm_srvpostop_fh(fhp);
2022 nfsm_srvpostop_attr(0, vap);
2023 }
2024 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2025 }
2026 vn_finished_write(mp);
2027 return (0);
2028 nfsmout:
2029 if (dirp)
2030 vrele(dirp);
2031 if (nd.ni_startdir)
2032 vrele(nd.ni_startdir);
2033 NDFREE(&nd, NDF_ONLY_PNBUF);
2034 if (nd.ni_dvp) {
2035 if (nd.ni_dvp == nd.ni_vp)
2036 vrele(nd.ni_dvp);
2037 else
2038 vput(nd.ni_dvp);
2039 }
2040 if (nd.ni_vp)
2041 vput(nd.ni_vp);
2042 vn_finished_write(mp);
2043 return (error);
2044 }
2045
2046 /*
2047 * nfs remove service
2048 */
2049 int
2050 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2051 struct thread *td, struct mbuf **mrq)
2052 {
2053 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2054 struct sockaddr *nam = nfsd->nd_nam;
2055 caddr_t dpos = nfsd->nd_dpos;
2056 struct ucred *cred = &nfsd->nd_cr;
2057 struct nameidata nd;
2058 caddr_t bpos;
2059 int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2060 int v3 = (nfsd->nd_flag & ND_NFSV3);
2061 struct mbuf *mb, *mreq;
2062 struct vnode *dirp;
2063 struct vattr dirfor, diraft;
2064 nfsfh_t nfh;
2065 fhandle_t *fhp;
2066 struct mount *mp = NULL;
2067 struct vnode *vp;
2068
2069 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2070 ndclear(&nd);
2071
2072 fhp = &nfh.fh_generic;
2073 nfsm_srvmtofh(fhp);
2074 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2075 error = ESTALE;
2076 goto ereply;
2077 }
2078 if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0) {
2079 mp = NULL;
2080 goto ereply;
2081 }
2082 (void) vn_start_write(vp, &mp, V_WAIT);
2083 vput(vp);
2084 vp = NULL;
2085 nfsm_srvnamesiz(len);
2086
2087 nd.ni_cnd.cn_cred = cred;
2088 nd.ni_cnd.cn_nameiop = DELETE;
2089 nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
2090 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
2091 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
2092 if (dirp && !v3) {
2093 vrele(dirp);
2094 dirp = NULL;
2095 }
2096 if (error == 0) {
2097 if (nd.ni_vp->v_type == VDIR) {
2098 error = EPERM; /* POSIX */
2099 goto out;
2100 }
2101 /*
2102 * The root of a mounted filesystem cannot be deleted.
2103 */
2104 if (nd.ni_vp->v_vflag & VV_ROOT) {
2105 error = EBUSY;
2106 goto out;
2107 }
2108 out:
2109 if (!error) {
2110 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2111 NDFREE(&nd, NDF_ONLY_PNBUF);
2112 }
2113 }
2114 if (dirp && v3) {
2115 if (dirp == nd.ni_dvp)
2116 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2117 else {
2118 /* Drop the other locks to avoid deadlock. */
2119 if (nd.ni_dvp) {
2120 if (nd.ni_dvp == nd.ni_vp)
2121 vrele(nd.ni_dvp);
2122 else
2123 vput(nd.ni_dvp);
2124 }
2125 if (nd.ni_vp)
2126 vput(nd.ni_vp);
2127 nd.ni_dvp = NULL;
2128 nd.ni_vp = NULL;
2129
2130 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
2131 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2132 VOP_UNLOCK(dirp, 0, td);
2133 }
2134 vrele(dirp);
2135 dirp = NULL;
2136 }
2137 ereply:
2138 nfsm_reply(NFSX_WCCDATA(v3));
2139 if (v3) {
2140 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2141 error = 0;
2142 }
2143 nfsmout:
2144 NDFREE(&nd, NDF_ONLY_PNBUF);
2145 if (nd.ni_dvp) {
2146 if (nd.ni_dvp == nd.ni_vp)
2147 vrele(nd.ni_dvp);
2148 else
2149 vput(nd.ni_dvp);
2150 }
2151 if (nd.ni_vp)
2152 vput(nd.ni_vp);
2153 vn_finished_write(mp);
2154 return(error);
2155 }
2156
2157 /*
2158 * nfs rename service
2159 */
2160 int
2161 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2162 struct thread *td, struct mbuf **mrq)
2163 {
2164 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2165 struct sockaddr *nam = nfsd->nd_nam;
2166 caddr_t dpos = nfsd->nd_dpos;
2167 struct ucred *cred = &nfsd->nd_cr;
2168 caddr_t bpos;
2169 int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2170 int tdirfor_ret = 1, tdiraft_ret = 1;
2171 int v3 = (nfsd->nd_flag & ND_NFSV3);
2172 struct mbuf *mb, *mreq;
2173 struct nameidata fromnd, tond;
2174 struct vnode *fvp, *tvp, *tdvp, *fdirp = NULL;
2175 struct vnode *tdirp = NULL;
2176 struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2177 nfsfh_t fnfh, tnfh;
2178 fhandle_t *ffhp, *tfhp;
2179 uid_t saved_uid;
2180 struct mount *mp = NULL;
2181 struct vnode *vp;
2182
2183 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2184 #ifndef nolint
2185 fvp = NULL;
2186 #endif
2187 ffhp = &fnfh.fh_generic;
2188 tfhp = &tnfh.fh_generic;
2189
2190 /*
2191 * Clear fields incase goto nfsmout occurs from macro.
2192 */
2193
2194 ndclear(&fromnd);
2195 ndclear(&tond);
2196
2197 nfsm_srvmtofh(ffhp);
2198 if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL) {
2199 error = ESTALE;
2200 goto out1;
2201 }
2202 if ((error = VFS_FHTOVP(mp, &ffhp->fh_fid, &vp)) != 0) {
2203 mp = NULL;
2204 goto out1;
2205 }
2206 (void) vn_start_write(vp, &mp, V_WAIT);
2207 vput(vp);
2208 vp = NULL;
2209 nfsm_srvnamesiz(len);
2210 /*
2211 * Remember our original uid so that we can reset cr_uid before
2212 * the second nfs_namei() call, in case it is remapped.
2213 */
2214 saved_uid = cred->cr_uid;
2215 fromnd.ni_cnd.cn_cred = cred;
2216 fromnd.ni_cnd.cn_nameiop = DELETE;
2217 fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART;
2218 error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md,
2219 &dpos, &fdirp, v3, &fdirfor, &fdirfor_ret, td, FALSE);
2220 if (fdirp && !v3) {
2221 vrele(fdirp);
2222 fdirp = NULL;
2223 }
2224 if (error) {
2225 nfsm_reply(2 * NFSX_WCCDATA(v3));
2226 if (v3) {
2227 nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2228 nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2229 }
2230 error = 0;
2231 goto nfsmout;
2232 }
2233 fvp = fromnd.ni_vp;
2234 nfsm_srvmtofh(tfhp);
2235 nfsm_srvnamesiz(len2);
2236 cred->cr_uid = saved_uid;
2237 tond.ni_cnd.cn_cred = cred;
2238 tond.ni_cnd.cn_nameiop = RENAME;
2239 tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
2240 error = nfs_namei(&tond, tfhp, len2, slp, nam, &md,
2241 &dpos, &tdirp, v3, &tdirfor, &tdirfor_ret, td, FALSE);
2242 if (tdirp && !v3) {
2243 vrele(tdirp);
2244 tdirp = NULL;
2245 }
2246 if (error)
2247 goto out1;
2248
2249 tdvp = tond.ni_dvp;
2250 tvp = tond.ni_vp;
2251 if (tvp != NULL) {
2252 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2253 if (v3)
2254 error = EEXIST;
2255 else
2256 error = EISDIR;
2257 goto out;
2258 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2259 if (v3)
2260 error = EEXIST;
2261 else
2262 error = ENOTDIR;
2263 goto out;
2264 }
2265 if (tvp->v_type == VDIR && tvp->v_mountedhere) {
2266 if (v3)
2267 error = EXDEV;
2268 else
2269 error = ENOTEMPTY;
2270 goto out;
2271 }
2272 }
2273 if (fvp->v_type == VDIR && fvp->v_mountedhere) {
2274 if (v3)
2275 error = EXDEV;
2276 else
2277 error = ENOTEMPTY;
2278 goto out;
2279 }
2280 if (fvp->v_mount != tdvp->v_mount) {
2281 if (v3)
2282 error = EXDEV;
2283 else
2284 error = ENOTEMPTY;
2285 goto out;
2286 }
2287 if (fvp == tdvp) {
2288 if (v3)
2289 error = EINVAL;
2290 else
2291 error = ENOTEMPTY;
2292 }
2293 /*
2294 * If source is the same as the destination (that is the
2295 * same vnode with the same name in the same directory),
2296 * then there is nothing to do.
2297 */
2298 if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2299 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2300 !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2301 fromnd.ni_cnd.cn_namelen))
2302 error = -1;
2303 out:
2304 if (!error) {
2305 /*
2306 * The VOP_RENAME function releases all vnode references &
2307 * locks prior to returning so we need to clear the pointers
2308 * to bypass cleanup code later on.
2309 */
2310 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2311 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2312 fromnd.ni_dvp = NULL;
2313 fromnd.ni_vp = NULL;
2314 tond.ni_dvp = NULL;
2315 tond.ni_vp = NULL;
2316 if (error) {
2317 fromnd.ni_cnd.cn_flags &= ~HASBUF;
2318 tond.ni_cnd.cn_flags &= ~HASBUF;
2319 }
2320 } else {
2321 if (error == -1)
2322 error = 0;
2323 }
2324 /* fall through */
2325
2326 out1:
2327 nfsm_reply(2 * NFSX_WCCDATA(v3));
2328 if (v3) {
2329 /* Release existing locks to prevent deadlock. */
2330 if (tond.ni_dvp) {
2331 if (tond.ni_dvp == tond.ni_vp)
2332 vrele(tond.ni_dvp);
2333 else
2334 vput(tond.ni_dvp);
2335 }
2336 if (tond.ni_vp)
2337 vput(tond.ni_vp);
2338 tond.ni_dvp = NULL;
2339 tond.ni_vp = NULL;
2340
2341 if (fdirp) {
2342 vn_lock(fdirp, LK_EXCLUSIVE | LK_RETRY, td);
2343 fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, td);
2344 VOP_UNLOCK(fdirp, 0, td);
2345 }
2346 if (tdirp) {
2347 vn_lock(tdirp, LK_EXCLUSIVE | LK_RETRY, td);
2348 tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, td);
2349 VOP_UNLOCK(tdirp, 0, td);
2350 }
2351 nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2352 nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2353 }
2354 error = 0;
2355 /* fall through */
2356
2357 nfsmout:
2358 /*
2359 * Clear out tond related fields
2360 */
2361 if (tdirp)
2362 vrele(tdirp);
2363 if (tond.ni_startdir)
2364 vrele(tond.ni_startdir);
2365 NDFREE(&tond, NDF_ONLY_PNBUF);
2366 if (tond.ni_dvp) {
2367 if (tond.ni_dvp == tond.ni_vp)
2368 vrele(tond.ni_dvp);
2369 else
2370 vput(tond.ni_dvp);
2371 }
2372 if (tond.ni_vp)
2373 vput(tond.ni_vp);
2374
2375 /*
2376 * Clear out fromnd related fields
2377 */
2378 if (fdirp)
2379 vrele(fdirp);
2380 if (fromnd.ni_startdir)
2381 vrele(fromnd.ni_startdir);
2382 NDFREE(&fromnd, NDF_ONLY_PNBUF);
2383 if (fromnd.ni_dvp)
2384 vrele(fromnd.ni_dvp);
2385 if (fromnd.ni_vp)
2386 vrele(fromnd.ni_vp);
2387
2388 vn_finished_write(mp);
2389 return (error);
2390 }
2391
2392 /*
2393 * nfs link service
2394 */
/*
 * nfsrv_link - service an NFS LINK request.
 *
 * The request carries the file handle of the existing file, the file
 * handle of the target directory and the length of the new name.  After
 * the checks below the new name is created with VOP_LINK().
 *
 * nfsd    - request descriptor; supplies the request mbufs (nd_mrep,
 *           nd_md), the parse position (nd_dpos), the client address
 *           (nd_nam), the credentials (nd_cr) and the ND_NFSV3 flag.
 * slp, td - handed on to nfsrv_fhtovp()/nfs_namei() and to the vnode
 *           operations respectively.
 * mrq     - not referenced by name in this body.
 *           NOTE(review): presumably consumed by nfsm_reply(), which is
 *           defined elsewhere -- confirm.
 *
 * Returns whatever is left in `error'.  It is explicitly cleared after
 * the reply is built on the nfsrv_fhtovp() failure path and on the v3
 * reply path.
 *
 * NOTE(review): the nfsm_*() macros are defined outside this part of the
 * file.  They appear to use the locals mrep, md, dpos, mb, mreq and bpos
 * implicitly and may transfer control to the nfsmout label -- confirm
 * before renaming locals or reordering statements.
 */
2395 int
2396 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2397 struct thread *td, struct mbuf **mrq)
2398 {
2399 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2400 struct sockaddr *nam = nfsd->nd_nam;
2401 caddr_t dpos = nfsd->nd_dpos;
2402 struct ucred *cred = &nfsd->nd_cr;
2403 struct nameidata nd;
2404 caddr_t bpos;
2405 int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2406 int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
2407 struct mbuf *mb, *mreq;
2408 struct vnode *vp = NULL, *xp, *dirp = NULL;
2409 struct vattr dirfor, diraft, at;
2410 nfsfh_t nfh, dnfh;
2411 fhandle_t *fhp, *dfhp;
2412 struct mount *mp = NULL;
2413
2414 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2415 ndclear(&nd);
2416
/* fhp: handle of the existing file; dfhp: handle of the target directory. */
2417 fhp = &nfh.fh_generic;
2418 dfhp = &dnfh.fh_generic;
2419 nfsm_srvmtofh(fhp);
/*
 * Resolve the file handle to a mount point and a vnode.  The vnode is
 * only used as the argument to vn_start_write() and is dropped again
 * straight away; it is looked up a second time by nfsrv_fhtovp() below.
 */
2420 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2421 error = ESTALE;
2422 goto ereply;
2423 }
2424 if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0) {
/* No write was started: make sure vn_finished_write() is passed NULL. */
2425 mp = NULL;
2426 goto ereply;
2427 }
/* Matched by the vn_finished_write(mp) at the end of the function. */
2428 (void) vn_start_write(vp, &mp, V_WAIT);
2429 vput(vp);
2430 vp = NULL;
2431 nfsm_srvmtofh(dfhp);
2432 nfsm_srvnamesiz(len);
2433
2434 error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, slp, nam, &rdonly, TRUE);
2435 if (error) {
2436 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2437 if (v3) {
2438 nfsm_srvpostop_attr(getret, &at);
2439 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2440 }
/* Keep nfsmout from calling vput() on vp. */
2441 vp = NULL;
2442 error = 0;
2443 goto nfsmout;
2444 }
2445 if (v3)
2446 getret = VOP_GETATTR(vp, &at, cred, td);
/* Directories may not be hard-linked. */
2447 if (vp->v_type == VDIR) {
2448 error = EPERM; /* POSIX */
2449 goto out1;
2450 }
/*
 * vp is unlocked across the directory lookup and locked again just
 * before VOP_LINK().  On the error exits in between only the reference
 * is dropped (vrele) and vp is cleared.
 */
2451 VOP_UNLOCK(vp, 0, td);
2452 nd.ni_cnd.cn_cred = cred;
2453 nd.ni_cnd.cn_nameiop = CREATE;
2454 nd.ni_cnd.cn_flags = LOCKPARENT;
2455 error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos,
2456 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
/* dirp is only kept for v3, where it is used for the directory attributes. */
2457 if (dirp && !v3) {
2458 vrele(dirp);
2459 dirp = NULL;
2460 }
2461 if (error) {
2462 vrele(vp);
2463 vp = NULL;
2464 goto out2;
2465 }
/* The new name must not exist yet. */
2466 xp = nd.ni_vp;
2467 if (xp != NULL) {
2468 error = EEXIST;
2469 vrele(vp);
2470 vp = NULL;
2471 goto out2;
2472 }
/* The file and the target directory must be on the same mount. */
2473 xp = nd.ni_dvp;
2474 if (vp->v_mount != xp->v_mount) {
2475 error = EXDEV;
2476 vrele(vp);
2477 vp = NULL;
2478 goto out2;
2479 }
2480 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2481 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2482 NDFREE(&nd, NDF_ONLY_PNBUF);
2483 /* fall through */
2484
/* out1: vp is valid and locked here; refresh the file attributes for v3. */
2485 out1:
2486 if (v3)
2487 getret = VOP_GETATTR(vp, &at, cred, td);
/*
 * out2: fetch the directory attributes after the operation.  If dirp is
 * the same vnode as nd.ni_dvp the attributes are read directly;
 * otherwise the vnodes held in nd are released first and dirp is locked
 * around VOP_GETATTR().
 */
2488 out2:
2489 if (dirp) {
2490 if (dirp == nd.ni_dvp)
2491 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2492 else {
2493 /* Release existing locks to prevent deadlock. */
2494 if (nd.ni_dvp) {
2495 if (nd.ni_dvp == nd.ni_vp)
2496 vrele(nd.ni_dvp);
2497 else
2498 vput(nd.ni_dvp);
2499 }
2500 if (nd.ni_vp)
2501 vrele(nd.ni_vp);
2502 nd.ni_dvp = NULL;
2503 nd.ni_vp = NULL;
2504
2505 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
2506 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2507 VOP_UNLOCK(dirp, 0, td);
2508 }
2509 }
/* ereply: build the reply; also the target of the early handle failures. */
2510 ereply:
2511 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2512 if (v3) {
2513 nfsm_srvpostop_attr(getret, &at);
2514 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2515 error = 0;
2516 }
2517 /* fall through */
2518
/* nfsmout: release whatever is still held and end the write started above. */
2519 nfsmout:
2520 NDFREE(&nd, NDF_ONLY_PNBUF);
2521 if (dirp)
2522 vrele(dirp);
2523 if (vp)
2524 vput(vp);
2525 if (nd.ni_dvp) {
2526 if (nd.ni_dvp == nd.ni_vp)
2527 vrele(nd.ni_dvp);
2528 else
2529 vput(nd.ni_dvp);
2530 }
2531 if (nd.ni_vp)
2532 vrele(nd.ni_vp);
2533 vn_finished_write(mp);
2534 return(error);
2535 }
2536
2537 /*
2538 * nfs symbolic link service
2539 */
/*
 * nfsrv_symlink - service an NFS SYMLINK request.
 *
 * The request carries the file handle of the directory, the name of the
 * new link and the link target path.  For v3 the settable attributes
 * precede the target path; for v2 they follow it and only the mode is
 * used.  The link is created with VOP_SYMLINK().  For v3 the new link is
 * then looked up again so that its file handle and attributes can be
 * placed in the reply.
 *
 * nfsd    - request descriptor; supplies the request mbufs (nd_mrep,
 *           nd_md), the parse position (nd_dpos), the client address
 *           (nd_nam), the credentials (nd_cr) and the ND_NFSV3 flag.
 * slp, td - handed on to nfs_namei() and to the vnode operations.
 * mrq     - not referenced by name in this body.
 *           NOTE(review): presumably consumed by nfsm_reply(), which is
 *           defined elsewhere -- confirm.
 *
 * Returns whatever is left in `error'; it is cleared once the reply has
 * been built.
 *
 * NOTE(review): there is no explicit "goto nfsmout" in this function, so
 * the label is presumably reached from inside the nfsm_*() macros
 * (defined elsewhere) -- confirm before renaming locals or reordering
 * statements.
 */
2540 int
2541 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2542 struct thread *td, struct mbuf **mrq)
2543 {
2544 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2545 struct sockaddr *nam = nfsd->nd_nam;
2546 caddr_t dpos = nfsd->nd_dpos;
2547 struct ucred *cred = &nfsd->nd_cr;
2548 struct vattr va, dirfor, diraft;
2549 struct nameidata nd;
2550 struct vattr *vap = &va;
2551 struct nfsv2_sattr *sp;
2552 char *bpos, *pathcp = NULL;
2553 struct uio io;
2554 struct iovec iv;
2555 int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2556 int v3 = (nfsd->nd_flag & ND_NFSV3);
2557 struct mbuf *mb, *mreq;
2558 struct vnode *dirp = NULL;
2559 nfsfh_t nfh;
2560 fhandle_t *fhp;
2561 struct mount *mp = NULL;
2562 struct vnode *vp;
2563
2564 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2565 ndclear(&nd);
2566
2567 fhp = &nfh.fh_generic;
2568 nfsm_srvmtofh(fhp);
/*
 * Resolve the directory handle to a mount point and a vnode.  The vnode
 * is only used as the argument to vn_start_write() and is dropped again
 * straight away.
 */
2569 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2570 error = ESTALE;
2571 goto out;
2572 }
2573 if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0) {
/* No write was started: make sure vn_finished_write() is passed NULL. */
2574 mp = NULL;
2575 goto out;
2576 }
/* Matched by the vn_finished_write(mp) at the end of the function. */
2577 (void) vn_start_write(vp, &mp, V_WAIT);
2578 vput(vp);
2579 vp = NULL;
2580 nfsm_srvnamesiz(len);
2581 nd.ni_cnd.cn_cred = cred;
2582 nd.ni_cnd.cn_nameiop = CREATE;
2583 nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART;
2584 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
2585 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
/* dirp is only kept for v3, where it is used for the directory attributes. */
2586 if (dirp && !v3) {
2587 vrele(dirp);
2588 dirp = NULL;
2589 }
2590 if (error)
2591 goto out;
2592
2593 VATTR_NULL(vap);
2594 if (v3)
2595 nfsm_srvsattr(vap);
/*
 * Copy the link target out of the request into a freshly allocated
 * buffer of len2 + 1 bytes; the extra byte holds the terminating NUL
 * stored further down.
 */
2596 nfsm_srvpathsiz(len2);
2597 MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2598 iv.iov_base = pathcp;
2599 iv.iov_len = len2;
2600 io.uio_resid = len2;
2601 io.uio_offset = 0;
2602 io.uio_iov = &iv;
2603 io.uio_iovcnt = 1;
2604 io.uio_segflg = UIO_SYSSPACE;
2605 io.uio_rw = UIO_READ;
2606 io.uio_td = NULL;
2607 nfsm_mtouio(&io, len2);
/* v2: the attributes follow the path; only the mode is taken from them. */
2608 if (!v3) {
2609 sp = nfsm_dissect(struct nfsv2_sattr *, NFSX_V2SATTR);
2610 vap->va_mode = nfstov_mode(sp->sa_mode);
2611 }
2612 *(pathcp + len2) = '\0';
/* The new name must not exist yet. */
2613 if (nd.ni_vp) {
2614 error = EEXIST;
2615 goto out;
2616 }
2617
2618 /*
2619 * issue symlink op. SAVESTART is set so the underlying path component
2620 * is only freed by the VOP if an error occurs.
2621 */
/* A mode that was never set by the client is replaced by 0. */
2622 if (vap->va_mode == (mode_t)VNOVAL)
2623 vap->va_mode = 0;
2624 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
2625 if (error)
2626 NDFREE(&nd, NDF_ONLY_PNBUF);
2627 else
2628 vput(nd.ni_vp);
2629 nd.ni_vp = NULL;
2630 /*
2631 * releases directory prior to potential lookup op.
2632 */
2633 vput(nd.ni_dvp);
2634 nd.ni_dvp = NULL;
2635
2636 if (error == 0) {
2637 if (v3) {
2638 /*
2639 * Issue lookup. Leave SAVESTART set so we can easily free
2640 * the name buffer later on.
2641 *
2642 * since LOCKPARENT is not set, ni_dvp will be garbage on
2643 * return whether an error occurs or not.
2644 */
2645 nd.ni_cnd.cn_nameiop = LOOKUP;
2646 nd.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW);
2647 nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF);
2648 nd.ni_cnd.cn_thread = td;
2649 nd.ni_cnd.cn_cred = cred;
2650
2651 error = lookup(&nd);
2652 nd.ni_dvp = NULL;
2653
/* Build the file handle and fetch the attributes of the new link. */
2654 if (error == 0) {
2655 bzero((caddr_t)fhp, sizeof(nfh));
2656 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
2657 error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
2658 if (!error)
2659 error = VOP_GETATTR(nd.ni_vp, vap, cred,
2660 td);
2661 vput(nd.ni_vp);
2662 nd.ni_vp = NULL;
2663 }
2664 }
2665 }
2666 out:
2667 /*
2668 * These releases aren't strictly required, does even doing them
2669 * make any sense? XXX can nfsm_reply() block?
2670 */
/* pathcp is cleared after the free so that nfsmout does not free it again. */
2671 if (pathcp) {
2672 FREE(pathcp, M_TEMP);
2673 pathcp = NULL;
2674 }
/*
 * NOTE(review): nfsrv_link(), nfsrv_mkdir() and nfsrv_rmdir() test
 * dirp == nd.ni_dvp before locking dirp; this function does not.  On
 * the EEXIST path above nd.ni_dvp has not been released when control
 * gets here -- confirm that vn_lock(dirp) cannot be a second
 * acquisition of a lock this thread already holds.
 */
2675 if (dirp) {
2676 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
2677 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2678 VOP_UNLOCK(dirp, 0, td);
2679 }
2680 if (nd.ni_startdir) {
2681 vrele(nd.ni_startdir);
2682 nd.ni_startdir = NULL;
2683 }
2684 nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
/* v3 reply: file handle and attributes on success, then directory wcc data. */
2685 if (v3) {
2686 if (!error) {
2687 nfsm_srvpostop_fh(fhp);
2688 nfsm_srvpostop_attr(0, vap);
2689 }
2690 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2691 }
2692 error = 0;
2693 /* fall through */
2694
/* nfsmout: release whatever is still held and end the write started above. */
2695 nfsmout:
2696 NDFREE(&nd, NDF_ONLY_PNBUF);
2697 if (nd.ni_dvp) {
2698 if (nd.ni_dvp == nd.ni_vp)
2699 vrele(nd.ni_dvp);
2700 else
2701 vput(nd.ni_dvp);
2702 }
2703 if (nd.ni_vp)
2704 vrele(nd.ni_vp);
2705 if (nd.ni_startdir)
2706 vrele(nd.ni_startdir);
2707 if (dirp)
2708 vrele(dirp);
2709 if (pathcp)
2710 FREE(pathcp, M_TEMP);
2711
2712 vn_finished_write(mp);
2713 return (error);
2714 }
2715
2716 /*
2717 * nfs mkdir service
2718 */
/*
 * nfsrv_mkdir - service an NFS MKDIR request.
 *
 * The request carries the file handle of the parent directory, the name
 * of the new directory and its attributes (a full settable-attribute
 * block for v3; for v2 only the mode word is read here).  The directory
 * is created with VOP_MKDIR() and its file handle and attributes are
 * returned in the reply.
 *
 * nfsd    - request descriptor; supplies the request mbufs (nd_mrep,
 *           nd_md), the parse position (nd_dpos), the client address
 *           (nd_nam), the credentials (nd_cr) and the ND_NFSV3 flag.
 * slp, td - handed on to nfs_namei() and to the vnode operations.
 * mrq     - not referenced by name in this body.
 *           NOTE(review): presumably consumed by nfsm_reply(), which is
 *           defined elsewhere -- confirm.
 *
 * Returns whatever is left in `error'; it is cleared once a reply has
 * been built.
 *
 * NOTE(review): the nfsm_*() macros are defined outside this part of the
 * file.  They appear to use the locals mrep, md, dpos, mb, mreq, bpos
 * and tl implicitly and may transfer control to the nfsmout label --
 * confirm before renaming locals or reordering statements.
 */
2719 int
2720 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2721 struct thread *td, struct mbuf **mrq)
2722 {
2723 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2724 struct sockaddr *nam = nfsd->nd_nam;
2725 caddr_t dpos = nfsd->nd_dpos;
2726 struct ucred *cred = &nfsd->nd_cr;
2727 struct vattr va, dirfor, diraft;
2728 struct vattr *vap = &va;
2729 struct nfs_fattr *fp;
2730 struct nameidata nd;
2731 u_int32_t *tl;
2732 caddr_t bpos;
2733 int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2734 int v3 = (nfsd->nd_flag & ND_NFSV3);
2735 struct mbuf *mb, *mreq;
2736 struct vnode *dirp = NULL;
/*
 * vpexcl becomes 1 once VOP_MKDIR() has been called.  The cleanup code
 * uses it to choose between vput() and vrele() for nd.ni_vp.
 */
2737 int vpexcl = 0;
2738 nfsfh_t nfh;
2739 fhandle_t *fhp;
2740 struct mount *mp = NULL;
2741 struct vnode *vp;
2742
2743 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2744 ndclear(&nd);
2745
2746 fhp = &nfh.fh_generic;
2747 nfsm_srvmtofh(fhp);
/*
 * Resolve the directory handle to a mount point and a vnode.  The vnode
 * is only used as the argument to vn_start_write() and is dropped again
 * straight away.
 */
2748 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2749 error = ESTALE;
2750 goto out;
2751 }
2752 if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0) {
/* No write was started: make sure vn_finished_write() is passed NULL. */
2753 mp = NULL;
2754 goto out;
2755 }
/* Matched by the vn_finished_write(mp) at the end of the function. */
2756 (void) vn_start_write(vp, &mp, V_WAIT);
2757 vput(vp);
2758 vp = NULL;
2759 nfsm_srvnamesiz(len);
2760 nd.ni_cnd.cn_cred = cred;
2761 nd.ni_cnd.cn_nameiop = CREATE;
2762 nd.ni_cnd.cn_flags = LOCKPARENT;
2763
2764 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
2765 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
/* dirp is only kept for v3, where it is used for the directory attributes. */
2766 if (dirp && !v3) {
2767 vrele(dirp);
2768 dirp = NULL;
2769 }
/* Lookup failed: reply at once, with directory wcc data for v3. */
2770 if (error) {
2771 nfsm_reply(NFSX_WCCDATA(v3));
2772 if (v3)
2773 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2774 error = 0;
2775 goto nfsmout;
2776 }
2777 VATTR_NULL(vap);
2778 if (v3) {
2779 nfsm_srvsattr(vap);
2780 } else {
/* v2: only the mode word is read from the request here. */
2781 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2782 vap->va_mode = nfstov_mode(*tl++);
2783 }
2784
2785 /*
2786 * At this point nd.ni_dvp is referenced and exclusively locked and
2787 * nd.ni_vp, if it exists, is referenced but not locked.
2788 */
2789
2790 vap->va_type = VDIR;
/* The new name must not exist yet. */
2791 if (nd.ni_vp != NULL) {
2792 NDFREE(&nd, NDF_ONLY_PNBUF);
2793 error = EEXIST;
2794 goto out;
2795 }
2796
2797 /*
2798 * Issue mkdir op. Since SAVESTART is not set, the pathname
2799 * component is freed by the VOP call. This will fill-in
2800 * nd.ni_vp, reference, and exclusively lock it.
2801 */
/* A mode that was never set by the client is replaced by 0. */
2802 if (vap->va_mode == (mode_t)VNOVAL)
2803 vap->va_mode = 0;
2804 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
2805 NDFREE(&nd, NDF_ONLY_PNBUF);
2806 vpexcl = 1;
2807
2808 vput(nd.ni_dvp);
2809 nd.ni_dvp = NULL;
2810
/* Build the file handle and fetch the attributes of the new directory. */
2811 if (!error) {
2812 bzero((caddr_t)fhp, sizeof(nfh));
2813 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
2814 error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
2815 if (!error)
2816 error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
2817 }
/*
 * out: fetch the parent directory attributes after the operation.  If
 * dirp is the same vnode as nd.ni_dvp the attributes are read directly;
 * otherwise the vnodes held in nd are released first and dirp is locked
 * around VOP_GETATTR().
 */
2818 out:
2819 if (dirp) {
2820 if (dirp == nd.ni_dvp) {
2821 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2822 } else {
2823 /* Release existing locks to prevent deadlock. */
2824 if (nd.ni_dvp) {
2825 NDFREE(&nd, NDF_ONLY_PNBUF);
2826 if (nd.ni_dvp == nd.ni_vp && vpexcl)
2827 vrele(nd.ni_dvp);
2828 else
2829 vput(nd.ni_dvp);
2830 }
2831 if (nd.ni_vp) {
2832 if (vpexcl)
2833 vput(nd.ni_vp);
2834 else
2835 vrele(nd.ni_vp);
2836 }
2837 nd.ni_dvp = NULL;
2838 nd.ni_vp = NULL;
2839 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
2840 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2841 VOP_UNLOCK(dirp, 0, td);
2842 }
2843 }
2844 nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
/*
 * v3 reply: file handle and attributes on success, then directory wcc
 * data.  v2 reply on success: file handle followed by the attributes.
 */
2845 if (v3) {
2846 if (!error) {
2847 nfsm_srvpostop_fh(fhp);
2848 nfsm_srvpostop_attr(0, vap);
2849 }
2850 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2851 } else if (!error) {
2852 /* v2 non-error case. */
2853 nfsm_srvfhtom(fhp, v3);
2854 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
2855 nfsm_srvfillattr(vap, fp);
2856 }
2857 error = 0;
2858 /* fall through */
2859
/* nfsmout: release whatever is still held and end the write started above. */
2860 nfsmout:
2861 if (dirp)
2862 vrele(dirp);
2863 if (nd.ni_dvp) {
2864 NDFREE(&nd, NDF_ONLY_PNBUF);
2865 if (nd.ni_dvp == nd.ni_vp && vpexcl)
2866 vrele(nd.ni_dvp);
2867 else
2868 vput(nd.ni_dvp);
2869 }
2870 if (nd.ni_vp) {
2871 if (vpexcl)
2872 vput(nd.ni_vp);
2873 else
2874 vrele(nd.ni_vp);
2875 }
2876 vn_finished_write(mp);
2877 return (error);
2878 }
2879
2880 /*
2881 * nfs rmdir service
2882 */
/*
 * nfsrv_rmdir - service an NFS RMDIR request.
 *
 * The request carries the file handle of the parent directory and the
 * name of the directory to remove.  The name is looked up with both the
 * parent and the target locked; the target must be a directory, must not
 * be the parent itself (".") and must not be the root of a mounted file
 * system.  If all checks pass the directory is removed with VOP_RMDIR().
 *
 * nfsd    - request descriptor; supplies the request mbufs (nd_mrep,
 *           nd_md), the parse position (nd_dpos), the client address
 *           (nd_nam), the credentials (nd_cr) and the ND_NFSV3 flag.
 * slp, td - handed on to nfs_namei() and to the vnode operations.
 * mrq     - not referenced by name in this body.
 *           NOTE(review): presumably consumed by nfsm_reply(), which is
 *           defined elsewhere -- confirm.
 *
 * Returns whatever is left in `error'; it is cleared once a reply has
 * been built.
 *
 * NOTE(review): the nfsm_*() macros are defined outside this part of the
 * file.  They appear to use the locals mrep, md, dpos, mb, mreq and bpos
 * implicitly and may transfer control to the nfsmout label -- confirm
 * before renaming locals or reordering statements.
 */
2883 int
2884 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2885 struct thread *td, struct mbuf **mrq)
2886 {
2887 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2888 struct sockaddr *nam = nfsd->nd_nam;
2889 caddr_t dpos = nfsd->nd_dpos;
2890 struct ucred *cred = &nfsd->nd_cr;
2891 caddr_t bpos;
2892 int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2893 int v3 = (nfsd->nd_flag & ND_NFSV3);
2894 struct mbuf *mb, *mreq;
2895 struct vnode *vp, *dirp = NULL;
2896 struct vattr dirfor, diraft;
2897 nfsfh_t nfh;
2898 fhandle_t *fhp;
2899 struct nameidata nd;
2900 struct mount *mp = NULL;
2901
2902 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2903 ndclear(&nd);
2904
2905 fhp = &nfh.fh_generic;
2906 nfsm_srvmtofh(fhp);
/*
 * Resolve the directory handle to a mount point and a vnode.  The vnode
 * is only used as the argument to vn_start_write() and is dropped again
 * straight away.
 */
2907 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2908 error = ESTALE;
2909 goto out;
2910 }
2911 if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0) {
/* No write was started: make sure vn_finished_write() is passed NULL. */
2912 mp = NULL;
2913 goto out;
2914 }
/* Matched by the vn_finished_write(mp) at the end of the function. */
2915 (void) vn_start_write(vp, &mp, V_WAIT);
2916 vput(vp);
2917 vp = NULL;
2918 nfsm_srvnamesiz(len);
2919 nd.ni_cnd.cn_cred = cred;
2920 nd.ni_cnd.cn_nameiop = DELETE;
2921 nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
2922 error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
2923 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
/* dirp is only kept for v3, where it is used for the directory attributes. */
2924 if (dirp && !v3) {
2925 vrele(dirp);
2926 dirp = NULL;
2927 }
/* Lookup failed: reply at once, with directory wcc data for v3. */
2928 if (error) {
2929 nfsm_reply(NFSX_WCCDATA(v3));
2930 if (v3)
2931 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2932 error = 0;
2933 goto nfsmout;
2934 }
2935 vp = nd.ni_vp;
/* Only directories can be removed by this call. */
2936 if (vp->v_type != VDIR) {
2937 error = ENOTDIR;
2938 goto out;
2939 }
2940 /*
2941 * No rmdir "." please.
2942 */
2943 if (nd.ni_dvp == vp) {
2944 error = EINVAL;
2945 goto out;
2946 }
2947 /*
2948 * The root of a mounted filesystem cannot be deleted.
2949 */
2950 if (vp->v_vflag & VV_ROOT)
2951 error = EBUSY;
/* out: reached with error == 0 when every check passed, non-zero otherwise. */
2952 out:
2953 /*
2954 * Issue or abort op. Since SAVESTART is not set, path name
2955 * component is freed by the VOP after either.
2956 */
2957 if (!error)
2958 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2959 NDFREE(&nd, NDF_ONLY_PNBUF);
2960
/*
 * Fetch the parent directory attributes after the operation.  If dirp
 * is the same vnode as nd.ni_dvp the attributes are read directly;
 * otherwise the vnodes held in nd are released first and dirp is locked
 * around VOP_GETATTR().
 */
2961 if (dirp) {
2962 if (dirp == nd.ni_dvp)
2963 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2964 else {
2965 /* Release existing locks to prevent deadlock. */
2966 if (nd.ni_dvp) {
2967 if (nd.ni_dvp == nd.ni_vp)
2968 vrele(nd.ni_dvp);
2969 else
2970 vput(nd.ni_dvp);
2971 }
2972 if (nd.ni_vp)
2973 vput(nd.ni_vp);
2974 nd.ni_dvp = NULL;
2975 nd.ni_vp = NULL;
2976 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
2977 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
2978 VOP_UNLOCK(dirp, 0, td);
2979 }
2980 }
2981 nfsm_reply(NFSX_WCCDATA(v3));
2982 error = 0;
2983 if (v3)
2984 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2985 /* fall through */
2986
/* nfsmout: release whatever is still held and end the write started above. */
2987 nfsmout:
2988 NDFREE(&nd, NDF_ONLY_PNBUF);
2989 if (dirp)
2990 vrele(dirp);
2991 if (nd.ni_dvp) {
2992 if (nd.ni_dvp == nd.ni_vp)
2993 vrele(nd.ni_dvp);
2994 else
2995 vput(nd.ni_dvp);
2996 }
2997 if (nd.ni_vp)
2998 vput(nd.ni_vp);
2999
3000 vn_finished_write(mp);
3001 return(error);
3002 }
3003
3004 /*
3005 * nfs readdir service
3006 * - mallocs what it thinks is enough to read
3007 * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
3008 * - calls VOP_READDIR()
3009 * - loops around building the reply
3010 * if the output generated exceeds count break out of loop
3011 * The nfsm_clget macro is used here so that the reply will be packed
3012 * tightly in mbuf clusters.
3013 * - it only knows that it has encountered eof when the VOP_READDIR()
3014 * reads nothing
3015 * - as such, one readdir rpc will return eof false even though the end
3016 * of the directory has been reached, and the next one will return eof
3017 * - it trims out records with d_fileno == 0
3018 * this doesn't matter for Unix clients, but such records might confuse
3019 * clients for other operating systems.
3020 * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
3021 * than requested, but this may not apply to all filesystems. For
3022 * example, client NFS does not { although it is never remote mounted
3023 * anyhow }
3024 * The alternate call nfsrv_readdirplus() does lookups as well.
3025 * PS: The NFS protocol spec. does not clarify what the "count" byte
3026 * argument is a count of.. just name strings and file id's or the
3027 * entire reply rpc or ...
3028 * I tried just file name and id sizes and it confused the Sun client,
3029 * so I am using the full rpc size now. The "paranoia.." comment refers
3030 * to including the status longwords that are not a part of the dir.
3031 * "entry" structures, but are in the rpc.
3032 */
/*
 * Per-entry trailer used by nfsrv_readdirplus().  It is filled in for
 * each directory entry and then copied into the reply as raw bytes
 * (sizeof (struct flrep)) after the padded entry name, so the order and
 * size of the fields matter.
 */
3033 struct flrep {
/* Entry cookie: readdirplus stores 0 in nfsuquad[0], the cookie in nfsuquad[1]. */
3034 nfsuint64 fl_off;
/* Set to nfsrv_nfs_true by readdirplus; flags that fl_fattr follows. */
3035 u_int32_t fl_postopok;
/* Attributes of the entry, filled in through nfsm_srvfillattr(). */
3036 u_int32_t fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
/* Set to nfsrv_nfs_true by readdirplus; flags that the file handle follows. */
3037 u_int32_t fl_fhok;
/* Set to txdr_unsigned(NFSX_V3FH) by readdirplus. */
3038 u_int32_t fl_fhsize;
/* File handle of the entry; readdirplus accesses it as a fhandle_t. */
3039 u_int32_t fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
3040 };
3041
/*
 * nfsrv_readdir - service an NFS READDIR request (v2 and v3).
 *
 * Request layout after the directory file handle:
 *   v3: 64-bit cookie, 64-bit cookie verifier, byte count
 *   v2: 32-bit cookie, byte count
 *
 * The byte count is decoded into a signed int.  A value that comes out
 * negative or larger than NFS_SRVMAXDATA(nfsd) is clamped to
 * NFS_SRVMAXDATA(nfsd) *before* it is rounded up to a multiple of
 * DIRBLKSIZ.  This keeps the rounding from overflowing and guarantees
 * that the size handed to MALLOC() and VOP_READDIR() is never negative.
 * NOTE(review): this relies on NFS_SRVMAXDATA() being positive; the
 * macro is defined elsewhere -- confirm.
 *
 * The directory is read with VOP_READDIR() into a temporary buffer.
 * Entries with d_fileno == 0, whiteout entries and entries whose cookie
 * is not beyond the requested offset are skipped; if nothing usable is
 * left the read is retried from the new offset.  The remaining entries
 * are XDR-encoded into the reply until the byte count would be exceeded.
 *
 * nfsd    - request descriptor; supplies the request mbufs (nd_mrep,
 *           nd_md), the parse position (nd_dpos), the client address
 *           (nd_nam), the credentials (nd_cr) and the ND_NFSV3 flag.
 * slp, td - handed on to nfsrv_fhtovp() and to the vnode operations.
 * mrq     - not referenced by name in this body.
 *           NOTE(review): presumably consumed by nfsm_reply(), which is
 *           defined elsewhere -- confirm.
 *
 * Returns whatever is left in `error'; it is cleared on every path that
 * builds an error or end-of-directory reply.
 *
 * NOTE(review): the nfsm_*() macros (including nfsm_clget) are defined
 * outside this part of the file.  They appear to use the locals mrep,
 * md, dpos, mb, mreq, bpos, mp, bp, be and tl implicitly and may
 * transfer control to the nfsmout label -- confirm before renaming
 * locals or reordering statements.
 */
3042 int
3043 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3044 struct thread *td, struct mbuf **mrq)
3045 {
3046 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3047 struct sockaddr *nam = nfsd->nd_nam;
3048 caddr_t dpos = nfsd->nd_dpos;
3049 struct ucred *cred = &nfsd->nd_cr;
3050 char *bp, *be;
3051 struct mbuf *mp;
3052 struct dirent *dp;
3053 caddr_t cp;
3054 u_int32_t *tl;
3055 caddr_t bpos;
3056 struct mbuf *mb, *mreq;
3057 char *cpos, *cend, *rbuf;
3058 struct vnode *vp = NULL;
3059 struct vattr at;
3060 nfsfh_t nfh;
3061 fhandle_t *fhp;
3062 struct uio io;
3063 struct iovec iv;
3064 int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3065 int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
3066 int v3 = (nfsd->nd_flag & ND_NFSV3);
3067 u_quad_t off, toff, verf;
3068 u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3069
3070 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3071 fhp = &nfh.fh_generic;
3072 nfsm_srvmtofh(fhp);
3073 if (v3) {
3074 tl = nfsm_dissect(u_int32_t *, 5 * NFSX_UNSIGNED);
3075 toff = fxdr_hyper(tl);
3076 tl += 2;
3077 verf = fxdr_hyper(tl);
3078 tl += 2;
3079 } else {
3080 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
3081 toff = fxdr_unsigned(u_quad_t, *tl++);
3082 verf = 0; /* shut up gcc */
3083 }
3084 off = toff;
3085 cnt = fxdr_unsigned(int, *tl);
/*
 * Clamp the client-supplied count first and derive the read size from
 * the clamped value.  A count that does not fit in a non-negative int
 * is treated like any other over-large count.
 */
3086 xfer = NFS_SRVMAXDATA(nfsd);
3087 if (cnt < 0 || cnt > xfer)
3088 cnt = xfer;
3089 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3090 if (siz > xfer)
3091 siz = xfer;
3092 fullsiz = siz;
3093 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
3094 if (!error && vp->v_type != VDIR) {
3095 error = ENOTDIR;
3096 vput(vp);
3097 vp = NULL;
3098 }
3099 if (error) {
3100 nfsm_reply(NFSX_UNSIGNED);
3101 if (v3)
3102 nfsm_srvpostop_attr(getret, &at);
3103 error = 0;
3104 goto nfsmout;
3105 }
3106
3107 /*
3108 * Obtain lock on vnode for this section of the code
3109 */
3110 if (v3) {
3111 error = getret = VOP_GETATTR(vp, &at, cred, td);
3112 #if 0
3113 /*
3114 * XXX This check may be too strict for Solaris 2.5 clients.
3115 */
3116 if (!error && toff && verf && verf != at.va_filerev)
3117 error = NFSERR_BAD_COOKIE;
3118 #endif
3119 }
3120 if (!error)
3121 error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
3122 if (error) {
3123 vput(vp);
3124 vp = NULL;
3125 nfsm_reply(NFSX_POSTOPATTR(v3));
3126 if (v3)
3127 nfsm_srvpostop_attr(getret, &at);
3128 error = 0;
3129 goto nfsmout;
3130 }
3131 VOP_UNLOCK(vp, 0, td);
3132
3133 /*
3134 * end section. Allocate rbuf and continue
3135 */
3136 MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
/* again: (re)read one buffer of directory entries starting at off. */
3137 again:
3138 iv.iov_base = rbuf;
3139 iv.iov_len = fullsiz;
3140 io.uio_iov = &iv;
3141 io.uio_iovcnt = 1;
3142 io.uio_offset = (off_t)off;
3143 io.uio_resid = fullsiz;
3144 io.uio_segflg = UIO_SYSSPACE;
3145 io.uio_rw = UIO_READ;
3146 io.uio_td = NULL;
3147 eofflag = 0;
3148 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
/* Drop the cookie array left over from a previous pass, if any. */
3149 if (cookies) {
3150 free((caddr_t)cookies, M_TEMP);
3151 cookies = NULL;
3152 }
3153 error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3154 off = (off_t)io.uio_offset;
/* A successful read that returned no cookie array is reported as an error. */
3155 if (!cookies && !error)
3156 error = NFSERR_PERM;
3157 if (v3) {
3158 getret = VOP_GETATTR(vp, &at, cred, td);
3159 if (!error)
3160 error = getret;
3161 }
3162 VOP_UNLOCK(vp, 0, td);
3163 if (error) {
3164 vrele(vp);
3165 vp = NULL;
3166 free((caddr_t)rbuf, M_TEMP);
3167 if (cookies)
3168 free((caddr_t)cookies, M_TEMP);
3169 nfsm_reply(NFSX_POSTOPATTR(v3));
3170 if (v3)
3171 nfsm_srvpostop_attr(getret, &at);
3172 error = 0;
3173 goto nfsmout;
3174 }
3175 if (io.uio_resid) {
3176 siz -= io.uio_resid;
3177
3178 /*
3179 * If nothing read, return eof
3180 * rpc reply
3181 */
3182 if (siz == 0) {
3183 vrele(vp);
3184 vp = NULL;
3185 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
3186 2 * NFSX_UNSIGNED);
3187 if (v3) {
3188 nfsm_srvpostop_attr(getret, &at);
3189 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
3190 txdr_hyper(at.va_filerev, tl);
3191 tl += 2;
3192 } else
3193 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
/* "no entry follows", then "eof". */
3194 *tl++ = nfsrv_nfs_false;
3195 *tl = nfsrv_nfs_true;
3196 FREE((caddr_t)rbuf, M_TEMP);
3197 FREE((caddr_t)cookies, M_TEMP);
3198 error = 0;
3199 goto nfsmout;
3200 }
3201 }
3202
3203 /*
3204 * Check for degenerate cases of nothing useful read.
3205 * If so go try again
3206 */
3207 cpos = rbuf;
3208 cend = rbuf + siz;
3209 dp = (struct dirent *)cpos;
3210 cookiep = cookies;
3211 /*
3212 * For some reason FreeBSD's ufs_readdir() chooses to back the
3213 * directory offset up to a block boundary, so it is necessary to
3214 * skip over the records that precede the requested offset. This
3215 * requires the assumption that file offset cookies monotonically
3216 * increase.
3217 */
3218 while (cpos < cend && ncookies > 0 &&
3219 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
3220 ((u_quad_t)(*cookiep)) <= toff)) {
3221 cpos += dp->d_reclen;
3222 dp = (struct dirent *)cpos;
3223 cookiep++;
3224 ncookies--;
3225 }
3226 if (cpos >= cend || ncookies == 0) {
3227 toff = off;
3228 siz = fullsiz;
3229 goto again;
3230 }
3231
3232 len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */
3233 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
3234 if (v3) {
3235 nfsm_srvpostop_attr(getret, &at);
3236 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
3237 txdr_hyper(at.va_filerev, tl);
3238 }
/* mp/bp/be track the current reply mbuf and the write window inside it. */
3239 mp = mb;
3240 bp = bpos;
3241 be = bp + M_TRAILINGSPACE(mp);
3242
3243 /* Loop through the records and build reply */
3244 while (cpos < cend && ncookies > 0) {
3245 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
3246 nlen = dp->d_namlen;
3247 rem = nfsm_rndup(nlen) - nlen;
3248 len += (4 * NFSX_UNSIGNED + nlen + rem);
3249 if (v3)
3250 len += 2 * NFSX_UNSIGNED;
/* Stop before the reply would exceed the requested byte count. */
3251 if (len > cnt) {
3252 eofflag = 0;
3253 break;
3254 }
3255 /*
3256 * Build the directory record xdr from
3257 * the dirent entry.
3258 */
3259 nfsm_clget;
3260 *tl = nfsrv_nfs_true;
3261 bp += NFSX_UNSIGNED;
3262 if (v3) {
3263 nfsm_clget;
3264 *tl = 0;
3265 bp += NFSX_UNSIGNED;
3266 }
3267 nfsm_clget;
3268 *tl = txdr_unsigned(dp->d_fileno);
3269 bp += NFSX_UNSIGNED;
3270 nfsm_clget;
3271 *tl = txdr_unsigned(nlen);
3272 bp += NFSX_UNSIGNED;
3273
3274 /* And loop around copying the name */
3275 xfer = nlen;
3276 cp = dp->d_name;
3277 while (xfer > 0) {
3278 nfsm_clget;
3279 if ((bp+xfer) > be)
3280 tsiz = be-bp;
3281 else
3282 tsiz = xfer;
3283 bcopy(cp, bp, tsiz);
3284 bp += tsiz;
3285 xfer -= tsiz;
3286 if (xfer > 0)
3287 cp += tsiz;
3288 }
3289 /* And null pad to an int32_t boundary. */
3290 for (i = 0; i < rem; i++)
3291 *bp++ = '\0';
3292 nfsm_clget;
3293
3294 /* Finish off the record */
3295 if (v3) {
3296 *tl = 0;
3297 bp += NFSX_UNSIGNED;
3298 nfsm_clget;
3299 }
3300 *tl = txdr_unsigned(*cookiep);
3301 bp += NFSX_UNSIGNED;
3302 }
3303 cpos += dp->d_reclen;
3304 dp = (struct dirent *)cpos;
3305 cookiep++;
3306 ncookies--;
3307 }
3308 vrele(vp);
3309 vp = NULL;
/* Terminate the list: "no entry follows", then the eof flag. */
3310 nfsm_clget;
3311 *tl = nfsrv_nfs_false;
3312 bp += NFSX_UNSIGNED;
3313 nfsm_clget;
3314 if (eofflag)
3315 *tl = nfsrv_nfs_true;
3316 else
3317 *tl = nfsrv_nfs_false;
3318 bp += NFSX_UNSIGNED;
/* Fix up the length of the last reply mbuf that was written to. */
3319 if (mp != mb) {
3320 if (bp < be)
3321 mp->m_len = bp - mtod(mp, caddr_t);
3322 } else
3323 mp->m_len += bp - bpos;
3324 FREE((caddr_t)rbuf, M_TEMP);
3325 FREE((caddr_t)cookies, M_TEMP);
3326
3327 nfsmout:
3328 if (vp)
3329 vrele(vp);
3330 return(error);
3331 }
3332
3333 int
3334 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3335 struct thread *td, struct mbuf **mrq)
3336 {
3337 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3338 struct sockaddr *nam = nfsd->nd_nam;
3339 caddr_t dpos = nfsd->nd_dpos;
3340 struct ucred *cred = &nfsd->nd_cr;
3341 char *bp, *be;
3342 struct mbuf *mp;
3343 struct dirent *dp;
3344 caddr_t cp;
3345 u_int32_t *tl;
3346 caddr_t bpos;
3347 struct mbuf *mb, *mreq;
3348 char *cpos, *cend, *rbuf;
3349 struct vnode *vp = NULL, *nvp;
3350 struct flrep fl;
3351 nfsfh_t nfh;
3352 fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3353 struct uio io;
3354 struct iovec iv;
3355 struct vattr va, at, *vap = &va;
3356 struct nfs_fattr *fp;
3357 int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3358 int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3359 u_quad_t off, toff, verf;
3360 u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3361 int v3 = (nfsd->nd_flag & ND_NFSV3);
3362
3363 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3364 if (!v3)
3365 panic("nfsrv_readdirplus: v3 proc called on a v2 connection");
3366 fhp = &nfh.fh_generic;
3367 nfsm_srvmtofh(fhp);
3368 tl = nfsm_dissect(u_int32_t *, 6 * NFSX_UNSIGNED);
3369 toff = fxdr_hyper(tl);
3370 tl += 2;
3371 verf = fxdr_hyper(tl);
3372 tl += 2;
3373 siz = fxdr_unsigned(int, *tl++);
3374 cnt = fxdr_unsigned(int, *tl);
3375 off = toff;
3376 siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3377 xfer = NFS_SRVMAXDATA(nfsd);
3378 if (cnt > xfer)
3379 cnt = xfer;
3380 if (siz > xfer)
3381 siz = xfer;
3382 fullsiz = siz;
3383 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
3384 if (!error && vp->v_type != VDIR) {
3385 error = ENOTDIR;
3386 vput(vp);
3387 vp = NULL;
3388 }
3389 if (error) {
3390 nfsm_reply(NFSX_UNSIGNED);
3391 nfsm_srvpostop_attr(getret, &at);
3392 error = 0;
3393 goto nfsmout;
3394 }
3395 error = getret = VOP_GETATTR(vp, &at, cred, td);
3396 #if 0
3397 /*
3398 * XXX This check may be too strict for Solaris 2.5 clients.
3399 */
3400 if (!error && toff && verf && verf != at.va_filerev)
3401 error = NFSERR_BAD_COOKIE;
3402 #endif
3403 if (!error)
3404 error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
3405 if (error) {
3406 vput(vp);
3407 vp = NULL;
3408 nfsm_reply(NFSX_V3POSTOPATTR);
3409 nfsm_srvpostop_attr(getret, &at);
3410 error = 0;
3411 goto nfsmout;
3412 }
3413 VOP_UNLOCK(vp, 0, td);
3414 MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3415 again:
3416 iv.iov_base = rbuf;
3417 iv.iov_len = fullsiz;
3418 io.uio_iov = &iv;
3419 io.uio_iovcnt = 1;
3420 io.uio_offset = (off_t)off;
3421 io.uio_resid = fullsiz;
3422 io.uio_segflg = UIO_SYSSPACE;
3423 io.uio_rw = UIO_READ;
3424 io.uio_td = NULL;
3425 eofflag = 0;
3426 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3427 if (cookies) {
3428 free((caddr_t)cookies, M_TEMP);
3429 cookies = NULL;
3430 }
3431 error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3432 off = (u_quad_t)io.uio_offset;
3433 getret = VOP_GETATTR(vp, &at, cred, td);
3434 VOP_UNLOCK(vp, 0, td);
3435 if (!cookies && !error)
3436 error = NFSERR_PERM;
3437 if (!error)
3438 error = getret;
3439 if (error) {
3440 vrele(vp);
3441 vp = NULL;
3442 if (cookies)
3443 free((caddr_t)cookies, M_TEMP);
3444 free((caddr_t)rbuf, M_TEMP);
3445 nfsm_reply(NFSX_V3POSTOPATTR);
3446 nfsm_srvpostop_attr(getret, &at);
3447 error = 0;
3448 goto nfsmout;
3449 }
3450 if (io.uio_resid) {
3451 siz -= io.uio_resid;
3452
3453 /*
3454 * If nothing read, return eof
3455 * rpc reply
3456 */
3457 if (siz == 0) {
3458 vrele(vp);
3459 vp = NULL;
3460 nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3461 2 * NFSX_UNSIGNED);
3462 nfsm_srvpostop_attr(getret, &at);
3463 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
3464 txdr_hyper(at.va_filerev, tl);
3465 tl += 2;
3466 *tl++ = nfsrv_nfs_false;
3467 *tl = nfsrv_nfs_true;
3468 FREE((caddr_t)cookies, M_TEMP);
3469 FREE((caddr_t)rbuf, M_TEMP);
3470 error = 0;
3471 goto nfsmout;
3472 }
3473 }
3474
3475 /*
3476 * Check for degenerate cases of nothing useful read.
3477 * If so go try again
3478 */
3479 cpos = rbuf;
3480 cend = rbuf + siz;
3481 dp = (struct dirent *)cpos;
3482 cookiep = cookies;
3483 /*
3484 * For some reason FreeBSD's ufs_readdir() chooses to back the
3485 * directory offset up to a block boundary, so it is necessary to
3486 * skip over the records that precede the requested offset. This
3487 * requires the assumption that file offset cookies monotonically
3488 * increase.
3489 */
3490 while (cpos < cend && ncookies > 0 &&
3491 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
3492 ((u_quad_t)(*cookiep)) <= toff)) {
3493 cpos += dp->d_reclen;
3494 dp = (struct dirent *)cpos;
3495 cookiep++;
3496 ncookies--;
3497 }
3498 if (cpos >= cend || ncookies == 0) {
3499 toff = off;
3500 siz = fullsiz;
3501 goto again;
3502 }
3503
3504 /*
3505 * Probe one of the directory entries to see if the filesystem
3506 * supports VGET.
3507 */
3508 if (VFS_VGET(vp->v_mount, dp->d_fileno, LK_EXCLUSIVE, &nvp) ==
3509 EOPNOTSUPP) {
3510 error = NFSERR_NOTSUPP;
3511 vrele(vp);
3512 vp = NULL;
3513 free((caddr_t)cookies, M_TEMP);
3514 free((caddr_t)rbuf, M_TEMP);
3515 nfsm_reply(NFSX_V3POSTOPATTR);
3516 nfsm_srvpostop_attr(getret, &at);
3517 error = 0;
3518 goto nfsmout;
3519 }
3520 vput(nvp);
3521 nvp = NULL;
3522
3523 dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3524 2 * NFSX_UNSIGNED;
3525 nfsm_reply(cnt);
3526 nfsm_srvpostop_attr(getret, &at);
3527 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
3528 txdr_hyper(at.va_filerev, tl);
3529 mp = mb;
3530 bp = bpos;
3531 be = bp + M_TRAILINGSPACE(mp);
3532
3533 /* Loop through the records and build reply */
3534 while (cpos < cend && ncookies > 0) {
3535 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
3536 nlen = dp->d_namlen;
3537 rem = nfsm_rndup(nlen)-nlen;
3538
3539 /*
3540 * For readdir_and_lookup get the vnode using
3541 * the file number.
3542 */
3543 if (VFS_VGET(vp->v_mount, dp->d_fileno, LK_EXCLUSIVE,
3544 &nvp))
3545 goto invalid;
3546 bzero((caddr_t)nfhp, NFSX_V3FH);
3547 nfhp->fh_fsid =
3548 nvp->v_mount->mnt_stat.f_fsid;
3549 if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3550 vput(nvp);
3551 nvp = NULL;
3552 goto invalid;
3553 }
3554 if (VOP_GETATTR(nvp, vap, cred, td)) {
3555 vput(nvp);
3556 nvp = NULL;
3557 goto invalid;
3558 }
3559 vput(nvp);
3560 nvp = NULL;
3561
3562 /*
3563 * If either the dircount or maxcount will be
3564 * exceeded, get out now. Both of these lengths
3565 * are calculated conservatively, including all
3566 * XDR overheads.
3567 */
3568 len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3569 NFSX_V3POSTOPATTR);
3570 dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3571 if (len > cnt || dirlen > fullsiz) {
3572 eofflag = 0;
3573 break;
3574 }
3575
3576 /*
3577 * Build the directory record xdr from
3578 * the dirent entry.
3579 */
3580 fp = (struct nfs_fattr *)&fl.fl_fattr;
3581 nfsm_srvfillattr(vap, fp);
3582 fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3583 fl.fl_fhok = nfsrv_nfs_true;
3584 fl.fl_postopok = nfsrv_nfs_true;
3585 fl.fl_off.nfsuquad[0] = 0;
3586 fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3587
3588 nfsm_clget;
3589 *tl = nfsrv_nfs_true;
3590 bp += NFSX_UNSIGNED;
3591 nfsm_clget;
3592 *tl = 0;
3593 bp += NFSX_UNSIGNED;
3594 nfsm_clget;
3595 *tl = txdr_unsigned(dp->d_fileno);
3596 bp += NFSX_UNSIGNED;
3597 nfsm_clget;
3598 *tl = txdr_unsigned(nlen);
3599 bp += NFSX_UNSIGNED;
3600
3601 /* And loop around copying the name */
3602 xfer = nlen;
3603 cp = dp->d_name;
3604 while (xfer > 0) {
3605 nfsm_clget;
3606 if ((bp + xfer) > be)
3607 tsiz = be - bp;
3608 else
3609 tsiz = xfer;
3610 bcopy(cp, bp, tsiz);
3611 bp += tsiz;
3612 xfer -= tsiz;
3613 if (xfer > 0)
3614 cp += tsiz;
3615 }
3616 /* And null pad to an int32_t boundary. */
3617 for (i = 0; i < rem; i++)
3618 *bp++ = '\0';
3619
3620 /*
3621 * Now copy the flrep structure out.
3622 */
3623 xfer = sizeof (struct flrep);
3624 cp = (caddr_t)&fl;
3625 while (xfer > 0) {
3626 nfsm_clget;
3627 if ((bp + xfer) > be)
3628 tsiz = be - bp;
3629 else
3630 tsiz = xfer;
3631 bcopy(cp, bp, tsiz);
3632 bp += tsiz;
3633 xfer -= tsiz;
3634 if (xfer > 0)
3635 cp += tsiz;
3636 }
3637 }
3638 invalid:
3639 cpos += dp->d_reclen;
3640 dp = (struct dirent *)cpos;
3641 cookiep++;
3642 ncookies--;
3643 }
3644 vrele(vp);
3645 vp = NULL;
3646 nfsm_clget;
3647 *tl = nfsrv_nfs_false;
3648 bp += NFSX_UNSIGNED;
3649 nfsm_clget;
3650 if (eofflag)
3651 *tl = nfsrv_nfs_true;
3652 else
3653 *tl = nfsrv_nfs_false;
3654 bp += NFSX_UNSIGNED;
3655 if (mp != mb) {
3656 if (bp < be)
3657 mp->m_len = bp - mtod(mp, caddr_t);
3658 } else
3659 mp->m_len += bp - bpos;
3660 FREE((caddr_t)cookies, M_TEMP);
3661 FREE((caddr_t)rbuf, M_TEMP);
3662 nfsmout:
3663 if (vp)
3664 vrele(vp);
3665 return(error);
3666 }
3667
3668 /*
3669 * nfs commit service
3670 */
3671 int
3672 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3673 struct thread *td, struct mbuf **mrq)
3674 {
3675 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3676 struct sockaddr *nam = nfsd->nd_nam;
3677 caddr_t dpos = nfsd->nd_dpos;
3678 struct ucred *cred = &nfsd->nd_cr;
3679 struct vattr bfor, aft;
3680 struct vnode *vp = NULL;
3681 nfsfh_t nfh;
3682 fhandle_t *fhp;
3683 u_int32_t *tl;
3684 caddr_t bpos;
3685 int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3686 struct mbuf *mb, *mreq;
3687 u_quad_t off;
3688 struct mount *mp = NULL;
3689 int v3 = (nfsd->nd_flag & ND_NFSV3);
3690
3691 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3692 if (!v3)
3693 panic("nfsrv_commit: v3 proc called on a v2 connection");
3694 fhp = &nfh.fh_generic;
3695 nfsm_srvmtofh(fhp);
3696 if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
3697 error = ESTALE;
3698 goto ereply;
3699 }
3700 if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0) {
3701 mp = NULL;
3702 goto ereply;
3703 }
3704 (void) vn_start_write(vp, &mp, V_WAIT);
3705 vput(vp);
3706 vp = NULL;
3707 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
3708
3709 /*
3710 * XXX At this time VOP_FSYNC() does not accept offset and byte
3711 * count parameters, so these arguments are useless (someday maybe).
3712 */
3713 off = fxdr_hyper(tl);
3714 tl += 2;
3715 cnt = fxdr_unsigned(int, *tl);
3716 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
3717 if (error) {
3718 nfsm_reply(2 * NFSX_UNSIGNED);
3719 nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3720 error = 0;
3721 goto nfsmout;
3722 }
3723 for_ret = VOP_GETATTR(vp, &bfor, cred, td);
3724
3725 if (cnt > MAX_COMMIT_COUNT) {
3726 /*
3727 * Give up and do the whole thing
3728 */
3729 if (vp->v_object &&
3730 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3731 VM_OBJECT_LOCK(vp->v_object);
3732 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3733 VM_OBJECT_UNLOCK(vp->v_object);
3734 }
3735 error = VOP_FSYNC(vp, cred, MNT_WAIT, td);
3736 } else {
3737 /*
3738 * Locate and synchronously write any buffers that fall
3739 * into the requested range. Note: we are assuming that
3740 * f_iosize is a power of 2.
3741 */
3742 int iosize = vp->v_mount->mnt_stat.f_iosize;
3743 int iomask = iosize - 1;
3744 int s;
3745 daddr_t lblkno;
3746
3747 /*
3748 * Align to iosize boundry, super-align to page boundry.
3749 */
3750 if (off & iomask) {
3751 cnt += off & iomask;
3752 off &= ~(u_quad_t)iomask;
3753 }
3754 if (off & PAGE_MASK) {
3755 cnt += off & PAGE_MASK;
3756 off &= ~(u_quad_t)PAGE_MASK;
3757 }
3758 lblkno = off / iosize;
3759
3760 if (vp->v_object &&
3761 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3762 VM_OBJECT_LOCK(vp->v_object);
3763 vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3764 VM_OBJECT_UNLOCK(vp->v_object);
3765 }
3766
3767 s = splbio();
3768 VI_LOCK(vp);
3769 while (cnt > 0) {
3770 struct buf *bp;
3771
3772 /*
3773 * If we have a buffer and it is marked B_DELWRI we
3774 * have to lock and write it. Otherwise the prior
3775 * write is assumed to have already been committed.
3776 *
3777 * gbincore() can return invalid buffers now so we
3778 * have to check that bit as well (though B_DELWRI
3779 * should not be set if B_INVAL is set there could be
3780 * a race here since we haven't locked the buffer).
3781 */
3782 if ((bp = gbincore(vp, lblkno)) != NULL) {
3783 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
3784 LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) {
3785 VI_LOCK(vp);
3786 continue; /* retry */
3787 }
3788 if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
3789 B_DELWRI) {
3790 bremfree(bp);
3791 bp->b_flags &= ~B_ASYNC;
3792 BUF_WRITE(bp);
3793 ++nfs_commit_miss;
3794 } else
3795 BUF_UNLOCK(bp);
3796 VI_LOCK(vp);
3797 }
3798 ++nfs_commit_blks;
3799 if (cnt < iosize)
3800 break;
3801 cnt -= iosize;
3802 ++lblkno;
3803 }
3804 VI_UNLOCK(vp);
3805 splx(s);
3806 }
3807
3808 aft_ret = VOP_GETATTR(vp, &aft, cred, td);
3809 vput(vp);
3810 vp = NULL;
3811 ereply:
3812 nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
3813 nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3814 if (!error) {
3815 tl = nfsm_build(u_int32_t *, NFSX_V3WRITEVERF);
3816 if (nfsver.tv_sec == 0)
3817 nfsver = boottime;
3818 *tl++ = txdr_unsigned(nfsver.tv_sec);
3819 *tl = txdr_unsigned(nfsver.tv_usec);
3820 } else {
3821 error = 0;
3822 }
3823 nfsmout:
3824 if (vp)
3825 vput(vp);
3826 vn_finished_write(mp);
3827 return(error);
3828 }
3829
3830 /*
3831 * nfs statfs service
3832 */
3833 int
3834 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3835 struct thread *td, struct mbuf **mrq)
3836 {
3837 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3838 struct sockaddr *nam = nfsd->nd_nam;
3839 caddr_t dpos = nfsd->nd_dpos;
3840 struct ucred *cred = &nfsd->nd_cr;
3841 struct statfs *sf;
3842 struct nfs_statfs *sfp;
3843 caddr_t bpos;
3844 int error = 0, rdonly, getret = 1;
3845 int v3 = (nfsd->nd_flag & ND_NFSV3);
3846 struct mbuf *mb, *mreq;
3847 struct vnode *vp = NULL;
3848 struct vattr at;
3849 nfsfh_t nfh;
3850 fhandle_t *fhp;
3851 struct statfs statfs;
3852 u_quad_t tval;
3853
3854 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3855 fhp = &nfh.fh_generic;
3856 nfsm_srvmtofh(fhp);
3857 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
3858 if (error) {
3859 nfsm_reply(NFSX_UNSIGNED);
3860 if (v3)
3861 nfsm_srvpostop_attr(getret, &at);
3862 error = 0;
3863 goto nfsmout;
3864 }
3865 sf = &statfs;
3866 error = VFS_STATFS(vp->v_mount, sf, td);
3867 getret = VOP_GETATTR(vp, &at, cred, td);
3868 vput(vp);
3869 vp = NULL;
3870 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
3871 if (v3)
3872 nfsm_srvpostop_attr(getret, &at);
3873 if (error) {
3874 error = 0;
3875 goto nfsmout;
3876 }
3877 sfp = nfsm_build(struct nfs_statfs *, NFSX_STATFS(v3));
3878 if (v3) {
3879 tval = (u_quad_t)sf->f_blocks;
3880 tval *= (u_quad_t)sf->f_bsize;
3881 txdr_hyper(tval, &sfp->sf_tbytes);
3882 tval = (u_quad_t)sf->f_bfree;
3883 tval *= (u_quad_t)sf->f_bsize;
3884 txdr_hyper(tval, &sfp->sf_fbytes);
3885 tval = (u_quad_t)sf->f_bavail;
3886 tval *= (u_quad_t)sf->f_bsize;
3887 txdr_hyper(tval, &sfp->sf_abytes);
3888 sfp->sf_tfiles.nfsuquad[0] = 0;
3889 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3890 sfp->sf_ffiles.nfsuquad[0] = 0;
3891 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3892 sfp->sf_afiles.nfsuquad[0] = 0;
3893 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3894 sfp->sf_invarsec = 0;
3895 } else {
3896 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3897 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3898 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3899 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3900 sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3901 }
3902 nfsmout:
3903 if (vp)
3904 vput(vp);
3905 return(error);
3906 }
3907
3908 /*
3909 * nfs fsinfo service
3910 */
3911 int
3912 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3913 struct thread *td, struct mbuf **mrq)
3914 {
3915 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3916 struct sockaddr *nam = nfsd->nd_nam;
3917 caddr_t dpos = nfsd->nd_dpos;
3918 struct ucred *cred = &nfsd->nd_cr;
3919 struct nfsv3_fsinfo *sip;
3920 caddr_t bpos;
3921 int error = 0, rdonly, getret = 1, pref;
3922 struct mbuf *mb, *mreq;
3923 struct vnode *vp = NULL;
3924 struct vattr at;
3925 nfsfh_t nfh;
3926 fhandle_t *fhp;
3927 u_quad_t maxfsize;
3928 struct statfs sb;
3929 int v3 = (nfsd->nd_flag & ND_NFSV3);
3930
3931 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3932 if (!v3)
3933 panic("nfsrv_fsinfo: v3 proc called on a v2 connection");
3934 fhp = &nfh.fh_generic;
3935 nfsm_srvmtofh(fhp);
3936 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
3937 if (error) {
3938 nfsm_reply(NFSX_UNSIGNED);
3939 nfsm_srvpostop_attr(getret, &at);
3940 error = 0;
3941 goto nfsmout;
3942 }
3943
3944 /* XXX Try to make a guess on the max file size. */
3945 VFS_STATFS(vp->v_mount, &sb, td);
3946 maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3947
3948 getret = VOP_GETATTR(vp, &at, cred, td);
3949 vput(vp);
3950 vp = NULL;
3951 nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
3952 nfsm_srvpostop_attr(getret, &at);
3953 sip = nfsm_build(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
3954
3955 /*
3956 * XXX
3957 * There should be filesystem VFS OP(s) to get this information.
3958 * For now, assume ufs.
3959 */
3960 if (slp->ns_so->so_type == SOCK_DGRAM)
3961 pref = NFS_MAXDGRAMDATA;
3962 else
3963 pref = NFS_MAXDATA;
3964 sip->fs_rtmax = txdr_unsigned(pref);
3965 sip->fs_rtpref = txdr_unsigned(pref);
3966 sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3967 sip->fs_wtmax = txdr_unsigned(pref);
3968 sip->fs_wtpref = txdr_unsigned(pref);
3969 sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3970 sip->fs_dtpref = txdr_unsigned(pref);
3971 txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3972 sip->fs_timedelta.nfsv3_sec = 0;
3973 sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3974 sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3975 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3976 NFSV3FSINFO_CANSETTIME);
3977 nfsmout:
3978 if (vp)
3979 vput(vp);
3980 return(error);
3981 }
3982
3983 /*
3984 * nfs pathconf service
3985 */
3986 int
3987 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3988 struct thread *td, struct mbuf **mrq)
3989 {
3990 struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3991 struct sockaddr *nam = nfsd->nd_nam;
3992 caddr_t dpos = nfsd->nd_dpos;
3993 struct ucred *cred = &nfsd->nd_cr;
3994 struct nfsv3_pathconf *pc;
3995 caddr_t bpos;
3996 int error = 0, rdonly, getret = 1;
3997 register_t linkmax, namemax, chownres, notrunc;
3998 struct mbuf *mb, *mreq;
3999 struct vnode *vp = NULL;
4000 struct vattr at;
4001 nfsfh_t nfh;
4002 fhandle_t *fhp;
4003 int v3 = (nfsd->nd_flag & ND_NFSV3);
4004
4005 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
4006 if (!v3)
4007 panic("nfsrv_pathconf: v3 proc called on a v2 connection");
4008 fhp = &nfh.fh_generic;
4009 nfsm_srvmtofh(fhp);
4010 error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
4011 if (error) {
4012 nfsm_reply(NFSX_UNSIGNED);
4013 nfsm_srvpostop_attr(getret, &at);
4014 error = 0;
4015 goto nfsmout;
4016 }
4017 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
4018 if (!error)
4019 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
4020 if (!error)
4021 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
4022 if (!error)
4023 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, ¬runc);
4024 getret = VOP_GETATTR(vp, &at, cred, td);
4025 vput(vp);
4026 vp = NULL;
4027 nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
4028 nfsm_srvpostop_attr(getret, &at);
4029 if (error) {
4030 error = 0;
4031 goto nfsmout;
4032 }
4033 pc = nfsm_build(struct nfsv3_pathconf *, NFSX_V3PATHCONF);
4034
4035 pc->pc_linkmax = txdr_unsigned(linkmax);
4036 pc->pc_namemax = txdr_unsigned(namemax);
4037 pc->pc_notrunc = txdr_unsigned(notrunc);
4038 pc->pc_chownrestricted = txdr_unsigned(chownres);
4039
4040 /*
4041 * These should probably be supported by VOP_PATHCONF(), but
4042 * until msdosfs is exportable (why would you want to?), the
4043 * Unix defaults should be ok.
4044 */
4045 pc->pc_caseinsensitive = nfsrv_nfs_false;
4046 pc->pc_casepreserving = nfsrv_nfs_true;
4047 nfsmout:
4048 if (vp)
4049 vput(vp);
4050 return(error);
4051 }
4052
4053 /*
4054 * Null operation, used by clients to ping server
4055 */
4056 /* ARGSUSED */
4057 int
4058 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
4059 struct thread *td, struct mbuf **mrq)
4060 {
4061 struct mbuf *mrep = nfsd->nd_mrep;
4062 caddr_t bpos;
4063 int error = NFSERR_RETVOID;
4064 struct mbuf *mb, *mreq;
4065
4066 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
4067 nfsm_reply(0);
4068 nfsmout:
4069 return (error);
4070 }
4071
4072 /*
4073 * No operation, used for obsolete procedures
4074 */
4075 /* ARGSUSED */
4076 int
4077 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
4078 struct thread *td, struct mbuf **mrq)
4079 {
4080 struct mbuf *mrep = nfsd->nd_mrep;
4081 caddr_t bpos;
4082 int error;
4083 struct mbuf *mb, *mreq;
4084
4085 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
4086 if (nfsd->nd_repstat)
4087 error = nfsd->nd_repstat;
4088 else
4089 error = EPROCUNAVAIL;
4090 nfsm_reply(0);
4091 error = 0;
4092 nfsmout:
4093 return (error);
4094 }
4095
4096 /*
4097 * Perform access checking for vnodes obtained from file handles that would
4098 * refer to files already opened by a Unix client. You cannot just use
4099 * vn_writechk() and VOP_ACCESS() for two reasons.
4100 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
4101 * 2 - The owner is to be given access irrespective of mode bits for some
4102 * operations, so that processes that chmod after opening a file don't
4103 * break. I don't like this because it opens a security hole, but since
4104 * the nfs server opens a security hole the size of a barn door anyhow,
4105 * what the heck.
4106 *
4107 * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
4108 * will return EPERM instead of EACCESS. EPERM is always an error.
4109 */
4110 static int
4111 nfsrv_access(struct vnode *vp, int flags, struct ucred *cred, int rdonly,
4112 struct thread *td, int override)
4113 {
4114 struct vattr vattr;
4115 int error;
4116
4117 nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
4118 if (flags & VWRITE) {
4119 /* Just vn_writechk() changed to check rdonly */
4120 /*
4121 * Disallow write attempts on read-only filesystems;
4122 * unless the file is a socket or a block or character
4123 * device resident on the filesystem.
4124 */
4125 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
4126 switch (vp->v_type) {
4127 case VREG:
4128 case VDIR:
4129 case VLNK:
4130 return (EROFS);
4131 default:
4132 break;
4133 }
4134 }
4135 /*
4136 * If there's shared text associated with
4137 * the inode, we can't allow writing.
4138 */
4139 if (vp->v_vflag & VV_TEXT)
4140 return (ETXTBSY);
4141 }
4142 error = VOP_GETATTR(vp, &vattr, cred, td);
4143 if (error)
4144 return (error);
4145 error = VOP_ACCESS(vp, flags, cred, td);
4146 /*
4147 * Allow certain operations for the owner (reads and writes
4148 * on files that are already open).
4149 */
4150 if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
4151 error = 0;
4152 return error;
4153 }
Cache object: 00cc2cad5b1f5aeed447b0b96ac3a065
|