1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 #include <sys/capsicum.h>
40 #include <sys/extattr.h>
41
42 /*
43 * Functions that perform the vfs operations required by the routines in
44 * nfsd_serv.c. It is hoped that this change will make the server more
45 * portable.
46 */
47
48 #include <fs/nfs/nfsport.h>
49 #include <security/mac/mac_framework.h>
50 #include <sys/callout.h>
51 #include <sys/filio.h>
52 #include <sys/hash.h>
53 #include <sys/sysctl.h>
54 #include <nlm/nlm_prot.h>
55 #include <nlm/nlm.h>
56
FEATURE(nfsd, "NFSv4 server");

/*
 * Objects referenced by this file whose definitions are not in this file
 * (declared extern here).
 */
extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
extern int nfsrv_useacl;
extern int newnfs_numnfsd;
extern struct mount nfsv4root_mnt;
extern struct nfsrv_stablefirst nfsrv_stablefirst;
extern SVCPOOL	*nfsrvd_pool;
extern struct nfsv4lock nfsd_suspend_lock;
extern struct nfsclienthashhead *nfsclienthash;
extern struct nfslockhashhead *nfslockhash;
extern struct nfssessionhash *nfssessionhash;
extern int nfsrv_sessionhashsize;
extern struct nfsstatsv1 nfsstatsv1;
extern struct nfslayouthash *nfslayouthash;
extern int nfsrv_layouthashsize;
extern struct mtx nfsrv_dslock_mtx;
extern int nfs_pnfsiothreads;
extern struct nfsdontlisthead nfsrv_dontlisthead;
extern volatile int nfsrv_dontlistlen;
extern volatile int nfsrv_devidcnt;
extern int nfsrv_maxpnfsmirror;
extern uint32_t nfs_srvmaxio;
extern int nfs_bufpackets;
extern u_long sb_max_adj;

/*
 * Objects declared at file scope here.  NFSDLOCKMUTEX and NFSSTATESPINLOCK
 * are macros whose expansions are not visible in this file.
 */
struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
NFSDLOCKMUTEX;
NFSSTATESPINLOCK;
struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
struct mtx nfsrc_udpmtx;
struct mtx nfs_v4root_mutex;
struct mtx nfsrv_dontlistlock_mtx;
struct mtx nfsrv_recalllock_mtx;
struct nfsrvfh nfs_rootfh, nfs_pubfh;
/* Both "set" flags start out as 0. */
int nfs_pubfhset = 0, nfs_rootfhset = 0;
struct proc *nfsd_master_proc = NULL;
int nfsd_debuglevel = 0;		/* Exported as vfs.nfsd.debuglevel. */
static pid_t nfsd_master_pid = (pid_t)-1;
static char nfsd_master_comm[MAXCOMLEN + 1];
static struct timeval nfsd_master_start;
static uint32_t nfsv4_sysid = 0;
static fhandle_t zerofh;
struct callout nfsd_callout;
101
/* Forward declarations for functions defined later in this file. */
static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
    struct ucred *);
static void nfsvno_updateds(struct vnode *, struct ucred *, struct thread *);

/* Tunables; the sysctl declarations that export them follow below. */
int nfsrv_enable_crossmntpt = 1;
static int nfs_commit_blks;
static int nfs_commit_miss;
extern int nfsrv_issuedelegs;
extern int nfsrv_dolocallocks;
extern int nfsd_enable_stringtouid;
extern struct nfsdevicehead nfsrv_devidhead;

/* Helpers that build mbuf chains and matching iovec arrays. */
static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **,
    struct iovec **);
static int nfsrv_createiovec_extpgs(int, int, struct mbuf **,
    struct mbuf **, struct iovec **);
static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **,
    int *);
/* pNFS/DS related helpers. */
static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *,
    NFSPROC_T *);
static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **,
    int *, char *, fhandle_t *);
static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *,
    NFSPROC_T *);
static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *,
    struct thread *, int, struct mbuf **, char *, struct mbuf **,
    struct nfsvattr *, struct acl *, off_t *, int, bool *);
static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *);
static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *,
    NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **);
static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
    NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **,
    char *, int *);
static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
    NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount **, int, struct acl *, int *);
static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *);
static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
    struct vnode *, struct nfsmount *, struct nfsvattr *);
static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *,
    NFSPROC_T *, struct nfsmount *);
static int nfsrv_putfhname(fhandle_t *, char *);
static int nfsrv_pnfslookupds(struct vnode *, struct vnode *,
    struct pnfsdsfile *, struct vnode **, NFSPROC_T *);
static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *,
    struct vnode *, NFSPROC_T *);
static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *);
static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *,
    NFSPROC_T *);
static int nfsrv_pnfsstatfs(struct statfs *, struct mount *);

/* Not static: declared with external linkage. */
int nfs_pnfsio(task_fn_t *, void *);
156
/* The vfs.nfsd sysctl node and the simple integer knobs beneath it. */
SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "NFS server");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
    &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
/* The two commit_* knobs have an empty description string. */
SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
    0, "");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
    0, "");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
    &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
    &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
    0, "Debug level for NFS server");
SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW,
    &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names");
/* Defaults to 1 (enabled). */
static int nfsrv_pnfsgetdsattr = 1;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW,
    &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC");
176
177 /*
178 * nfsrv_dsdirsize can only be increased and only when the nfsd threads are
179 * not running.
180 * The dsN subdirectories for the increased values must have been created
181 * on all DS servers before this increase is done.
182 */
183 u_int nfsrv_dsdirsize = 20;
184 static int
185 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS)
186 {
187 int error, newdsdirsize;
188
189 newdsdirsize = nfsrv_dsdirsize;
190 error = sysctl_handle_int(oidp, &newdsdirsize, 0, req);
191 if (error != 0 || req->newptr == NULL)
192 return (error);
193 if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 ||
194 newnfs_numnfsd != 0)
195 return (EINVAL);
196 nfsrv_dsdirsize = newdsdirsize;
197 return (0);
198 }
199 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize,
200 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize),
201 sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers");
202
203 /*
204 * nfs_srvmaxio can only be increased and only when the nfsd threads are
205 * not running. The setting must be a power of 2, with the current limit of
206 * 1Mbyte.
207 */
208 static int
209 sysctl_srvmaxio(SYSCTL_HANDLER_ARGS)
210 {
211 int error;
212 u_int newsrvmaxio;
213 uint64_t tval;
214
215 newsrvmaxio = nfs_srvmaxio;
216 error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req);
217 if (error != 0 || req->newptr == NULL)
218 return (error);
219 if (newsrvmaxio == nfs_srvmaxio)
220 return (0);
221 if (newsrvmaxio < nfs_srvmaxio) {
222 printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n");
223 return (EINVAL);
224 }
225 if (newsrvmaxio > 1048576) {
226 printf("nfsd: vfs.nfsd.srvmaxio cannot be > 1Mbyte\n");
227 return (EINVAL);
228 }
229 if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) {
230 printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n");
231 return (EINVAL);
232 }
233
234 /*
235 * Check that kern.ipc.maxsockbuf is large enough for
236 * newsrviomax, given the setting of vfs.nfs.bufpackets.
237 */
238 if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets >
239 sb_max_adj) {
240 /*
241 * Suggest vfs.nfs.bufpackets * maximum RPC message for
242 * sb_max_adj.
243 */
244 tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets;
245
246 /*
247 * Convert suggested sb_max_adj value to a suggested
248 * sb_max value, which is what is set via kern.ipc.maxsockbuf.
249 * Perform the inverse calculation of (from uipc_sockbuf.c):
250 * sb_max_adj = (u_quad_t)sb_max * MCLBYTES /
251 * (MSIZE + MCLBYTES);
252 * XXX If the calculation of sb_max_adj from sb_max changes,
253 * this calculation must be changed as well.
254 */
255 tval *= (MSIZE + MCLBYTES); /* Brackets for readability. */
256 tval += MCLBYTES - 1; /* Round up divide. */
257 tval /= MCLBYTES;
258 printf("nfsd: set kern.ipc.maxsockbuf to a minimum of "
259 "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval,
260 newsrvmaxio);
261 return (EINVAL);
262 }
263
264 NFSD_LOCK();
265 if (newnfs_numnfsd != 0) {
266 NFSD_UNLOCK();
267 printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd "
268 "threads are running\n");
269 return (EINVAL);
270 }
271
272
273 nfs_srvmaxio = newsrvmaxio;
274 NFSD_UNLOCK();
275 return (0);
276 }
277 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio,
278 CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
279 sysctl_srvmaxio, "IU", "Maximum I/O size in bytes");
280
/*
 * Constants for the sequential I/O heuristic below.
 * MAX_REORDERED_RPC scales the offset distance within which an I/O is still
 * treated as a reordered RPC, NUM_HEURISTIC is the number of table entries
 * and the NHUSE_* values are the initial value, increment and cap applied
 * to an entry's nh_use count.
 */
#define	MAX_REORDERED_RPC	16
#define	NUM_HEURISTIC		1031
#define	NHUSE_INIT		64
#define	NHUSE_INC		16
#define	NHUSE_MAX		2048

/* Table of per-vnode hints, indexed by a hash of the vnode pointer. */
static struct nfsheur {
	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
	off_t nh_nextoff;	/* next offset for sequential detection */
	int nh_use;		/* use count for selection */
	int nh_seqcount;	/* heuristic */
} nfsheur[NUM_HEURISTIC];
293
294 /*
295 * Heuristic to detect sequential operation.
296 */
297 static struct nfsheur *
298 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
299 {
300 struct nfsheur *nh;
301 int hi, try;
302
303 /* Locate best candidate. */
304 try = 32;
305 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
306 nh = &nfsheur[hi];
307 while (try--) {
308 if (nfsheur[hi].nh_vp == vp) {
309 nh = &nfsheur[hi];
310 break;
311 }
312 if (nfsheur[hi].nh_use > 0)
313 --nfsheur[hi].nh_use;
314 hi = (hi + 1) % NUM_HEURISTIC;
315 if (nfsheur[hi].nh_use < nh->nh_use)
316 nh = &nfsheur[hi];
317 }
318
319 /* Initialize hint if this is a new file. */
320 if (nh->nh_vp != vp) {
321 nh->nh_vp = vp;
322 nh->nh_nextoff = uio->uio_offset;
323 nh->nh_use = NHUSE_INIT;
324 if (uio->uio_offset == 0)
325 nh->nh_seqcount = 4;
326 else
327 nh->nh_seqcount = 1;
328 }
329
330 /* Calculate heuristic. */
331 if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
332 uio->uio_offset == nh->nh_nextoff) {
333 /* See comments in vfs_vnops.c:sequential_heuristic(). */
334 nh->nh_seqcount += howmany(uio->uio_resid, 16384);
335 if (nh->nh_seqcount > IO_SEQMAX)
336 nh->nh_seqcount = IO_SEQMAX;
337 } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
338 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
339 /* Probably a reordered RPC, leave seqcount alone. */
340 } else if (nh->nh_seqcount > 1) {
341 nh->nh_seqcount /= 2;
342 } else {
343 nh->nh_seqcount = 0;
344 }
345 nh->nh_use += NHUSE_INC;
346 if (nh->nh_use > NHUSE_MAX)
347 nh->nh_use = NHUSE_MAX;
348 return (nh);
349 }
350
351 /*
352 * Get attributes into nfsvattr structure.
353 */
354 int
355 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap,
356 struct nfsrv_descript *nd, struct thread *p, int vpislocked,
357 nfsattrbit_t *attrbitp)
358 {
359 int error, gotattr, lockedit = 0;
360 struct nfsvattr na;
361
362 if (vpislocked == 0) {
363 /*
364 * When vpislocked == 0, the vnode is either exclusively
365 * locked by this thread or not locked by this thread.
366 * As such, shared lock it, if not exclusively locked.
367 */
368 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
369 lockedit = 1;
370 NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
371 }
372 }
373
374 /*
375 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed
376 * attributes, as required.
377 * This needs to be done for regular files if:
378 * - non-NFSv4 RPCs or
379 * - when attrbitp == NULL or
380 * - an NFSv4 RPC with any of the above attributes in attrbitp.
381 * A return of 0 for nfsrv_proxyds() indicates that it has acquired
382 * these attributes. nfsrv_proxyds() will return an error if the
383 * server is not a pNFS one.
384 */
385 gotattr = 0;
386 if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL ||
387 (nd->nd_flag & ND_NFSV4) == 0 ||
388 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) ||
389 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) ||
390 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) ||
391 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) ||
392 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) {
393 error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p,
394 NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0,
395 NULL);
396 if (error == 0)
397 gotattr = 1;
398 }
399
400 error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred);
401 if (lockedit != 0)
402 NFSVOPUNLOCK(vp);
403
404 /*
405 * If we got the Change, Size and Modify Time from the DS,
406 * replace them.
407 */
408 if (gotattr != 0) {
409 nvap->na_atime = na.na_atime;
410 nvap->na_mtime = na.na_mtime;
411 nvap->na_filerev = na.na_filerev;
412 nvap->na_size = na.na_size;
413 nvap->na_bytes = na.na_bytes;
414 }
415 NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr,
416 error, (uintmax_t)na.na_filerev);
417
418 NFSEXITCODE(error);
419 return (error);
420 }
421
422 /*
423 * Get a file handle for a vnode.
424 */
425 int
426 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
427 {
428 int error;
429
430 NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
431 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
432 error = VOP_VPTOFH(vp, &fhp->fh_fid);
433
434 NFSEXITCODE(error);
435 return (error);
436 }
437
438 /*
439 * Perform access checking for vnodes obtained from file handles that would
440 * refer to files already opened by a Unix client. You cannot just use
441 * vn_writechk() and VOP_ACCESSX() for two reasons.
442 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
443 * case.
444 * 2 - The owner is to be given access irrespective of mode bits for some
445 * operations, so that processes that chmod after opening a file don't
446 * break.
447 */
448 int
449 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
450 struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
451 u_int32_t *supportedtypep)
452 {
453 struct vattr vattr;
454 int error = 0, getret = 0;
455
456 if (vpislocked == 0) {
457 if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
458 error = EPERM;
459 goto out;
460 }
461 }
462 if (accmode & VWRITE) {
463 /* Just vn_writechk() changed to check rdonly */
464 /*
465 * Disallow write attempts on read-only file systems;
466 * unless the file is a socket or a block or character
467 * device resident on the file system.
468 */
469 if (NFSVNO_EXRDONLY(exp) ||
470 (vp->v_mount->mnt_flag & MNT_RDONLY)) {
471 switch (vp->v_type) {
472 case VREG:
473 case VDIR:
474 case VLNK:
475 error = EROFS;
476 default:
477 break;
478 }
479 }
480 /*
481 * If there's shared text associated with
482 * the inode, try to free it up once. If
483 * we fail, we can't allow writing.
484 */
485 if (VOP_IS_TEXT(vp) && error == 0)
486 error = ETXTBSY;
487 }
488 if (error != 0) {
489 if (vpislocked == 0)
490 NFSVOPUNLOCK(vp);
491 goto out;
492 }
493
494 /*
495 * Should the override still be applied when ACLs are enabled?
496 */
497 error = VOP_ACCESSX(vp, accmode, cred, p);
498 if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
499 /*
500 * Try again with VEXPLICIT_DENY, to see if the test for
501 * deletion is supported.
502 */
503 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
504 if (error == 0) {
505 if (vp->v_type == VDIR) {
506 accmode &= ~(VDELETE | VDELETE_CHILD);
507 accmode |= VWRITE;
508 error = VOP_ACCESSX(vp, accmode, cred, p);
509 } else if (supportedtypep != NULL) {
510 *supportedtypep &= ~NFSACCESS_DELETE;
511 }
512 }
513 }
514
515 /*
516 * Allow certain operations for the owner (reads and writes
517 * on files that are already open).
518 */
519 if (override != NFSACCCHK_NOOVERRIDE &&
520 (error == EPERM || error == EACCES)) {
521 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
522 error = 0;
523 else if (override & NFSACCCHK_ALLOWOWNER) {
524 getret = VOP_GETATTR(vp, &vattr, cred);
525 if (getret == 0 && cred->cr_uid == vattr.va_uid)
526 error = 0;
527 }
528 }
529 if (vpislocked == 0)
530 NFSVOPUNLOCK(vp);
531
532 out:
533 NFSEXITCODE(error);
534 return (error);
535 }
536
537 /*
538 * Set attribute(s) vnop.
539 */
540 int
541 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
542 struct thread *p, struct nfsexstuff *exp)
543 {
544 u_quad_t savsize = 0;
545 int error, savedit;
546 time_t savbtime;
547
548 /*
549 * If this is an exported file system and a pNFS service is running,
550 * don't VOP_SETATTR() of size for the MDS file system.
551 */
552 savedit = 0;
553 error = 0;
554 if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 &&
555 nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL &&
556 nvap->na_vattr.va_size > 0) {
557 savsize = nvap->na_vattr.va_size;
558 nvap->na_vattr.va_size = VNOVAL;
559 if (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
560 nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
561 nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
562 nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
563 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)
564 savedit = 1;
565 else
566 savedit = 2;
567 }
568 if (savedit != 2)
569 error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
570 if (savedit != 0)
571 nvap->na_vattr.va_size = savsize;
572 if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
573 nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
574 nvap->na_vattr.va_size != VNOVAL ||
575 nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
576 nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
577 nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) {
578 /* Never modify birthtime on a DS file. */
579 savbtime = nvap->na_vattr.va_birthtime.tv_sec;
580 nvap->na_vattr.va_birthtime.tv_sec = VNOVAL;
581 /* For a pNFS server, set the attributes on the DS file. */
582 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR,
583 NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL);
584 nvap->na_vattr.va_birthtime.tv_sec = savbtime;
585 if (error == ENOENT)
586 error = 0;
587 }
588 NFSEXITCODE(error);
589 return (error);
590 }
591
/*
 * Set up nameidata for a lookup() call and do it.
 *
 * nd       - request descriptor; nd_flag selects public file handle lookup
 *            (ND_PUBLOOKUP) and NFSv4 behaviour (ND_NFSV4)
 * ndp      - nameidata whose path buffer (cn_pnbuf) is already filled in
 * dp       - starting directory vnode
 * islocked - non-zero if dp is locked on entry
 * exp      - export information, consulted for read-only and exported checks
 * p        - calling thread, stored in cn_thread
 * retdirp  - set to NULL, then to dp once dp is known to be a directory
 *
 * Symbolic links are only followed for ND_PUBLOOKUP requests; otherwise
 * encountering one fails with EINVAL.
 * Returns 0 on success or an error value.  On the "out" error path the path
 * buffer is released and ni_vp, ni_dvp and ni_startdir are set to NULL.
 */
int
nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
    struct vnode **retdirp)
{
	struct componentname *cnp = &ndp->ni_cnd;
	int i;
	struct iovec aiov;
	struct uio auio;
	/* Remember whether the caller itself asked for a locked leaf. */
	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
	int error = 0;
	char *cp;

	*retdirp = NULL;
	cnp->cn_nameptr = cnp->cn_pnbuf;
	ndp->ni_lcf = 0;
	/*
	 * Extract and set starting directory.
	 */
	if (dp->v_type != VDIR) {
		/* Not a directory: drop dp and the path buffer and fail. */
		if (islocked)
			vput(dp);
		else
			vrele(dp);
		nfsvno_relpathbuf(ndp);
		error = ENOTDIR;
		goto out1;
	}
	if (islocked)
		NFSVOPUNLOCK(dp);
	VREF(dp);
	*retdirp = dp;
	if (NFSVNO_EXRDONLY(exp))
		cnp->cn_flags |= RDONLY;
	ndp->ni_segflg = UIO_SYSSPACE;

	if (nd->nd_flag & ND_PUBLOOKUP) {
		ndp->ni_loopcnt = 0;
		if (cnp->cn_pnbuf[0] == '/') {
			/* Absolute path: restart the lookup at rootvnode. */
			vrele(dp);
			/*
			 * Check for degenerate pathnames here, since lookup()
			 * panics on them.
			 */
			for (i = 1; i < ndp->ni_pathlen; i++)
				if (cnp->cn_pnbuf[i] != '/')
					break;
			if (i == ndp->ni_pathlen) {
				error = NFSERR_ACCES;
				goto out;
			}
			dp = rootvnode;
			VREF(dp);
		}
	} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
	    (nd->nd_flag & ND_NFSV4) == 0) {
		/*
		 * Only cross mount points for NFSv4 when doing a
		 * mount while traversing the file system above
		 * the mount point, unless nfsrv_enable_crossmntpt is set.
		 */
		cnp->cn_flags |= NOCROSSMOUNT;
	}

	/*
	 * Initialize for scan, set ni_startdir and bump ref on dp again
	 * because lookup() will dereference ni_startdir.
	 */

	cnp->cn_thread = p;
	ndp->ni_startdir = dp;
	ndp->ni_rootdir = rootvnode;
	ndp->ni_topdir = NULL;

	/*
	 * Always look up with LOCKLEAF set; if the caller did not ask for
	 * it, the leaf is unlocked and the flag cleared again below.
	 */
	if (!lockleaf)
		cnp->cn_flags |= LOCKLEAF;
	for (;;) {
		cnp->cn_nameptr = cnp->cn_pnbuf;
		/*
		 * Call lookup() to do the real work.  If an error occurs,
		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
		 * we do not have to dereference anything before returning.
		 * In either case ni_startdir will be dereferenced and NULLed
		 * out.
		 */
		error = lookup(ndp);
		if (error)
			break;

		/*
		 * Check for encountering a symbolic link.  Trivial
		 * termination occurs if no symlink encountered.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
				nfsvno_relpathbuf(ndp);
			if (ndp->ni_vp && !lockleaf)
				NFSVOPUNLOCK(ndp->ni_vp);
			break;
		}

		/*
		 * Validate symlink
		 */
		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
			NFSVOPUNLOCK(ndp->ni_dvp);
		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
			error = EINVAL;
			goto badlink2;
		}

		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			goto badlink2;
		}
		/*
		 * If more path follows the link, read the link into a new
		 * buffer so the remainder can be appended; otherwise the
		 * link text simply overwrites the current path buffer.
		 */
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = NULL;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
			/*
			 * Shared failure exits, also reached by goto:
			 * badlink1 frees the separately allocated link
			 * buffer (if any), badlink2 then releases ni_dvp
			 * and ni_vp and leaves the loop.
			 */
		badlink1:
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
		badlink2:
			vrele(ndp->ni_dvp);
			vput(ndp->ni_vp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink1;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink1;
		}

		/*
		 * Adjust or replace path
		 */
		if (ndp->ni_pathlen > 1) {
			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;

		/*
		 * Cleanup refs for next loop and check if root directory
		 * should replace current directory.  Normally ni_dvp
		 * becomes the new base directory and is cleaned up when
		 * we loop.  Explicitly null pointers after invalidation
		 * to clarify operation.
		 */
		vput(ndp->ni_vp);
		ndp->ni_vp = NULL;

		if (cnp->cn_pnbuf[0] == '/') {
			vrele(ndp->ni_dvp);
			ndp->ni_dvp = ndp->ni_rootdir;
			VREF(ndp->ni_dvp);
		}
		ndp->ni_startdir = ndp->ni_dvp;
		ndp->ni_dvp = NULL;
	}
	if (!lockleaf)
		cnp->cn_flags &= ~LOCKLEAF;

out:
	if (error) {
		nfsvno_relpathbuf(ndp);
		ndp->ni_vp = NULL;
		ndp->ni_dvp = NULL;
		ndp->ni_startdir = NULL;
	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
		/* The caller did not ask for the parent directory. */
		ndp->ni_dvp = NULL;
	}

out1:
	NFSEXITCODE2(error, nd);
	return (error);
}
789
790 /*
791 * Set up a pathname buffer and return a pointer to it and, optionally
792 * set a hash pointer.
793 */
794 void
795 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
796 {
797 struct componentname *cnp = &ndp->ni_cnd;
798
799 cnp->cn_flags |= (NOMACCHECK | HASBUF);
800 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
801 if (hashpp != NULL)
802 *hashpp = NULL;
803 *bufpp = cnp->cn_pnbuf;
804 }
805
806 /*
807 * Release the above path buffer, if not released by nfsvno_namei().
808 */
809 void
810 nfsvno_relpathbuf(struct nameidata *ndp)
811 {
812
813 if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
814 panic("nfsrelpath");
815 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
816 ndp->ni_cnd.cn_flags &= ~HASBUF;
817 }
818
819 /*
820 * Readlink vnode op into an mbuf list.
821 */
822 int
823 nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz,
824 struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
825 {
826 struct iovec *iv;
827 struct uio io, *uiop = &io;
828 struct mbuf *mp, *mp3;
829 int len, tlen, error = 0;
830
831 len = NFS_MAXPATHLEN;
832 if (maxextsiz > 0)
833 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
834 &mp3, &mp, &iv);
835 else
836 uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv);
837 uiop->uio_iov = iv;
838 uiop->uio_offset = 0;
839 uiop->uio_resid = len;
840 uiop->uio_rw = UIO_READ;
841 uiop->uio_segflg = UIO_SYSSPACE;
842 uiop->uio_td = NULL;
843 error = VOP_READLINK(vp, uiop, cred);
844 free(iv, M_TEMP);
845 if (error) {
846 m_freem(mp3);
847 *lenp = 0;
848 goto out;
849 }
850 if (uiop->uio_resid > 0) {
851 len -= uiop->uio_resid;
852 tlen = NFSM_RNDUP(len);
853 if (tlen == 0) {
854 m_freem(mp3);
855 mp3 = mp = NULL;
856 } else if (tlen != NFS_MAXPATHLEN || tlen != len)
857 mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen,
858 tlen - len);
859 }
860 *lenp = len;
861 *mpp = mp3;
862 *mpendp = mp;
863
864 out:
865 NFSEXITCODE(error);
866 return (error);
867 }
868
869 /*
870 * Create an mbuf chain and an associated iovec that can be used to Read
871 * or Getextattr of data.
872 * Upon success, return pointers to the first and last mbufs in the chain
873 * plus the malloc'd iovec and its iovlen.
874 */
875 static int
876 nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp,
877 struct iovec **ivp)
878 {
879 struct mbuf *m, *m2 = NULL, *m3;
880 struct iovec *iv;
881 int i, left, siz;
882
883 left = len;
884 m3 = NULL;
885 /*
886 * Generate the mbuf list with the uio_iov ref. to it.
887 */
888 i = 0;
889 while (left > 0) {
890 NFSMGET(m);
891 MCLGET(m, M_WAITOK);
892 m->m_len = 0;
893 siz = min(M_TRAILINGSPACE(m), left);
894 left -= siz;
895 i++;
896 if (m3)
897 m2->m_next = m;
898 else
899 m3 = m;
900 m2 = m;
901 }
902 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
903 m = m3;
904 left = len;
905 i = 0;
906 while (left > 0) {
907 if (m == NULL)
908 panic("nfsrv_createiovec iov");
909 siz = min(M_TRAILINGSPACE(m), left);
910 if (siz > 0) {
911 iv->iov_base = mtod(m, caddr_t) + m->m_len;
912 iv->iov_len = siz;
913 m->m_len += siz;
914 left -= siz;
915 iv++;
916 i++;
917 }
918 m = m->m_next;
919 }
920 *mpp = m3;
921 *mpendp = m2;
922 return (i);
923 }
924
925 /*
926 * Create an mbuf chain and an associated iovec that can be used to Read
927 * or Getextattr of data.
928 * Upon success, return pointers to the first and last mbufs in the chain
929 * plus the malloc'd iovec and its iovlen.
930 * Same as above, but creates ext_pgs mbuf(s).
931 */
932 static int
933 nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp,
934 struct mbuf **mpendp, struct iovec **ivp)
935 {
936 struct mbuf *m, *m2 = NULL, *m3;
937 struct iovec *iv;
938 int i, left, pgno, siz;
939
940 left = len;
941 m3 = NULL;
942 /*
943 * Generate the mbuf list with the uio_iov ref. to it.
944 */
945 i = 0;
946 while (left > 0) {
947 siz = min(left, maxextsiz);
948 m = mb_alloc_ext_plus_pages(siz, M_WAITOK);
949 left -= siz;
950 i += m->m_epg_npgs;
951 if (m3 != NULL)
952 m2->m_next = m;
953 else
954 m3 = m;
955 m2 = m;
956 }
957 *ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
958 m = m3;
959 left = len;
960 i = 0;
961 pgno = 0;
962 while (left > 0) {
963 if (m == NULL)
964 panic("nfsvno_createiovec_extpgs iov");
965 siz = min(PAGE_SIZE, left);
966 if (siz > 0) {
967 iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
968 iv->iov_len = siz;
969 m->m_len += siz;
970 if (pgno == m->m_epg_npgs - 1)
971 m->m_epg_last_len = siz;
972 left -= siz;
973 iv++;
974 i++;
975 pgno++;
976 }
977 if (pgno == m->m_epg_npgs && left > 0) {
978 m = m->m_next;
979 if (m == NULL)
980 panic("nfsvno_createiovec_extpgs iov");
981 pgno = 0;
982 }
983 }
984 *mpp = m3;
985 *mpendp = m2;
986 return (i);
987 }
988
989 /*
990 * Read vnode op call into mbuf list.
991 */
992 int
993 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
994 int maxextsiz, struct thread *p, struct mbuf **mpp,
995 struct mbuf **mpendp)
996 {
997 struct mbuf *m;
998 struct iovec *iv;
999 int error = 0, len, tlen, ioflag = 0;
1000 struct mbuf *m3;
1001 struct uio io, *uiop = &io;
1002 struct nfsheur *nh;
1003
1004 /*
1005 * Attempt to read from a DS file. A return of ENOENT implies
1006 * there is no DS file to read.
1007 */
1008 error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp,
1009 NULL, mpendp, NULL, NULL, NULL, 0, NULL);
1010 if (error != ENOENT)
1011 return (error);
1012
1013 len = NFSM_RNDUP(cnt);
1014 if (maxextsiz > 0)
1015 uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
1016 &m3, &m, &iv);
1017 else
1018 uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv);
1019 uiop->uio_iov = iv;
1020 uiop->uio_offset = off;
1021 uiop->uio_resid = len;
1022 uiop->uio_rw = UIO_READ;
1023 uiop->uio_segflg = UIO_SYSSPACE;
1024 uiop->uio_td = NULL;
1025 nh = nfsrv_sequential_heuristic(uiop, vp);
1026 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
1027 /* XXX KDM make this more systematic? */
1028 nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid;
1029 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
1030 free(iv, M_TEMP);
1031 if (error) {
1032 m_freem(m3);
1033 *mpp = NULL;
1034 goto out;
1035 }
1036 nh->nh_nextoff = uiop->uio_offset;
1037 tlen = len - uiop->uio_resid;
1038 cnt = cnt < tlen ? cnt : tlen;
1039 tlen = NFSM_RNDUP(cnt);
1040 if (tlen == 0) {
1041 m_freem(m3);
1042 m3 = m = NULL;
1043 } else if (len != tlen || tlen != cnt)
1044 m = nfsrv_adj(m3, len - tlen, tlen - cnt);
1045 *mpp = m3;
1046 *mpendp = m;
1047
1048 out:
1049 NFSEXITCODE(error);
1050 return (error);
1051 }
1052
1053 /*
1054 * Create the iovec for the mbuf chain passed in as an argument.
1055 * The "cp" argument is where the data starts within the first mbuf in
1056 * the chain. It returns the iovec and the iovcnt.
1057 */
1058 static int
1059 nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp,
1060 int *iovcntp)
1061 {
1062 struct mbuf *mp;
1063 struct iovec *ivp;
1064 int cnt, i, len;
1065
1066 /*
1067 * Loop through the mbuf chain, counting how many mbufs are a
1068 * part of this write operation, so the iovec size is known.
1069 */
1070 cnt = 0;
1071 len = retlen;
1072 mp = m;
1073 i = mtod(mp, caddr_t) + mp->m_len - cp;
1074 while (len > 0) {
1075 if (i > 0) {
1076 len -= i;
1077 cnt++;
1078 }
1079 mp = mp->m_next;
1080 if (!mp) {
1081 if (len > 0)
1082 return (EBADRPC);
1083 } else
1084 i = mp->m_len;
1085 }
1086
1087 /* Now, create the iovec. */
1088 mp = m;
1089 *ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP,
1090 M_WAITOK);
1091 *iovcntp = cnt;
1092 i = mtod(mp, caddr_t) + mp->m_len - cp;
1093 len = retlen;
1094 while (len > 0) {
1095 if (mp == NULL)
1096 panic("nfsrv_createiovecw");
1097 if (i > 0) {
1098 i = min(i, len);
1099 ivp->iov_base = cp;
1100 ivp->iov_len = i;
1101 ivp++;
1102 len -= i;
1103 }
1104 mp = mp->m_next;
1105 if (mp) {
1106 i = mp->m_len;
1107 cp = mtod(mp, caddr_t);
1108 }
1109 }
1110 return (0);
1111 }
1112
1113 /*
1114 * Write vnode op from an mbuf list.
1115 */
1116 int
1117 nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable,
1118 struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
1119 {
1120 struct iovec *iv;
1121 int cnt, ioflags, error;
1122 struct uio io, *uiop = &io;
1123 struct nfsheur *nh;
1124
1125 /*
1126 * Attempt to write to a DS file. A return of ENOENT implies
1127 * there is no DS file to write.
1128 */
1129 error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS,
1130 &mp, cp, NULL, NULL, NULL, NULL, 0, NULL);
1131 if (error != ENOENT) {
1132 *stable = NFSWRITE_FILESYNC;
1133 return (error);
1134 }
1135
1136 if (*stable == NFSWRITE_UNSTABLE)
1137 ioflags = IO_NODELOCKED;
1138 else
1139 ioflags = (IO_SYNC | IO_NODELOCKED);
1140 error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt);
1141 if (error != 0)
1142 return (error);
1143 uiop->uio_iov = iv;
1144 uiop->uio_iovcnt = cnt;
1145 uiop->uio_resid = retlen;
1146 uiop->uio_rw = UIO_WRITE;
1147 uiop->uio_segflg = UIO_SYSSPACE;
1148 NFSUIOPROC(uiop, p);
1149 uiop->uio_offset = off;
1150 nh = nfsrv_sequential_heuristic(uiop, vp);
1151 ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
1152 /* XXX KDM make this more systematic? */
1153 nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid;
1154 error = VOP_WRITE(vp, uiop, ioflags, cred);
1155 if (error == 0)
1156 nh->nh_nextoff = uiop->uio_offset;
1157 free(iv, M_TEMP);
1158
1159 NFSEXITCODE(error);
1160 return (error);
1161 }
1162
/*
 * Common code for creating a regular file (plus special files for V2).
 *
 * The starting error is nd->nd_repstat.  "ndp" holds the result of the
 * name lookup; every path through this function releases ni_startdir,
 * the path buffer and ni_dvp.
 *
 * - If there is no error and the name does not exist (ni_vp == NULL), the
 *   object is created according to nvap->na_type and ndp->ni_vp is stored
 *   in *vpp.
 * - Otherwise (an error is already set and/or the file exists) the lookup
 *   is cleaned up, *vpp is set to ndp->ni_vp and, iff there is no error
 *   and na_size is set, the file is truncated to that size.
 *
 * Returns 0 or an error number.
 */
int
nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
    int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp)
{
	u_quad_t tempsize;
	int error;
	struct thread *p = curthread;

	error = nd->nd_repstat;
	if (!error && ndp->ni_vp == NULL) {
		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
			vrele(ndp->ni_startdir);
			error = VOP_CREATE(ndp->ni_dvp,
			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
			/* For a pNFS server, create the data file on a DS. */
			if (error == 0 && nvap->na_type == VREG) {
				/*
				 * Create a data file on a DS for a pNFS server.
				 * This function just returns if not
				 * running a pNFS DS or the creation fails.
				 */
				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
				    nd->nd_cred, p);
			}
			/* The new vnode is only passed along on success. */
			VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp :
			    NULL, false);
			nfsvno_relpathbuf(ndp);
			if (!error) {
				if (*exclusive_flagp) {
					/*
					 * Exclusive create: clear the flag and
					 * store the two words of the create
					 * verifier in the new file's atime.
					 */
					*exclusive_flagp = 0;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_atime.tv_sec = cverf[0];
					nvap->na_atime.tv_nsec = cverf[1];
					error = VOP_SETATTR(ndp->ni_vp,
					    &nvap->na_vattr, nd->nd_cred);
					if (error != 0) {
						/*
						 * The verifier could not be
						 * stored; drop the new vnode
						 * and report "not supported".
						 */
						vput(ndp->ni_vp);
						ndp->ni_vp = NULL;
						error = NFSERR_NOTSUPP;
					}
				}
			}
		/*
		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
		 * (This implies, just get out on an error.)
		 */
		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
		    nvap->na_type == VFIFO) {
			/* A VCHR with an rdev of 0xffffffff is made a FIFO. */
			if (nvap->na_type == VCHR && rdev == 0xffffffff)
				nvap->na_type = VFIFO;
			/* Device nodes need the PRIV_VFS_MKNOD_DEV privilege. */
			if (nvap->na_type != VFIFO &&
			    (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) {
				vrele(ndp->ni_startdir);
				nfsvno_relpathbuf(ndp);
				vput(ndp->ni_dvp);
				goto out;
			}
			nvap->na_rdev = rdev;
			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
			    &ndp->ni_cnd, &nvap->na_vattr);
			VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp :
			    NULL, false);
			nfsvno_relpathbuf(ndp);
			vrele(ndp->ni_startdir);
			if (error)
				goto out;
		} else {
			/* Any other file type cannot be created here. */
			vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vput(ndp->ni_dvp);
			error = ENXIO;
			goto out;
		}
		*vpp = ndp->ni_vp;
	} else {
		/*
		 * Handle cases where error is already set and/or
		 * the file exists.
		 * 1 - clean up the lookup
		 * 2 - iff !error and na_size set, truncate it
		 */
		vrele(ndp->ni_startdir);
		nfsvno_relpathbuf(ndp);
		*vpp = ndp->ni_vp;
		/* dvp is only vrele()'d when it is the same vnode as *vpp. */
		if (ndp->ni_dvp == *vpp)
			vrele(ndp->ni_dvp);
		else
			vput(ndp->ni_dvp);
		if (!error && nvap->na_size != VNOVAL) {
			/* Truncation requires write access to the file. */
			error = nfsvno_accchk(*vpp, VWRITE,
			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
			    NFSACCCHK_VPISLOCKED, NULL);
			if (!error) {
				/* Set only the size; reset everything else. */
				tempsize = nvap->na_size;
				NFSVNO_ATTRINIT(nvap);
				nvap->na_size = tempsize;
				error = nfsvno_setattr(*vpp, nvap,
				    nd->nd_cred, p, exp);
			}
		}
		if (error)
			vput(*vpp);
	}

out:
	NFSEXITCODE(error);
	return (error);
}
1275
1276 /*
1277 * Do a mknod vnode op.
1278 */
1279 int
1280 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
1281 struct thread *p)
1282 {
1283 int error = 0;
1284 enum vtype vtyp;
1285
1286 vtyp = nvap->na_type;
1287 /*
1288 * Iff doesn't exist, create it.
1289 */
1290 if (ndp->ni_vp) {
1291 vrele(ndp->ni_startdir);
1292 nfsvno_relpathbuf(ndp);
1293 vput(ndp->ni_dvp);
1294 vrele(ndp->ni_vp);
1295 error = EEXIST;
1296 goto out;
1297 }
1298 if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1299 vrele(ndp->ni_startdir);
1300 nfsvno_relpathbuf(ndp);
1301 vput(ndp->ni_dvp);
1302 error = NFSERR_BADTYPE;
1303 goto out;
1304 }
1305 if (vtyp == VSOCK) {
1306 vrele(ndp->ni_startdir);
1307 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
1308 &ndp->ni_cnd, &nvap->na_vattr);
1309 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL,
1310 false);
1311 nfsvno_relpathbuf(ndp);
1312 } else {
1313 if (nvap->na_type != VFIFO &&
1314 (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) {
1315 vrele(ndp->ni_startdir);
1316 nfsvno_relpathbuf(ndp);
1317 vput(ndp->ni_dvp);
1318 goto out;
1319 }
1320 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
1321 &ndp->ni_cnd, &nvap->na_vattr);
1322 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL,
1323 false);
1324 nfsvno_relpathbuf(ndp);
1325 vrele(ndp->ni_startdir);
1326 /*
1327 * Since VOP_MKNOD returns the ni_vp, I can't
1328 * see any reason to do the lookup.
1329 */
1330 }
1331
1332 out:
1333 NFSEXITCODE(error);
1334 return (error);
1335 }
1336
1337 /*
1338 * Mkdir vnode op.
1339 */
1340 int
1341 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
1342 struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
1343 {
1344 int error = 0;
1345
1346 if (ndp->ni_vp != NULL) {
1347 if (ndp->ni_dvp == ndp->ni_vp)
1348 vrele(ndp->ni_dvp);
1349 else
1350 vput(ndp->ni_dvp);
1351 vrele(ndp->ni_vp);
1352 nfsvno_relpathbuf(ndp);
1353 error = EEXIST;
1354 goto out;
1355 }
1356 error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
1357 &nvap->na_vattr);
1358 VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false);
1359 nfsvno_relpathbuf(ndp);
1360
1361 out:
1362 NFSEXITCODE(error);
1363 return (error);
1364 }
1365
1366 /*
1367 * symlink vnode op.
1368 */
1369 int
1370 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
1371 int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
1372 struct nfsexstuff *exp)
1373 {
1374 int error = 0;
1375
1376 if (ndp->ni_vp) {
1377 vrele(ndp->ni_startdir);
1378 nfsvno_relpathbuf(ndp);
1379 if (ndp->ni_dvp == ndp->ni_vp)
1380 vrele(ndp->ni_dvp);
1381 else
1382 vput(ndp->ni_dvp);
1383 vrele(ndp->ni_vp);
1384 error = EEXIST;
1385 goto out;
1386 }
1387
1388 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
1389 &nvap->na_vattr, pathcp);
1390 /*
1391 * Although FreeBSD still had the lookup code in
1392 * it for 7/current, there doesn't seem to be any
1393 * point, since VOP_SYMLINK() returns the ni_vp.
1394 * Just vput it for v2.
1395 */
1396 VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, !not_v2 && error == 0);
1397 vrele(ndp->ni_startdir);
1398 nfsvno_relpathbuf(ndp);
1399
1400 out:
1401 NFSEXITCODE(error);
1402 return (error);
1403 }
1404
1405 /*
1406 * Parse symbolic link arguments.
1407 * This function has an ugly side effect. It will malloc() an area for
1408 * the symlink and set iov_base to point to it, only if it succeeds.
1409 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
1410 * be FREE'd later.
1411 */
1412 int
1413 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
1414 struct thread *p, char **pathcpp, int *lenp)
1415 {
1416 u_int32_t *tl;
1417 char *pathcp = NULL;
1418 int error = 0, len;
1419 struct nfsv2_sattr *sp;
1420
1421 *pathcpp = NULL;
1422 *lenp = 0;
1423 if ((nd->nd_flag & ND_NFSV3) &&
1424 (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p)))
1425 goto nfsmout;
1426 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1427 len = fxdr_unsigned(int, *tl);
1428 if (len > NFS_MAXPATHLEN || len <= 0) {
1429 error = EBADRPC;
1430 goto nfsmout;
1431 }
1432 pathcp = malloc(len + 1, M_TEMP, M_WAITOK);
1433 error = nfsrv_mtostr(nd, pathcp, len);
1434 if (error)
1435 goto nfsmout;
1436 if (nd->nd_flag & ND_NFSV2) {
1437 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1438 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
1439 }
1440 *pathcpp = pathcp;
1441 *lenp = len;
1442 NFSEXITCODE2(0, nd);
1443 return (0);
1444 nfsmout:
1445 if (pathcp)
1446 free(pathcp, M_TEMP);
1447 NFSEXITCODE2(error, nd);
1448 return (error);
1449 }
1450
1451 /*
1452 * Remove a non-directory object.
1453 */
1454 int
1455 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
1456 struct thread *p, struct nfsexstuff *exp)
1457 {
1458 struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS];
1459 int error = 0, mirrorcnt;
1460 char fname[PNFS_FILENAME_LEN + 1];
1461 fhandle_t fh;
1462
1463 vp = ndp->ni_vp;
1464 dsdvp[0] = NULL;
1465 if (vp->v_type == VDIR)
1466 error = NFSERR_ISDIR;
1467 else if (is_v4)
1468 error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0),
1469 p);
1470 if (error == 0)
1471 nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh);
1472 if (!error)
1473 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
1474 if (error == 0 && dsdvp[0] != NULL)
1475 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
1476 if (ndp->ni_dvp == vp)
1477 vrele(ndp->ni_dvp);
1478 else
1479 vput(ndp->ni_dvp);
1480 vput(vp);
1481 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
1482 nfsvno_relpathbuf(ndp);
1483 NFSEXITCODE(error);
1484 return (error);
1485 }
1486
1487 /*
1488 * Remove a directory.
1489 */
1490 int
1491 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
1492 struct thread *p, struct nfsexstuff *exp)
1493 {
1494 struct vnode *vp;
1495 int error = 0;
1496
1497 vp = ndp->ni_vp;
1498 if (vp->v_type != VDIR) {
1499 error = ENOTDIR;
1500 goto out;
1501 }
1502 /*
1503 * No rmdir "." please.
1504 */
1505 if (ndp->ni_dvp == vp) {
1506 error = EINVAL;
1507 goto out;
1508 }
1509 /*
1510 * The root of a mounted filesystem cannot be deleted.
1511 */
1512 if (vp->v_vflag & VV_ROOT)
1513 error = EBUSY;
1514 out:
1515 if (!error)
1516 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
1517 if (ndp->ni_dvp == vp)
1518 vrele(ndp->ni_dvp);
1519 else
1520 vput(ndp->ni_dvp);
1521 vput(vp);
1522 if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
1523 nfsvno_relpathbuf(ndp);
1524 NFSEXITCODE(error);
1525 return (error);
1526 }
1527
/*
 * Rename vnode op.
 *
 * "fromndp" and "tondp" hold the lookup results for the source and the
 * destination.  "ndstat" is an error found earlier by the caller; when it
 * is non-zero only the source side is released and ndstat is returned.
 * "ndflag" supplies the ND_NFSV2/ND_NFSV4 bits, which select the error
 * codes and the delegation handling.
 *
 * If no check fails, VOP_RENAME() is called; otherwise the destination and
 * source vnodes are released here.  Renaming a file onto itself is not an
 * error: it is flagged internally with -1 and 0 is returned.
 */
int
nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
    u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
{
	struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS];
	int error = 0, mirrorcnt;
	char fname[PNFS_FILENAME_LEN + 1];
	fhandle_t fh;

	/* dsdvp[0] stays NULL unless nfsrv_pnfsremovesetup() fills it in. */
	dsdvp[0] = NULL;
	fvp = fromndp->ni_vp;
	if (ndstat) {
		/* The caller already failed: release the source side only. */
		vrele(fromndp->ni_dvp);
		vrele(fvp);
		error = ndstat;
		goto out1;
	}
	tdvp = tondp->ni_dvp;
	tvp = tondp->ni_vp;
	if (tvp != NULL) {
		/* A directory and a non-directory cannot replace each other. */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
			goto out;
		}
		/* The target directory has something mounted on it. */
		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
			goto out;
		}

		/*
		 * A rename to '.' or '..' results in a prematurely
		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
		 * here.
		 */
		if ((tondp->ni_cnd.cn_namelen == 1 &&
		    tondp->ni_cnd.cn_nameptr[0] == '.') ||
		    (tondp->ni_cnd.cn_namelen == 2 &&
		    tondp->ni_cnd.cn_nameptr[0] == '.' &&
		    tondp->ni_cnd.cn_nameptr[1] == '.')) {
			error = EINVAL;
			goto out;
		}
	}
	/* The source directory has something mounted on it. */
	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
		goto out;
	}
	/* Source and destination directory are on different mounts. */
	if (fvp->v_mount != tdvp->v_mount) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
		goto out;
	}
	/* The source is the destination directory itself. */
	if (fvp == tdvp) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
		goto out;
	}
	if (fvp == tvp) {
		/*
		 * If source and destination are the same, there is nothing to
		 * do. Set error to -1 to indicate this.
		 */
		error = -1;
		goto out;
	}
	if (ndflag & ND_NFSV4) {
		/*
		 * fvp is locked only for the duration of its
		 * nfsrv_checkremove() call; failing to lock it is EPERM.
		 */
		if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
			error = nfsrv_checkremove(fvp, 0, NULL,
			    (nfsquad_t)((u_quad_t)0), p);
			NFSVOPUNLOCK(fvp);
		} else
			error = EPERM;
		if (tvp && !error)
			error = nfsrv_checkremove(tvp, 1, NULL,
			    (nfsquad_t)((u_quad_t)0), p);
	} else {
		/*
		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
		 * that the NFSv4 client won't be confused by the rename.
		 * Since nfsd_recalldelegation() can only be called on an
		 * unlocked vnode at this point and fvp is the file that will
		 * still exist after the rename, just do fvp.
		 */
		nfsd_recalldelegation(fvp, p);
	}
	if (error == 0 && tvp != NULL) {
		/* An existing target will be replaced: set up its DS removal. */
		nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh);
		NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup"
		    " dsdvp=%p\n", dsdvp[0]);
	}
out:
	if (!error) {
		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
		    &tondp->ni_cnd);
	} else {
		/*
		 * VOP_RENAME() is not being called, so release the four
		 * vnodes here: the destination side with vput() (vrele() for
		 * tdvp when it is the same vnode as tvp), the source side
		 * with vrele().
		 */
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fromndp->ni_dvp);
		vrele(fvp);
		/* -1 marked "source == destination"; that is a success. */
		if (error == -1)
			error = 0;
	}

	/*
	 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and
	 * if the rename succeeded, the DS file for the tvp needs to be
	 * removed.
	 */
	if (error == 0 && dsdvp[0] != NULL) {
		nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
		NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n");
	}

	vrele(tondp->ni_startdir);
	nfsvno_relpathbuf(tondp);
out1:
	/* The source start directory and path buffer are always released. */
	vrele(fromndp->ni_startdir);
	nfsvno_relpathbuf(fromndp);
	NFSEXITCODE(error);
	return (error);
}
1658
1659 /*
1660 * Link vnode op.
1661 */
1662 int
1663 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
1664 struct thread *p, struct nfsexstuff *exp)
1665 {
1666 struct vnode *xp;
1667 int error = 0;
1668
1669 xp = ndp->ni_vp;
1670 if (xp != NULL) {
1671 error = EEXIST;
1672 } else {
1673 xp = ndp->ni_dvp;
1674 if (vp->v_mount != xp->v_mount)
1675 error = EXDEV;
1676 }
1677 if (!error) {
1678 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
1679 if (!VN_IS_DOOMED(vp))
1680 error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
1681 else
1682 error = EPERM;
1683 if (ndp->ni_dvp == vp) {
1684 vrele(ndp->ni_dvp);
1685 NFSVOPUNLOCK(vp);
1686 } else {
1687 vref(vp);
1688 VOP_VPUT_PAIR(ndp->ni_dvp, &vp, true);
1689 }
1690 } else {
1691 if (ndp->ni_dvp == ndp->ni_vp)
1692 vrele(ndp->ni_dvp);
1693 else
1694 vput(ndp->ni_dvp);
1695 if (ndp->ni_vp)
1696 vrele(ndp->ni_vp);
1697 }
1698 nfsvno_relpathbuf(ndp);
1699 NFSEXITCODE(error);
1700 return (error);
1701 }
1702
/*
 * Do the fsync() appropriate for the commit.
 *
 * "off" and "cnt" give the byte range to commit.  The whole file is
 * flushed with VOP_FSYNC() when cnt is 0, when cnt exceeds
 * MAX_COMMIT_COUNT, or when the file system does not have
 * MNTK_USES_BCACHE set.  Otherwise only the buffers covering the range are
 * located and written synchronously.
 *
 * Returns the VOP_FSYNC() result on the whole-file path; the range path
 * leaves the return value at 0.
 */
int
nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
    struct thread *td)
{
	int error = 0;

	/*
	 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
	 * file is done.  At this time VOP_FSYNC does not accept offset and
	 * byte count parameters so call VOP_FSYNC the whole file for now.
	 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
	 * File systems that do not use the buffer cache (as indicated
	 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC().
	 */
	if (cnt == 0 || cnt > MAX_COMMIT_COUNT ||
	    (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) {
		/*
		 * Give up and do the whole thing
		 */
		if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
			/* Clean the VM object's pages first, synchronously. */
			VM_OBJECT_WLOCK(vp->v_object);
			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
			VM_OBJECT_WUNLOCK(vp->v_object);
		}
		error = VOP_FSYNC(vp, MNT_WAIT, td);
	} else {
		/*
		 * Locate and synchronously write any buffers that fall
		 * into the requested range.  Note:  we are assuming that
		 * f_iosize is a power of 2.
		 */
		int iosize = vp->v_mount->mnt_stat.f_iosize;
		int iomask = iosize - 1;
		struct bufobj *bo;
		daddr_t lblkno;

		/*
		 * Align to iosize boundary, super-align to page boundary.
		 * cnt grows by whatever off is moved back, so the end of
		 * the range stays where it was.
		 */
		if (off & iomask) {
			cnt += off & iomask;
			off &= ~(u_quad_t)iomask;
		}
		if (off & PAGE_MASK) {
			cnt += off & PAGE_MASK;
			off &= ~(u_quad_t)PAGE_MASK;
		}
		/* First logical block of the range, in iosize units. */
		lblkno = off / iosize;

		if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
			VM_OBJECT_WLOCK(vp->v_object);
			vm_object_page_clean(vp->v_object, off, off + cnt,
			    OBJPC_SYNC);
			VM_OBJECT_WUNLOCK(vp->v_object);
		}

		bo = &vp->v_bufobj;
		BO_LOCK(bo);
		/* One iteration per iosize-sized block; bo is locked at the top. */
		while (cnt > 0) {
			struct buf *bp;

			/*
			 * If we have a buffer and it is marked B_DELWRI we
			 * have to lock and write it.  Otherwise the prior
			 * write is assumed to have already been committed.
			 *
			 * gbincore() can return invalid buffers now so we
			 * have to check that bit as well (though B_DELWRI
			 * should not be set if B_INVAL is set there could be
			 * a race here since we haven't locked the buffer).
			 */
			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
				/*
				 * LK_INTERLOCK passes the bufobj lock to
				 * BUF_LOCK(); it is retaken with BO_LOCK()
				 * below on both the retry and the normal
				 * path.
				 */
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
				    LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
					BO_LOCK(bo);
					continue; /* retry */
				}
				if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
				    B_DELWRI) {
					/* Write it out now, not asynchronously. */
					bremfree(bp);
					bp->b_flags &= ~B_ASYNC;
					bwrite(bp);
					++nfs_commit_miss;
				} else
					BUF_UNLOCK(bp);
				BO_LOCK(bo);
			}
			++nfs_commit_blks;
			/* Less than one block left: this was the last one. */
			if (cnt < iosize)
				break;
			cnt -= iosize;
			++lblkno;
		}
		BO_UNLOCK(bo);
	}
	NFSEXITCODE(error);
	return (error);
}
1804
1805 /*
1806 * Statfs vnode op.
1807 */
1808 int
1809 nfsvno_statfs(struct vnode *vp, struct statfs *sf)
1810 {
1811 struct statfs *tsf;
1812 int error;
1813
1814 tsf = NULL;
1815 if (nfsrv_devidcnt > 0) {
1816 /* For a pNFS service, get the DS numbers. */
1817 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO);
1818 error = nfsrv_pnfsstatfs(tsf, vp->v_mount);
1819 if (error != 0) {
1820 free(tsf, M_TEMP);
1821 tsf = NULL;
1822 }
1823 }
1824 error = VFS_STATFS(vp->v_mount, sf);
1825 if (error == 0) {
1826 if (tsf != NULL) {
1827 sf->f_blocks = tsf->f_blocks;
1828 sf->f_bavail = tsf->f_bavail;
1829 sf->f_bfree = tsf->f_bfree;
1830 sf->f_bsize = tsf->f_bsize;
1831 }
1832 /*
1833 * Since NFS handles these values as unsigned on the
1834 * wire, there is no way to represent negative values,
1835 * so set them to 0. Without this, they will appear
1836 * to be very large positive values for clients like
1837 * Solaris10.
1838 */
1839 if (sf->f_bavail < 0)
1840 sf->f_bavail = 0;
1841 if (sf->f_ffree < 0)
1842 sf->f_ffree = 0;
1843 }
1844 free(tsf, M_TEMP);
1845 NFSEXITCODE(error);
1846 return (error);
1847 }
1848
/*
 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
 * must handle nfsrv_opencheck() calls after any other access checks.
 *
 * The outcome is reported through nd->nd_repstat rather than a return
 * value.  *vpp is set to the opened (or newly created) vnode; it is NULL
 * when nd_repstat was already set on entry, when the initial open check
 * fails for a name that does not exist, or when storing the exclusive
 * create verifier fails.
 */
void
nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
    nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
    int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
    NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred,
    struct nfsexstuff *exp, struct vnode **vpp)
{
	struct vnode *vp = NULL;
	u_quad_t tempsize;
	struct nfsexstuff nes;
	struct thread *p = curthread;

	/* No vnode yet, so the open check is done without one. */
	if (ndp->ni_vp == NULL)
		nd->nd_repstat = nfsrv_opencheck(clientid,
		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
	if (!nd->nd_repstat) {
		if (ndp->ni_vp == NULL) {
			/* The name does not exist: create the file. */
			vrele(ndp->ni_startdir);
			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
			/* For a pNFS server, create the data file on a DS. */
			if (nd->nd_repstat == 0) {
				/*
				 * Create a data file on a DS for a pNFS server.
				 * This function just returns if not
				 * running a pNFS DS or the creation fails.
				 */
				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
				    cred, p);
			}
			/* The new vnode is only passed along on success. */
			VOP_VPUT_PAIR(ndp->ni_dvp, nd->nd_repstat == 0 ?
			    &ndp->ni_vp : NULL, false);
			nfsvno_relpathbuf(ndp);
			if (!nd->nd_repstat) {
				if (*exclusive_flagp) {
					/*
					 * Exclusive create: clear the flag and
					 * store the two words of the create
					 * verifier in the new file's atime.
					 */
					*exclusive_flagp = 0;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_atime.tv_sec = cverf[0];
					nvap->na_atime.tv_nsec = cverf[1];
					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
					    &nvap->na_vattr, cred);
					if (nd->nd_repstat != 0) {
						vput(ndp->ni_vp);
						ndp->ni_vp = NULL;
						nd->nd_repstat = NFSERR_NOTSUPP;
					} else
						NFSSETBIT_ATTRBIT(attrbitp,
						    NFSATTRBIT_TIMEACCESS);
				} else {
					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
					    aclp, p, attrbitp, exp);
				}
			}
			vp = ndp->ni_vp;
		} else {
			/* The file already exists: open it. */
			if (ndp->ni_startdir)
				vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vp = ndp->ni_vp;
			/* ni_dvp is only released here for a create request. */
			if (create == NFSV4OPEN_CREATE) {
				if (ndp->ni_dvp == vp)
					vrele(ndp->ni_dvp);
				else
					vput(ndp->ni_dvp);
			}
			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
				/*
				 * A size was supplied for a regular file, so
				 * it will be truncated: check write access,
				 * then do the open check, then set the size.
				 */
				if (ndp->ni_cnd.cn_flags & RDONLY)
					NFSVNO_SETEXRDONLY(&nes);
				else
					NFSVNO_EXINIT(&nes);
				nd->nd_repstat = nfsvno_accchk(vp,
				    VWRITE, cred, &nes, p,
				    NFSACCCHK_NOOVERRIDE,
				    NFSACCCHK_VPISLOCKED, NULL);
				/* The access check result is handed on. */
				nd->nd_repstat = nfsrv_opencheck(clientid,
				    stateidp, stp, vp, nd, p, nd->nd_repstat);
				if (!nd->nd_repstat) {
					/* Set only the size; reset the rest. */
					tempsize = nvap->na_size;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_size = tempsize;
					nd->nd_repstat = nfsvno_setattr(vp,
					    nvap, cred, p, exp);
				}
			} else if (vp->v_type == VREG) {
				nd->nd_repstat = nfsrv_opencheck(clientid,
				    stateidp, stp, vp, nd, p, nd->nd_repstat);
			}
		}
	} else {
		/* Failed before anything was opened: undo the lookup. */
		if (ndp->ni_cnd.cn_flags & HASBUF)
			nfsvno_relpathbuf(ndp);
		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
			vrele(ndp->ni_startdir);
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			if (ndp->ni_vp)
				vput(ndp->ni_vp);
		}
	}
	*vpp = vp;

	NFSEXITCODE2(0, nd);
}
1958
1959 /*
1960 * Updates the file rev and sets the mtime and ctime
1961 * to the current clock time, returning the va_filerev and va_Xtime
1962 * values.
1963 * Return ESTALE to indicate the vnode is VIRF_DOOMED.
1964 */
1965 int
1966 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
1967 struct nfsrv_descript *nd, struct thread *p)
1968 {
1969 struct vattr va;
1970
1971 VATTR_NULL(&va);
1972 vfs_timestamp(&va.va_mtime);
1973 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
1974 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
1975 if (VN_IS_DOOMED(vp))
1976 return (ESTALE);
1977 }
1978 (void) VOP_SETATTR(vp, &va, nd->nd_cred);
1979 (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL);
1980 return (0);
1981 }
1982
1983 /*
1984 * Glue routine to nfsv4_fillattr().
1985 */
1986 int
1987 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
1988 struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
1989 struct ucred *cred, struct thread *p, int isdgram, int reterr,
1990 int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
1991 {
1992 struct statfs *sf;
1993 int error;
1994
1995 sf = NULL;
1996 if (nfsrv_devidcnt > 0 &&
1997 (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) ||
1998 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) ||
1999 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) {
2000 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO);
2001 error = nfsrv_pnfsstatfs(sf, mp);
2002 if (error != 0) {
2003 free(sf, M_TEMP);
2004 sf = NULL;
2005 }
2006 }
2007 error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
2008 attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
2009 mounted_on_fileno, sf);
2010 free(sf, M_TEMP);
2011 NFSEXITCODE2(0, nd);
2012 return (error);
2013 }
2014
2015 /* Since the Readdir vnode ops vary, put the entire functions in here. */
2016 /*
2017 * nfs readdir service
2018 * - mallocs what it thinks is enough to read
2019 * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
2020 * - calls VOP_READDIR()
2021 * - loops around building the reply
2022 * if the output generated exceeds count break out of loop
2023 * The NFSM_CLGET macro is used here so that the reply will be packed
2024 * tightly in mbuf clusters.
2025 * - it trims out records with d_fileno == 0
2026 * this doesn't matter for Unix clients, but they might confuse clients
2027 * for other os'.
2028 * - it trims out records with d_type == DT_WHT
2029 * these cannot be seen through NFS (unless we extend the protocol)
2030 * The alternate call nfsrvd_readdirplus() does lookups as well.
2031 * PS: The NFS protocol spec. does not clarify what the "count" byte
2032 * argument is a count of.. just name strings and file id's or the
2033 * entire reply rpc or ...
2034 * I tried just file name and id sizes and it confused the Sun client,
2035 * so I am using the full rpc size now. The "paranoia.." comment refers
2036 * to including the status longwords that are not a part of the dir.
2037 * "entry" structures, but are in the rpc.
2038 */
2039 int
2040 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
2041 struct vnode *vp, struct nfsexstuff *exp)
2042 {
2043 struct dirent *dp;
2044 u_int32_t *tl;
2045 int dirlen;
2046 char *cpos, *cend, *rbuf;
2047 struct nfsvattr at;
2048 int nlen, error = 0, getret = 1;
2049 int siz, cnt, fullsiz, eofflag, ncookies;
2050 u_int64_t off, toff, verf __unused;
2051 u_long *cookies = NULL, *cookiep;
2052 struct uio io;
2053 struct iovec iv;
2054 int is_ufs;
2055 struct thread *p = curthread;
2056
2057 if (nd->nd_repstat) {
2058 nfsrv_postopattr(nd, getret, &at);
2059 goto out;
2060 }
2061 if (nd->nd_flag & ND_NFSV2) {
2062 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2063 off = fxdr_unsigned(u_quad_t, *tl++);
2064 } else {
2065 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2066 off = fxdr_hyper(tl);
2067 tl += 2;
2068 verf = fxdr_hyper(tl);
2069 tl += 2;
2070 }
2071 toff = off;
2072 cnt = fxdr_unsigned(int, *tl);
2073 if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
2074 cnt = NFS_SRVMAXDATA(nd);
2075 siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2076 fullsiz = siz;
2077 if (nd->nd_flag & ND_NFSV3) {
2078 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1,
2079 NULL);
2080 #if 0
2081 /*
2082 * va_filerev is not sufficient as a cookie verifier,
2083 * since it is not supposed to change when entries are
2084 * removed/added unless that offset cookies returned to
2085 * the client are no longer valid.
2086 */
2087 if (!nd->nd_repstat && toff && verf != at.na_filerev)
2088 nd->nd_repstat = NFSERR_BAD_COOKIE;
2089 #endif
2090 }
2091 if (!nd->nd_repstat && vp->v_type != VDIR)
2092 nd->nd_repstat = NFSERR_NOTDIR;
2093 if (nd->nd_repstat == 0 && cnt == 0) {
2094 if (nd->nd_flag & ND_NFSV2)
2095 /* NFSv2 does not have NFSERR_TOOSMALL */
2096 nd->nd_repstat = EPERM;
2097 else
2098 nd->nd_repstat = NFSERR_TOOSMALL;
2099 }
2100 if (!nd->nd_repstat)
2101 nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
2102 nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
2103 NFSACCCHK_VPISLOCKED, NULL);
2104 if (nd->nd_repstat) {
2105 vput(vp);
2106 if (nd->nd_flag & ND_NFSV3)
2107 nfsrv_postopattr(nd, getret, &at);
2108 goto out;
2109 }
2110 is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
2111 rbuf = malloc(siz, M_TEMP, M_WAITOK);
2112 again:
2113 eofflag = 0;
2114 if (cookies) {
2115 free(cookies, M_TEMP);
2116 cookies = NULL;
2117 }
2118
2119 iv.iov_base = rbuf;
2120 iv.iov_len = siz;
2121 io.uio_iov = &iv;
2122 io.uio_iovcnt = 1;
2123 io.uio_offset = (off_t)off;
2124 io.uio_resid = siz;
2125 io.uio_segflg = UIO_SYSSPACE;
2126 io.uio_rw = UIO_READ;
2127 io.uio_td = NULL;
2128 nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
2129 &cookies);
2130 off = (u_int64_t)io.uio_offset;
2131 if (io.uio_resid)
2132 siz -= io.uio_resid;
2133
2134 if (!cookies && !nd->nd_repstat)
2135 nd->nd_repstat = NFSERR_PERM;
2136 if (nd->nd_flag & ND_NFSV3) {
2137 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
2138 if (!nd->nd_repstat)
2139 nd->nd_repstat = getret;
2140 }
2141
2142 /*
2143 * Handles the failed cases. nd->nd_repstat == 0 past here.
2144 */
2145 if (nd->nd_repstat) {
2146 vput(vp);
2147 free(rbuf, M_TEMP);
2148 if (cookies)
2149 free(cookies, M_TEMP);
2150 if (nd->nd_flag & ND_NFSV3)
2151 nfsrv_postopattr(nd, getret, &at);
2152 goto out;
2153 }
2154 /*
2155 * If nothing read, return eof
2156 * rpc reply
2157 */
2158 if (siz == 0) {
2159 vput(vp);
2160 if (nd->nd_flag & ND_NFSV2) {
2161 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2162 } else {
2163 nfsrv_postopattr(nd, getret, &at);
2164 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2165 txdr_hyper(at.na_filerev, tl);
2166 tl += 2;
2167 }
2168 *tl++ = newnfs_false;
2169 *tl = newnfs_true;
2170 free(rbuf, M_TEMP);
2171 free(cookies, M_TEMP);
2172 goto out;
2173 }
2174
2175 /*
2176 * Check for degenerate cases of nothing useful read.
2177 * If so go try again
2178 */
2179 cpos = rbuf;
2180 cend = rbuf + siz;
2181 dp = (struct dirent *)cpos;
2182 cookiep = cookies;
2183
2184 /*
2185 * For some reason FreeBSD's ufs_readdir() chooses to back the
2186 * directory offset up to a block boundary, so it is necessary to
2187 * skip over the records that precede the requested offset. This
2188 * requires the assumption that file offset cookies monotonically
2189 * increase.
2190 */
2191 while (cpos < cend && ncookies > 0 &&
2192 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
2193 (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
2194 cpos += dp->d_reclen;
2195 dp = (struct dirent *)cpos;
2196 cookiep++;
2197 ncookies--;
2198 }
2199 if (cpos >= cend || ncookies == 0) {
2200 siz = fullsiz;
2201 toff = off;
2202 goto again;
2203 }
2204 vput(vp);
2205
2206 /*
2207 * If cnt > MCLBYTES and the reply will not be saved, use
2208 * ext_pgs mbufs for TLS.
2209 * For NFSv4.0, we do not know for sure if the reply will
2210 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
2211 */
2212 if (cnt > MCLBYTES && siz > MCLBYTES &&
2213 (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
2214 (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
2215 nd->nd_flag |= ND_EXTPG;
2216
2217 /*
2218 * dirlen is the size of the reply, including all XDR and must
2219 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
2220 * if the XDR should be included in "count", but to be safe, we do.
2221 * (Include the two booleans at the end of the reply in dirlen now.)
2222 */
2223 if (nd->nd_flag & ND_NFSV3) {
2224 nfsrv_postopattr(nd, getret, &at);
2225 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2226 txdr_hyper(at.na_filerev, tl);
2227 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
2228 } else {
2229 dirlen = 2 * NFSX_UNSIGNED;
2230 }
2231
2232 /* Loop through the records and build reply */
2233 while (cpos < cend && ncookies > 0) {
2234 nlen = dp->d_namlen;
2235 if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
2236 nlen <= NFS_MAXNAMLEN) {
2237 if (nd->nd_flag & ND_NFSV3)
2238 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
2239 else
2240 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
2241 if (dirlen > cnt) {
2242 eofflag = 0;
2243 break;
2244 }
2245
2246 /*
2247 * Build the directory record xdr from
2248 * the dirent entry.
2249 */
2250 if (nd->nd_flag & ND_NFSV3) {
2251 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2252 *tl++ = newnfs_true;
2253 *tl++ = 0;
2254 } else {
2255 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2256 *tl++ = newnfs_true;
2257 }
2258 *tl = txdr_unsigned(dp->d_fileno);
2259 (void) nfsm_strtom(nd, dp->d_name, nlen);
2260 if (nd->nd_flag & ND_NFSV3) {
2261 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2262 txdr_hyper(*cookiep, tl);
2263 } else {
2264 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2265 *tl = txdr_unsigned(*cookiep);
2266 }
2267 }
2268 cpos += dp->d_reclen;
2269 dp = (struct dirent *)cpos;
2270 cookiep++;
2271 ncookies--;
2272 }
2273 if (cpos < cend)
2274 eofflag = 0;
2275 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2276 *tl++ = newnfs_false;
2277 if (eofflag)
2278 *tl = newnfs_true;
2279 else
2280 *tl = newnfs_false;
2281 free(rbuf, M_TEMP);
2282 free(cookies, M_TEMP);
2283
2284 out:
2285 NFSEXITCODE2(0, nd);
2286 return (0);
2287 nfsmout:
2288 vput(vp);
2289 NFSEXITCODE2(error, nd);
2290 return (error);
2291 }
2292
/*
 * Readdirplus for V3 and Readdir for V4.
 *
 * Dissects the request (directory offset cookie, cookie verifier, the
 * "siz" hint and the maximum reply size "cnt", plus the requested
 * attribute bits for NFSv4), reads the directory with VOP_READDIR() and
 * builds one reply record per usable entry.  For each record the entry's
 * vnode is acquired via VFS_VGET() or, when that is not supported,
 * VOP_LOOKUP(), so that its file handle and attributes can be added.
 * For NFSv4 the "." and ".." entries are never put in the reply.
 *
 * The NFS status of the operation is left in nd->nd_repstat and 0 is
 * returned; a non-zero return only happens via the nfsmout label, when
 * dissecting the request arguments fails.
 *
 * NOTE(review): vp appears to be passed in locked and referenced (it is
 * released with vput() on the early exits and the access check is done
 * with NFSACCCHK_VPISLOCKED) -- confirm against the callers.
 */
int
nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
    struct vnode *vp, struct nfsexstuff *exp)
{
	struct dirent *dp;
	u_int32_t *tl;
	int dirlen;
	char *cpos, *cend, *rbuf;
	struct vnode *nvp;
	fhandle_t nfh;
	struct nfsvattr nva, at, *nvap = &nva;
	struct mbuf *mb0, *mb1;
	struct nfsreferral *refp;
	int nlen, r, error = 0, getret = 1, usevget = 1;
	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
	caddr_t bpos0, bpos1;
	u_int64_t off, toff, verf;
	u_long *cookies = NULL, *cookiep;
	nfsattrbit_t attrbits, rderrbits, savbits;
	struct uio io;
	struct iovec iv;
	struct componentname cn;
	int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls;
	struct mount *mp, *new_mp;
	uint64_t mounted_on_fileno;
	struct thread *p = curthread;
	int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1;

	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &at);
		goto out;
	}

	/*
	 * Request arguments: 64bit offset cookie, 64bit cookie verifier
	 * and two 32bit counts (siz and cnt).
	 */
	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
	off = fxdr_hyper(tl);
	toff = off;
	tl += 2;
	verf = fxdr_hyper(tl);
	tl += 2;
	siz = fxdr_unsigned(int, *tl++);
	cnt = fxdr_unsigned(int, *tl);

	/*
	 * Use the server's maximum data transfer size as the upper bound
	 * on reply datalen.
	 */
	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
		cnt = NFS_SRVMAXDATA(nd);

	/*
	 * siz is a "hint" of how much directory information (name, fileid,
	 * cookie) should be in the reply. At least one client "hints" 0,
	 * so I set it to cnt for that case. I also round it up to the
	 * next multiple of DIRBLKSIZ.
	 * Since the size of a Readdirplus directory entry reply will always
	 * be greater than a directory entry returned by VOP_READDIR(), it
	 * does not make sense to read more than NFS_SRVMAXDATA() via
	 * VOP_READDIR().
	 */
	if (siz <= 0)
		siz = cnt;
	else if (siz > NFS_SRVMAXDATA(nd))
		siz = NFS_SRVMAXDATA(nd);
	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));

	/*
	 * For NFSv4, savbits keeps the attribute bits as requested, while
	 * attrbits has NFSCLRNOTFILLABLE_ATTRBIT() applied to it.  rderrbits
	 * holds only RDATTRERROR and is used when an entry's attributes
	 * could not be acquired.  NFSv3 has no attribute bits.
	 */
	if (nd->nd_flag & ND_NFSV4) {
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error)
			goto nfsmout;
		NFSSET_ATTRBIT(&savbits, &attrbits);
		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd);
		NFSZERO_ATTRBIT(&rderrbits);
		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
	} else {
		NFSZERO_ATTRBIT(&attrbits);
	}
	/* Remember the rounded up read size, for use when retrying. */
	fullsiz = siz;
	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
#if 0
	if (!nd->nd_repstat) {
	    if (off && verf != at.na_filerev) {
		/*
		 * va_filerev is not sufficient as a cookie verifier,
		 * since it is not supposed to change when entries are
		 * removed/added unless that offset cookies returned to
		 * the client are no longer valid.
		 */
		if (nd->nd_flag & ND_NFSV4) {
			nd->nd_repstat = NFSERR_NOTSAME;
		} else {
			nd->nd_repstat = NFSERR_BAD_COOKIE;
		}
	    }
	}
#endif
	if (!nd->nd_repstat && vp->v_type != VDIR)
		nd->nd_repstat = NFSERR_NOTDIR;
	if (!nd->nd_repstat && cnt == 0)
		nd->nd_repstat = NFSERR_TOOSMALL;
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
		    NFSACCCHK_VPISLOCKED, NULL);
	if (nd->nd_repstat) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/* The file system type selects ufs and zfs specific handling below. */
	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
	is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0;

	rbuf = malloc(siz, M_TEMP, M_WAITOK);
again:
	eofflag = 0;
	/* Release the cookie array left over from a previous pass. */
	if (cookies) {
		free(cookies, M_TEMP);
		cookies = NULL;
	}

	iv.iov_base = rbuf;
	iv.iov_len = siz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = siz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
	    &cookies);
	off = (u_int64_t)io.uio_offset;
	/* Reduce siz to the number of bytes actually put in rbuf. */
	if (io.uio_resid)
		siz -= io.uio_resid;

	getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);

	/* A successful read that returned no cookie array is an error. */
	if (!cookies && !nd->nd_repstat)
		nd->nd_repstat = NFSERR_PERM;
	if (!nd->nd_repstat)
		nd->nd_repstat = getret;
	if (nd->nd_repstat) {
		vput(vp);
		if (cookies)
			free(cookies, M_TEMP);
		free(rbuf, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}
	/*
	 * If nothing read, return eof
	 * rpc reply
	 */
	if (siz == 0) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		txdr_hyper(at.na_filerev, tl);
		tl += 2;
		*tl++ = newnfs_false;
		*tl = newnfs_true;
		free(cookies, M_TEMP);
		free(rbuf, M_TEMP);
		goto out;
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;

	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that precede the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 * Unused entries (d_fileno == 0), whiteouts and, for NFSv4, the
	 * "." and ".." entries are skipped here as well.
	 */
	while (cpos < cend && ncookies > 0 &&
	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
	   (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) ||
	   ((nd->nd_flag & ND_NFSV4) &&
	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	/*
	 * Every entry read was skipped, so read the next chunk, starting
	 * at the offset where VOP_READDIR() left off.
	 */
	if (cpos >= cend || ncookies == 0) {
		siz = fullsiz;
		toff = off;
		goto again;
	}

	/*
	 * Busy the file system so that the mount point won't go away
	 * and, as such, VFS_VGET() can be used safely.
	 * A reference on mp is held from before vp is unlocked until
	 * after the vfs_busy() call.  From here on vp is unlocked but
	 * still referenced, so vrele() is used instead of vput().
	 */
	mp = vp->v_mount;
	vfs_ref(mp);
	NFSVOPUNLOCK(vp);
	nd->nd_repstat = vfs_busy(mp, 0);
	vfs_rel(mp);
	if (nd->nd_repstat != 0) {
		vrele(vp);
		free(cookies, M_TEMP);
		free(rbuf, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		goto out;
	}

	/*
	 * Check to see if entries in this directory can be safely acquired
	 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
	 * ZFS snapshot directories need VOP_LOOKUP(), so that any
	 * automount of the snapshot directory that is required will
	 * be done.
	 * This needs to be done here for NFSv4, since NFSv4 never does
	 * a VFS_VGET() for "." or "..".
	 */
	if (is_zfs == 1) {
		r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
		if (r == EOPNOTSUPP) {
			usevget = 0;
			cn.cn_nameiop = LOOKUP;
			cn.cn_lkflags = LK_SHARED | LK_RETRY;
			cn.cn_cred = nd->nd_cred;
			cn.cn_thread = p;
		} else if (r == 0)
			vput(nvp);
	}

	/*
	 * If the reply is likely to exceed MCLBYTES and the reply will
	 * not be saved, use ext_pgs mbufs for TLS.
	 * It is difficult to predict how large each entry will be and
	 * how many entries have been read, so just assume the directory
	 * entries grow by a factor of 4 when attributes are included.
	 * For NFSv4.0, we do not know for sure if the reply will
	 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
	 */
	if (cnt > MCLBYTES && siz > MCLBYTES / 4 &&
	    (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
	    (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
		nd->nd_flag |= ND_EXTPG;

	/*
	 * Save this position, in case there is an error before one entry
	 * is created.
	 */
	mb0 = nd->nd_mb;
	bpos0 = nd->nd_bpos;
	bextpg0 = nd->nd_bextpg;
	bextpgsiz0 = nd->nd_bextpgsiz;

	/*
	 * Fill in the first part of the reply.
	 * dirlen is the reply length in bytes and cannot exceed cnt.
	 * (Include the two booleans at the end of the reply in dirlen now,
	 * so we recognize when we have exceeded cnt.)
	 */
	if (nd->nd_flag & ND_NFSV3) {
		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
		nfsrv_postopattr(nd, getret, &at);
	} else {
		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
	}
	/* The directory's na_filerev is returned as the cookie verifier. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
	txdr_hyper(at.na_filerev, tl);

	/*
	 * Save this position, in case there is an empty reply needed.
	 */
	mb1 = nd->nd_mb;
	bpos1 = nd->nd_bpos;
	bextpg1 = nd->nd_bextpg;
	bextpgsiz1 = nd->nd_bextpgsiz;

	/* Loop through the records and build reply */
	entrycnt = 0;
	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
		nlen = dp->d_namlen;
		/*
		 * Only process entries that are in use, are not whiteouts
		 * and have a name that fits.  For NFSv4, "." and ".." are
		 * left out as well.
		 */
		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
		    nlen <= NFS_MAXNAMLEN &&
		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
		      || (nlen == 1 && dp->d_name[0] != '.'))) {
			/*
			 * Save the current position in the reply, in case
			 * this entry exceeds cnt.
			 */
			mb1 = nd->nd_mb;
			bpos1 = nd->nd_bpos;
			bextpg1 = nd->nd_bextpg;
			bextpgsiz1 = nd->nd_bextpgsiz;

			/*
			 * For readdir_and_lookup get the vnode using
			 * the file number.
			 */
			nvp = NULL;
			refp = NULL;
			r = 0;
			at_root = 0;
			needs_unbusy = 0;
			new_mp = mp;
			mounted_on_fileno = (uint64_t)dp->d_fileno;
			/*
			 * The vnode is only needed for NFSv3 or when the
			 * NFSv4 client asked for at least one attribute.
			 */
			if ((nd->nd_flag & ND_NFSV3) ||
			    NFSNONZERO_ATTRBIT(&savbits)) {
				if (nd->nd_flag & ND_NFSV4)
					refp = nfsv4root_getreferral(NULL,
					    vp, dp->d_fileno);
				if (refp == NULL) {
					if (usevget)
						r = VFS_VGET(mp, dp->d_fileno,
						    LK_SHARED, &nvp);
					else
						r = EOPNOTSUPP;
					if (r == EOPNOTSUPP) {
						/*
						 * VFS_VGET() is not usable,
						 * so switch to VOP_LOOKUP()
						 * for this and all further
						 * entries.
						 */
						if (usevget) {
							usevget = 0;
							cn.cn_nameiop = LOOKUP;
							cn.cn_lkflags =
							    LK_SHARED |
							    LK_RETRY;
							cn.cn_cred =
							    nd->nd_cred;
							cn.cn_thread = p;
						}
						cn.cn_nameptr = dp->d_name;
						cn.cn_namelen = nlen;
						cn.cn_flags = ISLASTCN |
						    NOFOLLOW | LOCKLEAF;
						if (nlen == 2 &&
						    dp->d_name[0] == '.' &&
						    dp->d_name[1] == '.')
							cn.cn_flags |=
							    ISDOTDOT;
						/*
						 * vp was unlocked above, so
						 * it is locked again for the
						 * lookup.
						 */
						if (NFSVOPLOCK(vp, LK_SHARED)
						    != 0) {
							nd->nd_repstat = EPERM;
							break;
						}
						/*
						 * ".." of a file system root
						 * is handled here by using
						 * vp itself, without calling
						 * VOP_LOOKUP().
						 */
						if ((vp->v_vflag & VV_ROOT) != 0
						    && (cn.cn_flags & ISDOTDOT)
						    != 0) {
							vref(vp);
							nvp = vp;
							r = 0;
						} else {
							r = VOP_LOOKUP(vp, &nvp,
							    &cn);
							if (vp != nvp)
								NFSVOPUNLOCK(vp);
						}
					}

					/*
					 * For NFSv4, check to see if nvp is
					 * a mount point and get the mount
					 * point vnode, as required.
					 */
					if (r == 0 &&
					    nfsrv_enable_crossmntpt != 0 &&
					    (nd->nd_flag & ND_NFSV4) != 0 &&
					    nvp->v_type == VDIR &&
					    nvp->v_mountedhere != NULL) {
						new_mp = nvp->v_mountedhere;
						r = vfs_busy(new_mp, 0);
						vput(nvp);
						nvp = NULL;
						if (r == 0) {
							r = VFS_ROOT(new_mp,
							    LK_SHARED, &nvp);
							needs_unbusy = 1;
							if (r == 0)
								at_root = 1;
						}
					}
				}

				/*
				 * If we failed to look up the entry, then it
				 * has become invalid, most likely removed.
				 */
				if (r != 0) {
					if (needs_unbusy)
						vfs_unbusy(new_mp);
					goto invalid;
				}
				KASSERT(refp != NULL || nvp != NULL,
				    ("%s: undetected lookup error", __func__));

				/*
				 * Get the entry's file handle and attributes,
				 * unless it is a referral.
				 */
				if (refp == NULL &&
				    ((nd->nd_flag & ND_NFSV3) ||
				     NFSNONZERO_ATTRBIT(&attrbits))) {
					r = nfsvno_getfh(nvp, &nfh, p);
					if (!r)
					    r = nfsvno_getattr(nvp, nvap, nd, p,
						1, &attrbits);
					if (r == 0 && is_zfs == 1 &&
					    nfsrv_enable_crossmntpt != 0 &&
					    (nd->nd_flag & ND_NFSV4) != 0 &&
					    nvp->v_type == VDIR &&
					    vp->v_mount != nvp->v_mount) {
					    /*
					     * For a ZFS snapshot, there is a
					     * pseudo mount that does not set
					     * v_mountedhere, so it needs to
					     * be detected via a different
					     * mount structure.
					     */
					    at_root = 1;
					    if (new_mp == mp)
						new_mp = nvp->v_mount;
					}
				}

				/*
				 * If we failed to get attributes of the entry,
				 * then just skip it for NFSv3 (the traditional
				 * behavior in the old NFS server).
				 * For NFSv4 the behavior is controlled by
				 * RDATTRERROR: we either ignore the error or
				 * fail the request.
				 * The exception is EOPNOTSUPP, which can be
				 * returned by nfsvno_getfh() for certain
				 * file systems, such as devfs.  This indicates
				 * that the file system cannot be exported,
				 * so just skip over the entry.
				 * Note that RDATTRERROR is never set for NFSv3.
				 */
				if (r != 0) {
					if (!NFSISSET_ATTRBIT(&attrbits,
					    NFSATTRBIT_RDATTRERROR) ||
					    r == EOPNOTSUPP) {
						vput(nvp);
						if (needs_unbusy != 0)
							vfs_unbusy(new_mp);
						if ((nd->nd_flag & ND_NFSV3) ||
						    r == EOPNOTSUPP)
							goto invalid;
						nd->nd_repstat = r;
						break;
					}
				}
			}

			/*
			 * Build the directory record xdr
			 * dirlen is increased by the length of everything
			 * added to the reply for this entry.
			 */
			if (nd->nd_flag & ND_NFSV3) {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				*tl++ = 0;
				*tl = txdr_unsigned(dp->d_fileno);
				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				txdr_hyper(*cookiep, tl);
				nfsrv_postopattr(nd, 0, nvap);
				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
				if (nvp != NULL)
					vput(nvp);
			} else {
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				txdr_hyper(*cookiep, tl);
				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
				/*
				 * nvp is unlocked here but stays referenced
				 * until the vrele() calls below.
				 */
				if (nvp != NULL) {
					supports_nfsv4acls =
					    nfs_supportsnfsv4acls(nvp);
					NFSVOPUNLOCK(nvp);
				} else
					supports_nfsv4acls = 0;
				if (refp != NULL) {
					dirlen += nfsrv_putreferralattr(nd,
					    &savbits, refp, 0,
					    &nd->nd_repstat);
					if (nd->nd_repstat) {
						if (nvp != NULL)
							vrele(nvp);
						if (needs_unbusy != 0)
							vfs_unbusy(new_mp);
						break;
					}
				} else if (r) {
					/*
					 * Getting the attributes failed, so
					 * only RDATTRERROR is filled in.
					 */
					dirlen += nfsvno_fillattr(nd, new_mp,
					    nvp, nvap, &nfh, r, &rderrbits,
					    nd->nd_cred, p, isdgram, 0,
					    supports_nfsv4acls, at_root,
					    mounted_on_fileno);
				} else {
					dirlen += nfsvno_fillattr(nd, new_mp,
					    nvp, nvap, &nfh, r, &attrbits,
					    nd->nd_cred, p, isdgram, 0,
					    supports_nfsv4acls, at_root,
					    mounted_on_fileno);
				}
				if (nvp != NULL)
					vrele(nvp);
				dirlen += (3 * NFSX_UNSIGNED);
			}
			if (needs_unbusy != 0)
				vfs_unbusy(new_mp);
			/* Only count entries that fit within cnt. */
			if (dirlen <= cnt)
				entrycnt++;
		}
invalid:
		/* Advance to the next directory record and its cookie. */
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	vrele(vp);
	vfs_unbusy(mp);

	/*
	 * If dirlen > cnt, we must strip off the last entry. If that
	 * results in an empty reply, report NFSERR_TOOSMALL.
	 * On an error, the reply is trimmed back to where it was before
	 * anything was added for the directory.
	 */
	if (dirlen > cnt || nd->nd_repstat) {
		if (!nd->nd_repstat && entrycnt == 0)
			nd->nd_repstat = NFSERR_TOOSMALL;
		if (nd->nd_repstat) {
			nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0);
			if (nd->nd_flag & ND_NFSV3)
				nfsrv_postopattr(nd, getret, &at);
		} else
			nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1);
		eofflag = 0;
	} else if (cpos < cend)
		eofflag = 0;
	/* Terminate the entry list and add the eof flag. */
	if (!nd->nd_repstat) {
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = newnfs_false;
		if (eofflag)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
	}
	free(cookies, M_TEMP);
	free(rbuf, M_TEMP);

out:
	NFSEXITCODE2(0, nd);
	return (0);
nfsmout:
	/* Dissecting the request arguments failed. */
	vput(vp);
	NFSEXITCODE2(error, nd);
	return (error);
}
2855
2856 /*
2857 * Get the settable attributes out of the mbuf list.
2858 * (Return 0 or EBADRPC)
2859 */
2860 int
2861 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
2862 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2863 {
2864 u_int32_t *tl;
2865 struct nfsv2_sattr *sp;
2866 int error = 0, toclient = 0;
2867
2868 switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
2869 case ND_NFSV2:
2870 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2871 /*
2872 * Some old clients didn't fill in the high order 16bits.
2873 * --> check the low order 2 bytes for 0xffff
2874 */
2875 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
2876 nvap->na_mode = nfstov_mode(sp->sa_mode);
2877 if (sp->sa_uid != newnfs_xdrneg1)
2878 nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
2879 if (sp->sa_gid != newnfs_xdrneg1)
2880 nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
2881 if (sp->sa_size != newnfs_xdrneg1)
2882 nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
2883 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
2884 #ifdef notyet
2885 fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
2886 #else
2887 nvap->na_atime.tv_sec =
2888 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
2889 nvap->na_atime.tv_nsec = 0;
2890 #endif
2891 }
2892 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
2893 fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
2894 break;
2895 case ND_NFSV3:
2896 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2897 if (*tl == newnfs_true) {
2898 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2899 nvap->na_mode = nfstov_mode(*tl);
2900 }
2901 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2902 if (*tl == newnfs_true) {
2903 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2904 nvap->na_uid = fxdr_unsigned(uid_t, *tl);
2905 }
2906 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2907 if (*tl == newnfs_true) {
2908 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2909 nvap->na_gid = fxdr_unsigned(gid_t, *tl);
2910 }
2911 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2912 if (*tl == newnfs_true) {
2913 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2914 nvap->na_size = fxdr_hyper(tl);
2915 }
2916 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2917 switch (fxdr_unsigned(int, *tl)) {
2918 case NFSV3SATTRTIME_TOCLIENT:
2919 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2920 fxdr_nfsv3time(tl, &nvap->na_atime);
2921 toclient = 1;
2922 break;
2923 case NFSV3SATTRTIME_TOSERVER:
2924 vfs_timestamp(&nvap->na_atime);
2925 nvap->na_vaflags |= VA_UTIMES_NULL;
2926 break;
2927 }
2928 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2929 switch (fxdr_unsigned(int, *tl)) {
2930 case NFSV3SATTRTIME_TOCLIENT:
2931 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2932 fxdr_nfsv3time(tl, &nvap->na_mtime);
2933 nvap->na_vaflags &= ~VA_UTIMES_NULL;
2934 break;
2935 case NFSV3SATTRTIME_TOSERVER:
2936 vfs_timestamp(&nvap->na_mtime);
2937 if (!toclient)
2938 nvap->na_vaflags |= VA_UTIMES_NULL;
2939 break;
2940 }
2941 break;
2942 case ND_NFSV4:
2943 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p);
2944 }
2945 nfsmout:
2946 NFSEXITCODE2(error, nd);
2947 return (error);
2948 }
2949
2950 /*
2951 * Handle the setable attributes for V4.
2952 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
2953 */
2954 int
2955 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
2956 nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2957 {
2958 u_int32_t *tl;
2959 int attrsum = 0;
2960 int i, j;
2961 int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
2962 int moderet, toclient = 0;
2963 u_char *cp, namestr[NFSV4_SMALLSTR + 1];
2964 uid_t uid;
2965 gid_t gid;
2966 u_short mode, mask; /* Same type as va_mode. */
2967 struct vattr va;
2968
2969 error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
2970 if (error)
2971 goto nfsmout;
2972 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2973 attrsize = fxdr_unsigned(int, *tl);
2974
2975 /*
2976 * Loop around getting the setable attributes. If an unsupported
2977 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
2978 */
2979 if (retnotsup) {
2980 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2981 bitpos = NFSATTRBIT_MAX;
2982 } else {
2983 bitpos = 0;
2984 }
2985 moderet = 0;
2986 for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
2987 if (attrsum > attrsize) {
2988 error = NFSERR_BADXDR;
2989 goto nfsmout;
2990 }
2991 if (NFSISSET_ATTRBIT(attrbitp, bitpos))
2992 switch (bitpos) {
2993 case NFSATTRBIT_SIZE:
2994 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2995 if (vp != NULL && vp->v_type != VREG) {
2996 error = (vp->v_type == VDIR) ? NFSERR_ISDIR :
2997 NFSERR_INVAL;
2998 goto nfsmout;
2999 }
3000 nvap->na_size = fxdr_hyper(tl);
3001 attrsum += NFSX_HYPER;
3002 break;
3003 case NFSATTRBIT_ACL:
3004 error = nfsrv_dissectacl(nd, aclp, true, &aceerr,
3005 &aclsize, p);
3006 if (error)
3007 goto nfsmout;
3008 if (aceerr && !nd->nd_repstat)
3009 nd->nd_repstat = aceerr;
3010 attrsum += aclsize;
3011 break;
3012 case NFSATTRBIT_ARCHIVE:
3013 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3014 if (!nd->nd_repstat)
3015 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3016 attrsum += NFSX_UNSIGNED;
3017 break;
3018 case NFSATTRBIT_HIDDEN:
3019 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3020 if (!nd->nd_repstat)
3021 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3022 attrsum += NFSX_UNSIGNED;
3023 break;
3024 case NFSATTRBIT_MIMETYPE:
3025 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3026 i = fxdr_unsigned(int, *tl);
3027 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3028 if (error)
3029 goto nfsmout;
3030 if (!nd->nd_repstat)
3031 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3032 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
3033 break;
3034 case NFSATTRBIT_MODE:
3035 moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. */
3036 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3037 nvap->na_mode = nfstov_mode(*tl);
3038 attrsum += NFSX_UNSIGNED;
3039 break;
3040 case NFSATTRBIT_OWNER:
3041 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3042 j = fxdr_unsigned(int, *tl);
3043 if (j < 0) {
3044 error = NFSERR_BADXDR;
3045 goto nfsmout;
3046 }
3047 if (j > NFSV4_SMALLSTR)
3048 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
3049 else
3050 cp = namestr;
3051 error = nfsrv_mtostr(nd, cp, j);
3052 if (error) {
3053 if (j > NFSV4_SMALLSTR)
3054 free(cp, M_NFSSTRING);
3055 goto nfsmout;
3056 }
3057 if (!nd->nd_repstat) {
3058 nd->nd_repstat = nfsv4_strtouid(nd, cp, j,
3059 &uid);
3060 if (!nd->nd_repstat)
3061 nvap->na_uid = uid;
3062 }
3063 if (j > NFSV4_SMALLSTR)
3064 free(cp, M_NFSSTRING);
3065 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
3066 break;
3067 case NFSATTRBIT_OWNERGROUP:
3068 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3069 j = fxdr_unsigned(int, *tl);
3070 if (j < 0) {
3071 error = NFSERR_BADXDR;
3072 goto nfsmout;
3073 }
3074 if (j > NFSV4_SMALLSTR)
3075 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
3076 else
3077 cp = namestr;
3078 error = nfsrv_mtostr(nd, cp, j);
3079 if (error) {
3080 if (j > NFSV4_SMALLSTR)
3081 free(cp, M_NFSSTRING);
3082 goto nfsmout;
3083 }
3084 if (!nd->nd_repstat) {
3085 nd->nd_repstat = nfsv4_strtogid(nd, cp, j,
3086 &gid);
3087 if (!nd->nd_repstat)
3088 nvap->na_gid = gid;
3089 }
3090 if (j > NFSV4_SMALLSTR)
3091 free(cp, M_NFSSTRING);
3092 attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
3093 break;
3094 case NFSATTRBIT_SYSTEM:
3095 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3096 if (!nd->nd_repstat)
3097 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3098 attrsum += NFSX_UNSIGNED;
3099 break;
3100 case NFSATTRBIT_TIMEACCESSSET:
3101 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3102 attrsum += NFSX_UNSIGNED;
3103 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
3104 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
3105 fxdr_nfsv4time(tl, &nvap->na_atime);
3106 toclient = 1;
3107 attrsum += NFSX_V4TIME;
3108 } else {
3109 vfs_timestamp(&nvap->na_atime);
3110 nvap->na_vaflags |= VA_UTIMES_NULL;
3111 }
3112 break;
3113 case NFSATTRBIT_TIMEBACKUP:
3114 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
3115 if (!nd->nd_repstat)
3116 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3117 attrsum += NFSX_V4TIME;
3118 break;
3119 case NFSATTRBIT_TIMECREATE:
3120 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
3121 fxdr_nfsv4time(tl, &nvap->na_btime);
3122 attrsum += NFSX_V4TIME;
3123 break;
3124 case NFSATTRBIT_TIMEMODIFYSET:
3125 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3126 attrsum += NFSX_UNSIGNED;
3127 if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
3128 NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
3129 fxdr_nfsv4time(tl, &nvap->na_mtime);
3130 nvap->na_vaflags &= ~VA_UTIMES_NULL;
3131 attrsum += NFSX_V4TIME;
3132 } else {
3133 vfs_timestamp(&nvap->na_mtime);
3134 if (!toclient)
3135 nvap->na_vaflags |= VA_UTIMES_NULL;
3136 }
3137 break;
3138 case NFSATTRBIT_MODESETMASKED:
3139 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
3140 mode = fxdr_unsigned(u_short, *tl++);
3141 mask = fxdr_unsigned(u_short, *tl);
3142 /*
3143 * vp == NULL implies an Open/Create operation.
3144 * This attribute can only be used for Setattr and
3145 * only for NFSv4.1 or higher.
3146 * If moderet != 0, a mode attribute has also been
3147 * specified and this attribute cannot be done in the
3148 * same Setattr operation.
3149 */
3150 if ((nd->nd_flag & ND_NFSV41) == 0)
3151 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3152 else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 ||
3153 vp == NULL)
3154 nd->nd_repstat = NFSERR_INVAL;
3155 else if (moderet == 0)
3156 moderet = VOP_GETATTR(vp, &va, nd->nd_cred);
3157 if (moderet == 0)
3158 nvap->na_mode = (mode & mask) |
3159 (va.va_mode & ~mask);
3160 else
3161 nd->nd_repstat = moderet;
3162 attrsum += 2 * NFSX_UNSIGNED;
3163 break;
3164 default:
3165 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
3166 /*
3167 * set bitpos so we drop out of the loop.
3168 */
3169 bitpos = NFSATTRBIT_MAX;
3170 break;
3171 }
3172 }
3173
3174 /*
3175 * some clients pad the attrlist, so we need to skip over the
3176 * padding.
3177 */
3178 if (attrsum > attrsize) {
3179 error = NFSERR_BADXDR;
3180 } else {
3181 attrsize = NFSM_RNDUP(attrsize);
3182 if (attrsum < attrsize)
3183 error = nfsm_advance(nd, attrsize - attrsum, -1);
3184 }
3185 nfsmout:
3186 NFSEXITCODE2(error, nd);
3187 return (error);
3188 }
3189
3190 /*
3191 * Check/setup export credentials.
3192 */
3193 int
3194 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
3195 struct ucred *credanon, bool testsec)
3196 {
3197 int error;
3198
3199 /*
3200 * Check/setup credentials.
3201 */
3202 if (nd->nd_flag & ND_GSS)
3203 exp->nes_exflag &= ~MNT_EXPORTANON;
3204
3205 /*
3206 * Check to see if the operation is allowed for this security flavor.
3207 */
3208 error = 0;
3209 if (testsec) {
3210 error = nfsvno_testexp(nd, exp);
3211 if (error != 0)
3212 goto out;
3213 }
3214
3215 /*
3216 * Check to see if the file system is exported V4 only.
3217 */
3218 if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
3219 error = NFSERR_PROGNOTV4;
3220 goto out;
3221 }
3222
3223 /*
3224 * Now, map the user credentials.
3225 * (Note that ND_AUTHNONE will only be set for an NFSv3
3226 * Fsinfo RPC. If set for anything else, this code might need
3227 * to change.)
3228 */
3229 if (NFSVNO_EXPORTED(exp)) {
3230 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) ||
3231 NFSVNO_EXPORTANON(exp) ||
3232 (nd->nd_flag & ND_AUTHNONE) != 0) {
3233 nd->nd_cred->cr_uid = credanon->cr_uid;
3234 nd->nd_cred->cr_gid = credanon->cr_gid;
3235 crsetgroups(nd->nd_cred, credanon->cr_ngroups,
3236 credanon->cr_groups);
3237 } else if ((nd->nd_flag & ND_GSS) == 0) {
3238 /*
3239 * If using AUTH_SYS, call nfsrv_getgrpscred() to see
3240 * if there is a replacement credential with a group
3241 * list set up by "nfsuserd -manage-gids".
3242 * If there is no replacement, nfsrv_getgrpscred()
3243 * simply returns its argument.
3244 */
3245 nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred);
3246 }
3247 }
3248
3249 out:
3250 NFSEXITCODE2(error, nd);
3251 return (error);
3252 }
3253
3254 /*
3255 * Check exports.
3256 */
3257 int
3258 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
3259 struct ucred **credp)
3260 {
3261 int error;
3262
3263 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
3264 &exp->nes_numsecflavor, exp->nes_secflavors);
3265 if (error) {
3266 if (nfs_rootfhset) {
3267 exp->nes_exflag = 0;
3268 exp->nes_numsecflavor = 0;
3269 error = 0;
3270 }
3271 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
3272 MAXSECFLAVORS) {
3273 printf("nfsvno_checkexp: numsecflavors out of range\n");
3274 exp->nes_numsecflavor = 0;
3275 error = EACCES;
3276 }
3277 NFSEXITCODE(error);
3278 return (error);
3279 }
3280
3281 /*
3282 * Get a vnode for a file handle and export stuff.
3283 */
3284 int
3285 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
3286 int lktype, struct vnode **vpp, struct nfsexstuff *exp,
3287 struct ucred **credp)
3288 {
3289 int error;
3290
3291 *credp = NULL;
3292 exp->nes_numsecflavor = 0;
3293 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
3294 if (error != 0)
3295 /* Make sure the server replies ESTALE to the client. */
3296 error = ESTALE;
3297 if (nam && !error) {
3298 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
3299 &exp->nes_numsecflavor, exp->nes_secflavors);
3300 if (error) {
3301 if (nfs_rootfhset) {
3302 exp->nes_exflag = 0;
3303 exp->nes_numsecflavor = 0;
3304 error = 0;
3305 } else {
3306 vput(*vpp);
3307 }
3308 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
3309 MAXSECFLAVORS) {
3310 printf("nfsvno_fhtovp: numsecflavors out of range\n");
3311 exp->nes_numsecflavor = 0;
3312 error = EACCES;
3313 vput(*vpp);
3314 }
3315 }
3316 NFSEXITCODE(error);
3317 return (error);
3318 }
3319
3320 /*
3321 * nfsd_fhtovp() - convert a fh to a vnode ptr
3322 * - look up fsid in mount list (if not found ret error)
3323 * - get vp and export rights by calling nfsvno_fhtovp()
3324 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
3325 * for AUTH_SYS
3326 * - if mpp != NULL, return the mount point so that it can
3327 * be used for vn_finished_write() by the caller
3328 */
3329 void
3330 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
3331 struct vnode **vpp, struct nfsexstuff *exp,
3332 struct mount **mpp, int startwrite, int nextop)
3333 {
3334 struct mount *mp, *mpw;
3335 struct ucred *credanon;
3336 fhandle_t *fhp;
3337 int error;
3338
3339 if (mpp != NULL)
3340 *mpp = NULL;
3341 *vpp = NULL;
3342 fhp = (fhandle_t *)nfp->nfsrvfh_data;
3343 mp = vfs_busyfs(&fhp->fh_fsid);
3344 if (mp == NULL) {
3345 nd->nd_repstat = ESTALE;
3346 goto out;
3347 }
3348
3349 if (startwrite) {
3350 mpw = mp;
3351 error = vn_start_write(NULL, &mpw, V_WAIT);
3352 if (error != 0) {
3353 mpw = NULL;
3354 vfs_unbusy(mp);
3355 nd->nd_repstat = ESTALE;
3356 goto out;
3357 }
3358 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
3359 lktype = LK_EXCLUSIVE;
3360 } else
3361 mpw = NULL;
3362
3363 nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
3364 &credanon);
3365 vfs_unbusy(mp);
3366
3367 /*
3368 * For NFSv4 without a pseudo root fs, unexported file handles
3369 * can be returned, so that Lookup works everywhere.
3370 */
3371 if (!nd->nd_repstat && exp->nes_exflag == 0 &&
3372 !(nd->nd_flag & ND_NFSV4)) {
3373 vput(*vpp);
3374 *vpp = NULL;
3375 nd->nd_repstat = EACCES;
3376 }
3377
3378 /*
3379 * Personally, I've never seen any point in requiring a
3380 * reserved port#, since only in the rare case where the
3381 * clients are all boxes with secure system privileges,
3382 * does it provide any enhanced security, but... some people
3383 * believe it to be useful and keep putting this code back in.
3384 * (There is also some "security checker" out there that
3385 * complains if the nfs server doesn't enforce this.)
3386 * However, note the following:
3387 * RFC3530 (NFSv4) specifies that a reserved port# not be
3388 * required.
3389 * RFC2623 recommends that, if a reserved port# is checked for,
3390 * that there be a way to turn that off--> ifdef'd.
3391 */
3392 #ifdef NFS_REQRSVPORT
3393 if (!nd->nd_repstat) {
3394 struct sockaddr_in *saddr;
3395 struct sockaddr_in6 *saddr6;
3396
3397 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
3398 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
3399 if (!(nd->nd_flag & ND_NFSV4) &&
3400 ((saddr->sin_family == AF_INET &&
3401 ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
3402 (saddr6->sin6_family == AF_INET6 &&
3403 ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
3404 vput(*vpp);
3405 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
3406 }
3407 }
3408 #endif /* NFS_REQRSVPORT */
3409
3410 /*
3411 * Check/setup credentials.
3412 */
3413 if (!nd->nd_repstat) {
3414 nd->nd_saveduid = nd->nd_cred->cr_uid;
3415 nd->nd_repstat = nfsd_excred(nd, exp, credanon,
3416 nfsrv_checkwrongsec(nd, nextop, (*vpp)->v_type));
3417 if (nd->nd_repstat)
3418 vput(*vpp);
3419 }
3420 if (credanon != NULL)
3421 crfree(credanon);
3422 if (nd->nd_repstat) {
3423 vn_finished_write(mpw);
3424 *vpp = NULL;
3425 } else if (mpp != NULL) {
3426 *mpp = mpw;
3427 }
3428
3429 out:
3430 NFSEXITCODE2(0, nd);
3431 }
3432
3433 /*
3434 * glue for fp.
3435 */
3436 static int
3437 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
3438 {
3439 struct filedesc *fdp;
3440 struct file *fp;
3441 int error = 0;
3442
3443 fdp = p->td_proc->p_fd;
3444 if (fd < 0 || fd >= fdp->fd_nfiles ||
3445 (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
3446 error = EBADF;
3447 goto out;
3448 }
3449 *fpp = fp;
3450
3451 out:
3452 NFSEXITCODE(error);
3453 return (error);
3454 }
3455
3456 /*
3457 * Called from nfssvc() to update the exports list. Just call
3458 * vfs_export(). This has to be done, since the v4 root fake fs isn't
3459 * in the mount list.
3460 */
3461 int
3462 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
3463 {
3464 struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
3465 int error = 0;
3466 struct nameidata nd;
3467 fhandle_t fh;
3468
3469 error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
3470 if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
3471 nfs_rootfhset = 0;
3472 else if (error == 0) {
3473 if (nfsexargp->fspec == NULL) {
3474 error = EPERM;
3475 goto out;
3476 }
3477 /*
3478 * If fspec != NULL, this is the v4root path.
3479 */
3480 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
3481 nfsexargp->fspec, p);
3482 if ((error = namei(&nd)) != 0)
3483 goto out;
3484 error = nfsvno_getfh(nd.ni_vp, &fh, p);
3485 vrele(nd.ni_vp);
3486 if (!error) {
3487 nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
3488 NFSBCOPY((caddr_t)&fh,
3489 nfs_rootfh.nfsrvfh_data,
3490 sizeof (fhandle_t));
3491 nfs_rootfhset = 1;
3492 }
3493 }
3494
3495 out:
3496 NFSEXITCODE(error);
3497 return (error);
3498 }
3499
/*
 * This function needs to test to see if the system is near its limit
 * for memory allocation via malloc() or mget() and return True iff
 * either of these resources are near their limit.
 * XXX (For now, this is just a stub.)
 * The stub only does anything when nfsrv_testmalloclimit is non-zero: it
 * then counts its calls and returns 1 on every call whose counter value is
 * a multiple of 1000, logging a message for every 100th such return
 * (starting with the first).  All other calls return 0.
 */
int nfsrv_testmalloclimit = 0;
int
nfsrv_mallocmget_limit(void)
{
	static int printmesg = 0;
	static int testval = 1;
	int callno, hitno;

	/* The call counter only advances while testing is enabled. */
	if (nfsrv_testmalloclimit == 0)
		return (0);

	callno = testval++;
	if ((callno % 1000) != 0)
		return (0);

	hitno = printmesg++;
	if ((hitno % 100) == 0)
		printf("nfsd: malloc/mget near limit\n");
	return (1);
}
3520
3521 /*
3522 * BSD specific initialization of a mount point.
3523 */
3524 void
3525 nfsd_mntinit(void)
3526 {
3527 static int inited = 0;
3528
3529 if (inited)
3530 return;
3531 inited = 1;
3532 nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
3533 TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
3534 TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist);
3535 nfsv4root_mnt.mnt_export = NULL;
3536 TAILQ_INIT(&nfsv4root_opt);
3537 TAILQ_INIT(&nfsv4root_newopt);
3538 nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
3539 nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
3540 nfsv4root_mnt.mnt_nvnodelistsize = 0;
3541 nfsv4root_mnt.mnt_lazyvnodelistsize = 0;
3542 }
3543
3544 static void
3545 nfsd_timer(void *arg)
3546 {
3547
3548 nfsrv_servertimer();
3549 callout_reset_sbt(&nfsd_callout, SBT_1S, SBT_1S, nfsd_timer, NULL, 0);
3550 }
3551
3552 /*
3553 * Get a vnode for a file handle, without checking exports, etc.
3554 */
3555 struct vnode *
3556 nfsvno_getvp(fhandle_t *fhp)
3557 {
3558 struct mount *mp;
3559 struct vnode *vp;
3560 int error;
3561
3562 mp = vfs_busyfs(&fhp->fh_fsid);
3563 if (mp == NULL)
3564 return (NULL);
3565 error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
3566 vfs_unbusy(mp);
3567 if (error)
3568 return (NULL);
3569 return (vp);
3570 }
3571
3572 /*
3573 * Do a local VOP_ADVLOCK().
3574 */
3575 int
3576 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
3577 u_int64_t end, struct thread *td)
3578 {
3579 int error = 0;
3580 struct flock fl;
3581 u_int64_t tlen;
3582
3583 if (nfsrv_dolocallocks == 0)
3584 goto out;
3585 ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked");
3586
3587 fl.l_whence = SEEK_SET;
3588 fl.l_type = ftype;
3589 fl.l_start = (off_t)first;
3590 if (end == NFS64BITSSET) {
3591 fl.l_len = 0;
3592 } else {
3593 tlen = end - first;
3594 fl.l_len = (off_t)tlen;
3595 }
3596 /*
3597 * For FreeBSD8, the l_pid and l_sysid must be set to the same
3598 * values for all calls, so that all locks will be held by the
3599 * nfsd server. (The nfsd server handles conflicts between the
3600 * various clients.)
3601 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
3602 * bytes, so it can't be put in l_sysid.
3603 */
3604 if (nfsv4_sysid == 0)
3605 nfsv4_sysid = nlm_acquire_next_sysid();
3606 fl.l_pid = (pid_t)0;
3607 fl.l_sysid = (int)nfsv4_sysid;
3608
3609 if (ftype == F_UNLCK)
3610 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
3611 (F_POSIX | F_REMOTE));
3612 else
3613 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
3614 (F_POSIX | F_REMOTE));
3615
3616 out:
3617 NFSEXITCODE(error);
3618 return (error);
3619 }
3620
3621 /*
3622 * Check the nfsv4 root exports.
3623 */
3624 int
3625 nfsvno_v4rootexport(struct nfsrv_descript *nd)
3626 {
3627 struct ucred *credanon;
3628 int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i;
3629 uint64_t exflags;
3630
3631 error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
3632 &credanon, &numsecflavor, secflavors);
3633 if (error) {
3634 error = NFSERR_PROGUNAVAIL;
3635 goto out;
3636 }
3637 if (credanon != NULL)
3638 crfree(credanon);
3639 for (i = 0; i < numsecflavor; i++) {
3640 if (secflavors[i] == AUTH_SYS)
3641 nd->nd_flag |= ND_EXAUTHSYS;
3642 else if (secflavors[i] == RPCSEC_GSS_KRB5)
3643 nd->nd_flag |= ND_EXGSS;
3644 else if (secflavors[i] == RPCSEC_GSS_KRB5I)
3645 nd->nd_flag |= ND_EXGSSINTEGRITY;
3646 else if (secflavors[i] == RPCSEC_GSS_KRB5P)
3647 nd->nd_flag |= ND_EXGSSPRIVACY;
3648 }
3649
3650 /* And set ND_EXxx flags for TLS. */
3651 if ((exflags & MNT_EXTLS) != 0) {
3652 nd->nd_flag |= ND_EXTLS;
3653 if ((exflags & MNT_EXTLSCERT) != 0)
3654 nd->nd_flag |= ND_EXTLSCERT;
3655 if ((exflags & MNT_EXTLSCERTUSER) != 0)
3656 nd->nd_flag |= ND_EXTLSCERTUSER;
3657 }
3658
3659 out:
3660 NFSEXITCODE(error);
3661 return (error);
3662 }
3663
3664 /*
3665 * Nfs server pseudo system call for the nfsd's
3666 */
3667 /*
3668 * MPSAFE
3669 */
3670 static int
3671 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
3672 {
3673 struct file *fp;
3674 struct nfsd_addsock_args sockarg;
3675 struct nfsd_nfsd_args nfsdarg;
3676 struct nfsd_nfsd_oargs onfsdarg;
3677 struct nfsd_pnfsd_args pnfsdarg;
3678 struct vnode *vp, *nvp, *curdvp;
3679 struct pnfsdsfile *pf;
3680 struct nfsdevice *ds, *fds;
3681 cap_rights_t rights;
3682 int buflen, error, ret;
3683 char *buf, *cp, *cp2, *cp3;
3684 char fname[PNFS_FILENAME_LEN + 1];
3685
3686 if (uap->flag & NFSSVC_NFSDADDSOCK) {
3687 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
3688 if (error)
3689 goto out;
3690 /*
3691 * Since we don't know what rights might be required,
3692 * pretend that we need them all. It is better to be too
3693 * careful than too reckless.
3694 */
3695 error = fget(td, sockarg.sock,
3696 cap_rights_init_one(&rights, CAP_SOCK_SERVER), &fp);
3697 if (error != 0)
3698 goto out;
3699 if (fp->f_type != DTYPE_SOCKET) {
3700 fdrop(fp, td);
3701 error = EPERM;
3702 goto out;
3703 }
3704 error = nfsrvd_addsock(fp);
3705 fdrop(fp, td);
3706 } else if (uap->flag & NFSSVC_NFSDNFSD) {
3707 if (uap->argp == NULL) {
3708 error = EINVAL;
3709 goto out;
3710 }
3711 if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) {
3712 error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg));
3713 if (error == 0) {
3714 nfsdarg.principal = onfsdarg.principal;
3715 nfsdarg.minthreads = onfsdarg.minthreads;
3716 nfsdarg.maxthreads = onfsdarg.maxthreads;
3717 nfsdarg.version = 1;
3718 nfsdarg.addr = NULL;
3719 nfsdarg.addrlen = 0;
3720 nfsdarg.dnshost = NULL;
3721 nfsdarg.dnshostlen = 0;
3722 nfsdarg.dspath = NULL;
3723 nfsdarg.dspathlen = 0;
3724 nfsdarg.mdspath = NULL;
3725 nfsdarg.mdspathlen = 0;
3726 nfsdarg.mirrorcnt = 1;
3727 }
3728 } else
3729 error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg));
3730 if (error)
3731 goto out;
3732 if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 &&
3733 nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 &&
3734 nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 &&
3735 nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 &&
3736 nfsdarg.mirrorcnt >= 1 &&
3737 nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS &&
3738 nfsdarg.addr != NULL && nfsdarg.dnshost != NULL &&
3739 nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) {
3740 NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d"
3741 " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen,
3742 nfsdarg.dspathlen, nfsdarg.dnshostlen,
3743 nfsdarg.mdspathlen, nfsdarg.mirrorcnt);
3744 cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK);
3745 error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen);
3746 if (error != 0) {
3747 free(cp, M_TEMP);
3748 goto out;
3749 }
3750 cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. */
3751 nfsdarg.addr = cp;
3752 cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK);
3753 error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen);
3754 if (error != 0) {
3755 free(nfsdarg.addr, M_TEMP);
3756 free(cp, M_TEMP);
3757 goto out;
3758 }
3759 cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */
3760 nfsdarg.dnshost = cp;
3761 cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK);
3762 error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen);
3763 if (error != 0) {
3764 free(nfsdarg.addr, M_TEMP);
3765 free(nfsdarg.dnshost, M_TEMP);
3766 free(cp, M_TEMP);
3767 goto out;
3768 }
3769 cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */
3770 nfsdarg.dspath = cp;
3771 cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK);
3772 error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen);
3773 if (error != 0) {
3774 free(nfsdarg.addr, M_TEMP);
3775 free(nfsdarg.dnshost, M_TEMP);
3776 free(nfsdarg.dspath, M_TEMP);
3777 free(cp, M_TEMP);
3778 goto out;
3779 }
3780 cp[nfsdarg.mdspathlen] = '\0'; /* Ensure nul term. */
3781 nfsdarg.mdspath = cp;
3782 } else {
3783 nfsdarg.addr = NULL;
3784 nfsdarg.addrlen = 0;
3785 nfsdarg.dnshost = NULL;
3786 nfsdarg.dnshostlen = 0;
3787 nfsdarg.dspath = NULL;
3788 nfsdarg.dspathlen = 0;
3789 nfsdarg.mdspath = NULL;
3790 nfsdarg.mdspathlen = 0;
3791 nfsdarg.mirrorcnt = 1;
3792 }
3793 nfsd_timer(NULL);
3794 error = nfsrvd_nfsd(td, &nfsdarg);
3795 free(nfsdarg.addr, M_TEMP);
3796 free(nfsdarg.dnshost, M_TEMP);
3797 free(nfsdarg.dspath, M_TEMP);
3798 free(nfsdarg.mdspath, M_TEMP);
3799 } else if (uap->flag & NFSSVC_PNFSDS) {
3800 error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg));
3801 if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER ||
3802 pnfsdarg.op == PNFSDOP_FORCEDELDS)) {
3803 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
3804 error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1,
3805 NULL);
3806 if (error == 0)
3807 error = nfsrv_deldsserver(pnfsdarg.op, cp, td);
3808 free(cp, M_TEMP);
3809 } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) {
3810 cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
3811 buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS;
3812 buf = malloc(buflen, M_TEMP, M_WAITOK);
3813 error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1,
3814 NULL);
3815 NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error);
3816 if (error == 0 && pnfsdarg.dspath != NULL) {
3817 cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
3818 error = copyinstr(pnfsdarg.dspath, cp2,
3819 PATH_MAX + 1, NULL);
3820 NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n",
3821 error);
3822 } else
3823 cp2 = NULL;
3824 if (error == 0 && pnfsdarg.curdspath != NULL) {
3825 cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
3826 error = copyinstr(pnfsdarg.curdspath, cp3,
3827 PATH_MAX + 1, NULL);
3828 NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n",
3829 error);
3830 } else
3831 cp3 = NULL;
3832 curdvp = NULL;
3833 fds = NULL;
3834 if (error == 0)
3835 error = nfsrv_mdscopymr(cp, cp2, cp3, buf,
3836 &buflen, fname, td, &vp, &nvp, &pf, &ds,
3837 &fds);
3838 NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error);
3839 if (error == 0) {
3840 if (pf->dsf_dir >= nfsrv_dsdirsize) {
3841 printf("copymr: dsdir out of range\n");
3842 pf->dsf_dir = 0;
3843 }
3844 NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen);
3845 error = nfsrv_copymr(vp, nvp,
3846 ds->nfsdev_dsdir[pf->dsf_dir], ds, pf,
3847 (struct pnfsdsfile *)buf,
3848 buflen / sizeof(*pf), td->td_ucred, td);
3849 vput(vp);
3850 vput(nvp);
3851 if (fds != NULL && error == 0) {
3852 curdvp = fds->nfsdev_dsdir[pf->dsf_dir];
3853 ret = vn_lock(curdvp, LK_EXCLUSIVE);
3854 if (ret == 0) {
3855 nfsrv_dsremove(curdvp, fname,
3856 td->td_ucred, td);
3857 NFSVOPUNLOCK(curdvp);
3858 }
3859 }
3860 NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error);
3861 }
3862 free(cp, M_TEMP);
3863 free(cp2, M_TEMP);
3864 free(cp3, M_TEMP);
3865 free(buf, M_TEMP);
3866 }
3867 } else {
3868 error = nfssvc_srvcall(td, uap, td->td_ucred);
3869 }
3870
3871 out:
3872 NFSEXITCODE(error);
3873 return (error);
3874 }
3875
3876 static int
3877 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
3878 {
3879 struct nfsex_args export;
3880 struct nfsex_oldargs oexp;
3881 struct file *fp = NULL;
3882 int stablefd, i, len;
3883 struct nfsd_clid adminrevoke;
3884 struct nfsd_dumplist dumplist;
3885 struct nfsd_dumpclients *dumpclients;
3886 struct nfsd_dumplocklist dumplocklist;
3887 struct nfsd_dumplocks *dumplocks;
3888 struct nameidata nd;
3889 vnode_t vp;
3890 int error = EINVAL, igotlock;
3891 struct proc *procp;
3892 gid_t *grps;
3893 static int suspend_nfsd = 0;
3894
3895 if (uap->flag & NFSSVC_PUBLICFH) {
3896 NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
3897 sizeof (fhandle_t));
3898 error = copyin(uap->argp,
3899 &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
3900 if (!error)
3901 nfs_pubfhset = 1;
3902 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
3903 (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) {
3904 error = copyin(uap->argp,(caddr_t)&export,
3905 sizeof (struct nfsex_args));
3906 if (!error) {
3907 grps = NULL;
3908 if (export.export.ex_ngroups > NGROUPS_MAX ||
3909 export.export.ex_ngroups < 0)
3910 error = EINVAL;
3911 else if (export.export.ex_ngroups > 0) {
3912 grps = malloc(export.export.ex_ngroups *
3913 sizeof(gid_t), M_TEMP, M_WAITOK);
3914 error = copyin(export.export.ex_groups, grps,
3915 export.export.ex_ngroups * sizeof(gid_t));
3916 export.export.ex_groups = grps;
3917 } else
3918 export.export.ex_groups = NULL;
3919 if (!error)
3920 error = nfsrv_v4rootexport(&export, cred, p);
3921 free(grps, M_TEMP);
3922 }
3923 } else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
3924 NFSSVC_V4ROOTEXPORT) {
3925 error = copyin(uap->argp,(caddr_t)&oexp,
3926 sizeof (struct nfsex_oldargs));
3927 if (!error) {
3928 memset(&export.export, 0, sizeof(export.export));
3929 export.export.ex_flags = (uint64_t)oexp.export.ex_flags;
3930 export.export.ex_root = oexp.export.ex_root;
3931 export.export.ex_uid = oexp.export.ex_anon.cr_uid;
3932 export.export.ex_ngroups =
3933 oexp.export.ex_anon.cr_ngroups;
3934 export.export.ex_groups = NULL;
3935 if (export.export.ex_ngroups > XU_NGROUPS ||
3936 export.export.ex_ngroups < 0)
3937 error = EINVAL;
3938 else if (export.export.ex_ngroups > 0) {
3939 export.export.ex_groups = malloc(
3940 export.export.ex_ngroups * sizeof(gid_t),
3941 M_TEMP, M_WAITOK);
3942 for (i = 0; i < export.export.ex_ngroups; i++)
3943 export.export.ex_groups[i] =
3944 oexp.export.ex_anon.cr_groups[i];
3945 }
3946 export.export.ex_addr = oexp.export.ex_addr;
3947 export.export.ex_addrlen = oexp.export.ex_addrlen;
3948 export.export.ex_mask = oexp.export.ex_mask;
3949 export.export.ex_masklen = oexp.export.ex_masklen;
3950 export.export.ex_indexfile = oexp.export.ex_indexfile;
3951 export.export.ex_numsecflavors =
3952 oexp.export.ex_numsecflavors;
3953 if (export.export.ex_numsecflavors >= MAXSECFLAVORS ||
3954 export.export.ex_numsecflavors < 0)
3955 error = EINVAL;
3956 else {
3957 for (i = 0; i < export.export.ex_numsecflavors;
3958 i++)
3959 export.export.ex_secflavors[i] =
3960 oexp.export.ex_secflavors[i];
3961 }
3962 export.fspec = oexp.fspec;
3963 if (error == 0)
3964 error = nfsrv_v4rootexport(&export, cred, p);
3965 free(export.export.ex_groups, M_TEMP);
3966 }
3967 } else if (uap->flag & NFSSVC_NOPUBLICFH) {
3968 nfs_pubfhset = 0;
3969 error = 0;
3970 } else if (uap->flag & NFSSVC_STABLERESTART) {
3971 error = copyin(uap->argp, (caddr_t)&stablefd,
3972 sizeof (int));
3973 if (!error)
3974 error = fp_getfvp(p, stablefd, &fp, &vp);
3975 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
3976 error = EBADF;
3977 if (!error && newnfs_numnfsd != 0)
3978 error = EPERM;
3979 if (!error) {
3980 nfsrv_stablefirst.nsf_fp = fp;
3981 nfsrv_setupstable(p);
3982 }
3983 } else if (uap->flag & NFSSVC_ADMINREVOKE) {
3984 error = copyin(uap->argp, (caddr_t)&adminrevoke,
3985 sizeof (struct nfsd_clid));
3986 if (!error)
3987 error = nfsrv_adminrevoke(&adminrevoke, p);
3988 } else if (uap->flag & NFSSVC_DUMPCLIENTS) {
3989 error = copyin(uap->argp, (caddr_t)&dumplist,
3990 sizeof (struct nfsd_dumplist));
3991 if (!error && (dumplist.ndl_size < 1 ||
3992 dumplist.ndl_size > NFSRV_MAXDUMPLIST))
3993 error = EPERM;
3994 if (!error) {
3995 len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
3996 dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
3997 nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
3998 error = copyout(dumpclients, dumplist.ndl_list, len);
3999 free(dumpclients, M_TEMP);
4000 }
4001 } else if (uap->flag & NFSSVC_DUMPLOCKS) {
4002 error = copyin(uap->argp, (caddr_t)&dumplocklist,
4003 sizeof (struct nfsd_dumplocklist));
4004 if (!error && (dumplocklist.ndllck_size < 1 ||
4005 dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
4006 error = EPERM;
4007 if (!error)
4008 error = nfsrv_lookupfilename(&nd,
4009 dumplocklist.ndllck_fname, p);
4010 if (!error) {
4011 len = sizeof (struct nfsd_dumplocks) *
4012 dumplocklist.ndllck_size;
4013 dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
4014 nfsrv_dumplocks(nd.ni_vp, dumplocks,
4015 dumplocklist.ndllck_size, p);
4016 vput(nd.ni_vp);
4017 error = copyout(dumplocks, dumplocklist.ndllck_list,
4018 len);
4019 free(dumplocks, M_TEMP);
4020 }
4021 } else if (uap->flag & NFSSVC_BACKUPSTABLE) {
4022 procp = p->td_proc;
4023 PROC_LOCK(procp);
4024 nfsd_master_pid = procp->p_pid;
4025 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
4026 nfsd_master_start = procp->p_stats->p_start;
4027 nfsd_master_proc = procp;
4028 PROC_UNLOCK(procp);
4029 } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
4030 NFSLOCKV4ROOTMUTEX();
4031 if (suspend_nfsd == 0) {
4032 /* Lock out all nfsd threads */
4033 do {
4034 igotlock = nfsv4_lock(&nfsd_suspend_lock, 1,
4035 NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
4036 } while (igotlock == 0 && suspend_nfsd == 0);
4037 suspend_nfsd = 1;
4038 }
4039 NFSUNLOCKV4ROOTMUTEX();
4040 error = 0;
4041 } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
4042 NFSLOCKV4ROOTMUTEX();
4043 if (suspend_nfsd != 0) {
4044 nfsv4_unlock(&nfsd_suspend_lock, 0);
4045 suspend_nfsd = 0;
4046 }
4047 NFSUNLOCKV4ROOTMUTEX();
4048 error = 0;
4049 }
4050
4051 NFSEXITCODE(error);
4052 return (error);
4053 }
4054
4055 /*
4056 * Check exports.
4057 * Returns 0 if ok, 1 otherwise.
4058 */
4059 int
4060 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
4061 {
4062 int i;
4063
4064 if ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) ||
4065 (NFSVNO_EXTLSCERT(exp) &&
4066 (nd->nd_flag & ND_TLSCERT) == 0) ||
4067 (NFSVNO_EXTLSCERTUSER(exp) &&
4068 (nd->nd_flag & ND_TLSCERTUSER) == 0)) {
4069 if ((nd->nd_flag & ND_NFSV4) != 0)
4070 return (NFSERR_WRONGSEC);
4071 #ifdef notnow
4072 /* There is currently no auth_stat for this. */
4073 else if ((nd->nd_flag & ND_TLS) == 0)
4074 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS);
4075 else
4076 return (NFSERR_AUTHERR | AUTH_NEEDS_TLS_MUTUAL_HOST);
4077 #endif
4078 else
4079 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
4080 }
4081
4082 /*
4083 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to use
4084 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
4085 */
4086 if ((nd->nd_flag & ND_NFSV3) != 0 && nd->nd_procnum == NFSPROC_FSINFO)
4087 return (0);
4088
4089 /*
4090 * This seems odd, but allow the case where the security flavor
4091 * list is empty. This happens when NFSv4 is traversing non-exported
4092 * file systems. Exported file systems should always have a non-empty
4093 * security flavor list.
4094 */
4095 if (exp->nes_numsecflavor == 0)
4096 return (0);
4097
4098 for (i = 0; i < exp->nes_numsecflavor; i++) {
4099 /*
4100 * The tests for privacy and integrity must be first,
4101 * since ND_GSS is set for everything but AUTH_SYS.
4102 */
4103 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
4104 (nd->nd_flag & ND_GSSPRIVACY))
4105 return (0);
4106 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
4107 (nd->nd_flag & ND_GSSINTEGRITY))
4108 return (0);
4109 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
4110 (nd->nd_flag & ND_GSS))
4111 return (0);
4112 if (exp->nes_secflavors[i] == AUTH_SYS &&
4113 (nd->nd_flag & ND_GSS) == 0)
4114 return (0);
4115 }
4116 if ((nd->nd_flag & ND_NFSV4) != 0)
4117 return (NFSERR_WRONGSEC);
4118 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
4119 }
4120
4121 /*
4122 * Calculate a hash value for the fid in a file handle.
4123 */
4124 uint32_t
4125 nfsrv_hashfh(fhandle_t *fhp)
4126 {
4127 uint32_t hashval;
4128
4129 hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
4130 return (hashval);
4131 }
4132
4133 /*
4134 * Calculate a hash value for the sessionid.
4135 */
4136 uint32_t
4137 nfsrv_hashsessionid(uint8_t *sessionid)
4138 {
4139 uint32_t hashval;
4140
4141 hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0);
4142 return (hashval);
4143 }
4144
4145 /*
4146 * Signal the userland master nfsd to backup the stable restart file.
4147 */
4148 void
4149 nfsrv_backupstable(void)
4150 {
4151 struct proc *procp;
4152
4153 if (nfsd_master_proc != NULL) {
4154 procp = pfind(nfsd_master_pid);
4155 /* Try to make sure it is the correct process. */
4156 if (procp == nfsd_master_proc &&
4157 procp->p_stats->p_start.tv_sec ==
4158 nfsd_master_start.tv_sec &&
4159 procp->p_stats->p_start.tv_usec ==
4160 nfsd_master_start.tv_usec &&
4161 strcmp(procp->p_comm, nfsd_master_comm) == 0)
4162 kern_psignal(procp, SIGUSR2);
4163 else
4164 nfsd_master_proc = NULL;
4165
4166 if (procp != NULL)
4167 PROC_UNLOCK(procp);
4168 }
4169 }
4170
4171 /*
4172 * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror.
4173 * The arguments are in a structure, so that they can be passed through
4174 * taskqueue for a kernel process to execute this function.
4175 */
4176 struct nfsrvdscreate {
4177 int done;
4178 int inprog;
4179 struct task tsk;
4180 struct ucred *tcred;
4181 struct vnode *dvp;
4182 NFSPROC_T *p;
4183 struct pnfsdsfile *pf;
4184 int err;
4185 fhandle_t fh;
4186 struct vattr va;
4187 struct vattr createva;
4188 };
4189
4190 int
4191 nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap,
4192 fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa,
4193 char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp)
4194 {
4195 struct vnode *nvp;
4196 struct nameidata named;
4197 struct vattr va;
4198 char *bufp;
4199 u_long *hashp;
4200 struct nfsnode *np;
4201 struct nfsmount *nmp;
4202 int error;
4203
4204 NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE,
4205 LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
4206 nfsvno_setpathbuf(&named, &bufp, &hashp);
4207 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
4208 named.ni_cnd.cn_thread = p;
4209 named.ni_cnd.cn_nameptr = bufp;
4210 if (fnamep != NULL) {
4211 strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1);
4212 named.ni_cnd.cn_namelen = strlen(bufp);
4213 } else
4214 named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp);
4215 NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp);
4216
4217 /* Create the date file in the DS mount. */
4218 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
4219 if (error == 0) {
4220 error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap);
4221 vref(dvp);
4222 VOP_VPUT_PAIR(dvp, error == 0 ? &nvp : NULL, false);
4223 if (error == 0) {
4224 /* Set the ownership of the file. */
4225 error = VOP_SETATTR(nvp, nvap, tcred);
4226 NFSD_DEBUG(4, "nfsrv_dscreate:"
4227 " setattr-uid=%d\n", error);
4228 if (error != 0)
4229 vput(nvp);
4230 }
4231 if (error != 0)
4232 printf("pNFS: pnfscreate failed=%d\n", error);
4233 } else
4234 printf("pNFS: pnfscreate vnlock=%d\n", error);
4235 if (error == 0) {
4236 np = VTONFS(nvp);
4237 nmp = VFSTONFS(nvp->v_mount);
4238 if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs")
4239 != 0 || nmp->nm_nam->sa_len > sizeof(
4240 struct sockaddr_in6) ||
4241 np->n_fhp->nfh_len != NFSX_MYFH) {
4242 printf("Bad DS file: fstype=%s salen=%d"
4243 " fhlen=%d\n",
4244 nvp->v_mount->mnt_vfc->vfc_name,
4245 nmp->nm_nam->sa_len, np->n_fhp->nfh_len);
4246 error = ENOENT;
4247 }
4248
4249 /* Set extattrs for the DS on the MDS file. */
4250 if (error == 0) {
4251 if (dsa != NULL) {
4252 error = VOP_GETATTR(nvp, &va, tcred);
4253 if (error == 0) {
4254 dsa->dsa_filerev = va.va_filerev;
4255 dsa->dsa_size = va.va_size;
4256 dsa->dsa_atime = va.va_atime;
4257 dsa->dsa_mtime = va.va_mtime;
4258 dsa->dsa_bytes = va.va_bytes;
4259 }
4260 }
4261 if (error == 0) {
4262 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh,
4263 NFSX_MYFH);
4264 NFSBCOPY(nmp->nm_nam, &pf->dsf_sin,
4265 nmp->nm_nam->sa_len);
4266 NFSBCOPY(named.ni_cnd.cn_nameptr,
4267 pf->dsf_filename,
4268 sizeof(pf->dsf_filename));
4269 }
4270 } else
4271 printf("pNFS: pnfscreate can't get DS"
4272 " attr=%d\n", error);
4273 if (nvpp != NULL && error == 0)
4274 *nvpp = nvp;
4275 else
4276 vput(nvp);
4277 }
4278 nfsvno_relpathbuf(&named);
4279 return (error);
4280 }
4281
4282 /*
4283 * Start up the thread that will execute nfsrv_dscreate().
4284 */
4285 static void
4286 start_dscreate(void *arg, int pending)
4287 {
4288 struct nfsrvdscreate *dsc;
4289
4290 dsc = (struct nfsrvdscreate *)arg;
4291 dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh,
4292 dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL);
4293 dsc->done = 1;
4294 NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err);
4295 }
4296
/*
 * Create a pNFS data file on the Data Server(s).
 * vp - vnode of the MDS file; it is asserted to be exclusively locked
 *      before the extended attributes are set on it
 * vap - the create attributes handed to nfsrv_dscreate()
 * cred - credentials used for the VOP_GETATTR() of vp
 * p - the calling thread
 * A DS is chosen from nfsrv_devidhead and, when nfsrv_maxpnfsmirror > 1,
 * additional matching DSs are used as mirrors.  The data file is created
 * on every chosen DS and the result is recorded in the "pnfsd.dsfile" and
 * "pnfsd.dsattr" extended attributes of vp.
 * Nothing is returned; failures are only reported via printf().
 */
static void
nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    NFSPROC_T *p)
{
	struct nfsrvdscreate *dsc, *tdsc = NULL;
	struct nfsdevice *ds, *tds, *fds;
	struct mount *mp;
	struct pnfsdsfile *pf, *tpf;
	struct pnfsdsattr dsattr;
	struct vattr va;
	struct vnode *dvp[NFSDEV_MAXMIRRORS];
	struct nfsmount *nmp;
	fhandle_t fh;
	uid_t vauid;
	gid_t vagid;
	u_short vamode;
	struct ucred *tcred;
	int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret;
	int failpos, timo;

	/* Get a DS server directory in a round-robin order. */
	mirrorcnt = 1;
	mp = vp->v_mount;
	ds = fds = NULL;
	NFSDDSLOCK();
	/*
	 * Search for the first entry that handles this MDS fs, but use the
	 * first entry for all MDS fs's otherwise.
	 * fds is only set when an entry specific to this MDS fs was found.
	 */
	TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) {
		if (tds->nfsdev_nmp != NULL) {
			if (tds->nfsdev_mdsisset == 0 && ds == NULL)
				ds = tds;
			else if (tds->nfsdev_mdsisset != 0 && fsidcmp(
			    &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) {
				ds = fds = tds;
				break;
			}
		}
	}
	if (ds == NULL) {
		NFSDDSUNLOCK();
		NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n");
		return;
	}
	/* The same directory index is used for every mirror. */
	i = dsdir[0] = ds->nfsdev_nextdir;
	ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize;
	dvp[0] = ds->nfsdev_dsdir[i];
	tds = TAILQ_NEXT(ds, nfsdev_list);
	if (nfsrv_maxpnfsmirror > 1 && tds != NULL) {
		/*
		 * Pick mirrors from the entries after ds that are of the
		 * same kind as ds (all-fs entries if fds == NULL, entries
		 * for this MDS fs otherwise).
		 */
		TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) {
			if (tds->nfsdev_nmp != NULL &&
			    ((tds->nfsdev_mdsisset == 0 && fds == NULL) ||
			     (tds->nfsdev_mdsisset != 0 && fds != NULL &&
			      fsidcmp(&mp->mnt_stat.f_fsid,
			      &tds->nfsdev_mdsfsid) == 0))) {
				dsdir[mirrorcnt] = i;
				dvp[mirrorcnt] = tds->nfsdev_dsdir[i];
				mirrorcnt++;
				if (mirrorcnt >= nfsrv_maxpnfsmirror)
					break;
			}
		}
	}
	/* Put at end of list to implement round-robin usage. */
	TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
	TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
	NFSDDSUNLOCK();
	/*
	 * One nfsrvdscreate is needed for each mirror except the last one,
	 * which is always created directly by this thread.
	 */
	dsc = NULL;
	if (mirrorcnt > 1)
		tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP,
		    M_WAITOK | M_ZERO);
	/* pf holds nfsrv_maxpnfsmirror entries, all of which are stored. */
	tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK |
	    M_ZERO);

	error = nfsvno_getfh(vp, &fh, p);
	if (error == 0)
		error = VOP_GETATTR(vp, &va, cred);
	if (error == 0) {
		/* Set the attributes for "vp" to Setattr the DS vp. */
		vauid = va.va_uid;
		vagid = va.va_gid;
		vamode = va.va_mode;
		VATTR_NULL(&va);
		va.va_uid = vauid;
		va.va_gid = vagid;
		va.va_mode = vamode;
		va.va_size = 0;
	} else
		printf("pNFS: pnfscreate getfh+attr=%d\n", error);

	NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid,
	    cred->cr_gid);
	/* Make data file name based on FH. */
	tcred = newnfs_getcred();

	/*
	 * Create the file on each DS mirror, using kernel process(es) for the
	 * additional mirrors.
	 */
	failpos = -1;
	for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) {
		tpf->dsf_dir = dsdir[i];
		tdsc->tcred = tcred;
		tdsc->p = p;
		tdsc->pf = tpf;
		tdsc->createva = *vap;
		NFSBCOPY(&fh, &tdsc->fh, sizeof(fh));
		tdsc->va = va;
		tdsc->dvp = dvp[i];
		tdsc->done = 0;
		tdsc->inprog = 0;
		tdsc->err = 0;
		/* A non-zero ret makes this thread do the create itself. */
		ret = EIO;
		if (nfs_pnfsiothreads != 0) {
			ret = nfs_pnfsio(start_dscreate, tdsc);
			NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret);
		}
		if (ret != 0) {
			ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL,
			    NULL, tcred, p, NULL);
			if (ret != 0) {
				KASSERT(error == 0, ("nfsrv_dscreate err=%d",
				    error));
				/*
				 * Only the first DS failure is recorded in
				 * failpos; anything else is a hard error.
				 */
				if (failpos == -1 && nfsds_failerr(ret))
					failpos = i;
				else
					error = ret;
			}
		}
	}
	if (error == 0) {
		/* The last mirror also fills in dsattr. */
		tpf->dsf_dir = dsdir[mirrorcnt - 1];
		error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf,
		    &dsattr, NULL, tcred, p, NULL);
		if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) {
			failpos = mirrorcnt - 1;
			error = 0;
		}
	}
	timo = hz / 50;		/* Wait for 20msec. */
	if (timo < 1)
		timo = 1;
	/* Wait for kernel task(s) to complete. */
	for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) {
		while (tdsc->inprog != 0 && tdsc->done == 0)
			tsleep(&tdsc->tsk, PVFS, "srvdcr", timo);
		if (tdsc->err != 0) {
			if (failpos == -1 && nfsds_failerr(tdsc->err))
				failpos = i;
			else if (error == 0)
				error = tdsc->err;
		}
	}

	/*
	 * If failpos has been set, that mirror has failed, so it needs
	 * to be disabled.
	 */
	if (failpos >= 0) {
		nmp = VFSTONFS(dvp[failpos]->v_mount);
		NFSLOCKMNT(nmp);
		/* Skip this if the flags show it is already being handled. */
		if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
		     NFSMNTP_CANCELRPCS)) == 0) {
			nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
			NFSUNLOCKMNT(nmp);
			ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
			NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos,
			    ds);
			if (ds != NULL)
				nfsrv_killrpcs(nmp);
			NFSLOCKMNT(nmp);
			nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
			wakeup(nmp);
		}
		NFSUNLOCKMNT(nmp);
	}

	NFSFREECRED(tcred);
	if (error == 0) {
		ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp");

		NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n",
		    mirrorcnt, nfsrv_maxpnfsmirror);
		/*
		 * For all mirrors that couldn't be created, fill in the
		 * *pf structure, but with an IP address == 0.0.0.0.
		 */
		tpf = pf + mirrorcnt;
		for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) {
			*tpf = *pf;
			tpf->dsf_sin.sin_family = AF_INET;
			tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in);
			tpf->dsf_sin.sin_addr.s_addr = 0;
			tpf->dsf_sin.sin_port = 0;
		}

		error = vn_extattr_set(vp, IO_NODELOCKED,
		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
		    sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
		if (error == 0)
			error = vn_extattr_set(vp, IO_NODELOCKED,
			    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
			    sizeof(dsattr), (char *)&dsattr, p);
		if (error != 0)
			printf("pNFS: pnfscreate setextattr=%d\n",
			    error);
	} else
		printf("pNFS: pnfscreate=%d\n", error);
	free(pf, M_TEMP);
	free(dsc, M_TEMP);
}
4512
4513 /*
4514 * Get the information needed to remove the pNFS Data Server file from the
4515 * Metadata file. Upon success, ddvp is set non-NULL to the locked
4516 * DS directory vnode. The caller must unlock *ddvp when done with it.
4517 */
4518 static void
4519 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp,
4520 int *mirrorcntp, char *fname, fhandle_t *fhp)
4521 {
4522 struct vattr va;
4523 struct ucred *tcred;
4524 char *buf;
4525 int buflen, error;
4526
4527 dvpp[0] = NULL;
4528 /* If not an exported regular file or not a pNFS server, just return. */
4529 if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
4530 nfsrv_devidcnt == 0)
4531 return;
4532
4533 /* Check to see if this is the last hard link. */
4534 tcred = newnfs_getcred();
4535 error = VOP_GETATTR(vp, &va, tcred);
4536 NFSFREECRED(tcred);
4537 if (error != 0) {
4538 printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error);
4539 return;
4540 }
4541 if (va.va_nlink > 1)
4542 return;
4543
4544 error = nfsvno_getfh(vp, fhp, p);
4545 if (error != 0) {
4546 printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error);
4547 return;
4548 }
4549
4550 buflen = 1024;
4551 buf = malloc(buflen, M_TEMP, M_WAITOK);
4552 /* Get the directory vnode for the DS mount and the file handle. */
4553 error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp,
4554 NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL);
4555 free(buf, M_TEMP);
4556 if (error != 0)
4557 printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error);
4558 }
4559
/*
 * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror.
 * The arguments are in a structure, so that they can be passed through
 * taskqueue for a kernel process to execute this function.
 */
struct nfsrvdsremove {
	int			done;	/* Set to 1 by start_dsremove(). */
	int			inprog;	/* Waiter only sleeps if non-zero. */
	struct task		tsk;	/* Also the tsleep() wait channel. */
	struct ucred		*tcred;	/* Credentials for the remove. */
	struct vnode		*dvp;	/* DS directory vnode. */
	NFSPROC_T		*p;	/* Thread passed to nfsrv_dsremove(). */
	int			err;	/* Result of nfsrv_dsremove(). */
	char			fname[PNFS_FILENAME_LEN + 1]; /* File name. */
};
4575
4576 static int
4577 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred,
4578 NFSPROC_T *p)
4579 {
4580 struct nameidata named;
4581 struct vnode *nvp;
4582 char *bufp;
4583 u_long *hashp;
4584 int error;
4585
4586 error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
4587 if (error != 0)
4588 return (error);
4589 named.ni_cnd.cn_nameiop = DELETE;
4590 named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
4591 named.ni_cnd.cn_cred = tcred;
4592 named.ni_cnd.cn_thread = p;
4593 named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME;
4594 nfsvno_setpathbuf(&named, &bufp, &hashp);
4595 named.ni_cnd.cn_nameptr = bufp;
4596 named.ni_cnd.cn_namelen = strlen(fname);
4597 strlcpy(bufp, fname, NAME_MAX);
4598 NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp);
4599 error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
4600 NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error);
4601 if (error == 0) {
4602 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd);
4603 vput(nvp);
4604 }
4605 NFSVOPUNLOCK(dvp);
4606 nfsvno_relpathbuf(&named);
4607 if (error != 0)
4608 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error);
4609 return (error);
4610 }
4611
4612 /*
4613 * Start up the thread that will execute nfsrv_dsremove().
4614 */
4615 static void
4616 start_dsremove(void *arg, int pending)
4617 {
4618 struct nfsrvdsremove *dsrm;
4619
4620 dsrm = (struct nfsrvdsremove *)arg;
4621 dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred,
4622 dsrm->p);
4623 dsrm->done = 1;
4624 NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err);
4625 }
4626
4627 /*
4628 * Remove a pNFS data file from a Data Server.
4629 * nfsrv_pnfsremovesetup() must have been called before the MDS file was
4630 * removed to set up the dvp and fill in the FH.
4631 */
4632 static void
4633 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp,
4634 NFSPROC_T *p)
4635 {
4636 struct ucred *tcred;
4637 struct nfsrvdsremove *dsrm, *tdsrm;
4638 struct nfsdevice *ds;
4639 struct nfsmount *nmp;
4640 int failpos, i, ret, timo;
4641
4642 tcred = newnfs_getcred();
4643 dsrm = NULL;
4644 if (mirrorcnt > 1)
4645 dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK);
4646 /*
4647 * Remove the file on each DS mirror, using kernel process(es) for the
4648 * additional mirrors.
4649 */
4650 failpos = -1;
4651 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
4652 tdsrm->tcred = tcred;
4653 tdsrm->p = p;
4654 tdsrm->dvp = dvp[i];
4655 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1);
4656 tdsrm->inprog = 0;
4657 tdsrm->done = 0;
4658 tdsrm->err = 0;
4659 ret = EIO;
4660 if (nfs_pnfsiothreads != 0) {
4661 ret = nfs_pnfsio(start_dsremove, tdsrm);
4662 NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret);
4663 }
4664 if (ret != 0) {
4665 ret = nfsrv_dsremove(dvp[i], fname, tcred, p);
4666 if (failpos == -1 && nfsds_failerr(ret))
4667 failpos = i;
4668 }
4669 }
4670 ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p);
4671 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret))
4672 failpos = mirrorcnt - 1;
4673 timo = hz / 50; /* Wait for 20msec. */
4674 if (timo < 1)
4675 timo = 1;
4676 /* Wait for kernel task(s) to complete. */
4677 for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
4678 while (tdsrm->inprog != 0 && tdsrm->done == 0)
4679 tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo);
4680 if (failpos == -1 && nfsds_failerr(tdsrm->err))
4681 failpos = i;
4682 }
4683
4684 /*
4685 * If failpos has been set, that mirror has failed, so it needs
4686 * to be disabled.
4687 */
4688 if (failpos >= 0) {
4689 nmp = VFSTONFS(dvp[failpos]->v_mount);
4690 NFSLOCKMNT(nmp);
4691 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
4692 NFSMNTP_CANCELRPCS)) == 0) {
4693 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
4694 NFSUNLOCKMNT(nmp);
4695 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
4696 NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos,
4697 ds);
4698 if (ds != NULL)
4699 nfsrv_killrpcs(nmp);
4700 NFSLOCKMNT(nmp);
4701 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
4702 wakeup(nmp);
4703 }
4704 NFSUNLOCKMNT(nmp);
4705 }
4706
4707 /* Get rid all layouts for the file. */
4708 nfsrv_freefilelayouts(fhp);
4709
4710 NFSFREECRED(tcred);
4711 free(dsrm, M_TEMP);
4712 }
4713
4714 /*
4715 * Generate a file name based on the file handle and put it in *bufp.
4716 * Return the number of bytes generated.
4717 */
4718 static int
4719 nfsrv_putfhname(fhandle_t *fhp, char *bufp)
4720 {
4721 int i;
4722 uint8_t *cp;
4723 const uint8_t *hexdigits = "0123456789abcdef";
4724
4725 cp = (uint8_t *)fhp;
4726 for (i = 0; i < sizeof(*fhp); i++) {
4727 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf];
4728 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf];
4729 }
4730 bufp[2 * i] = '\0';
4731 return (2 * i);
4732 }
4733
4734 /*
4735 * Update the Metadata file's attributes from the DS file when a Read/Write
4736 * layout is returned.
4737 * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN
4738 * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file.
4739 */
4740 int
4741 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
4742 {
4743 struct ucred *tcred;
4744 int error;
4745
4746 /* Do this as root so that it won't fail with EACCES. */
4747 tcred = newnfs_getcred();
4748 error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN,
4749 NULL, NULL, NULL, nap, NULL, NULL, 0, NULL);
4750 NFSFREECRED(tcred);
4751 return (error);
4752 }
4753
4754 /*
4755 * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file.
4756 */
4757 static int
4758 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred,
4759 NFSPROC_T *p)
4760 {
4761 int error;
4762
4763 error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL,
4764 NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL);
4765 return (error);
4766 }
4767
/*
 * Perform an operation on the DS data file(s) for the MDS file vp.
 * vp - vnode of the MDS file
 * off, cnt - offset and byte count handed to the Read/Write RPCs; off is
 *            also handed to the Allocate RPC
 * cred, p - credentials and thread for the operation
 * ioproc - selects the operation: NFSPROC_GETATTR, NFSPROC_READDS,
 *          NFSPROC_WRITEDS, NFSPROC_SETATTR, NFSPROC_SETACL,
 *          NFSPROC_SEEKDS or NFSPROC_ALLOCATE; any other value ends up
 *          doing nfsrv_getattrdsrpc() against the last mirror
 * mpp, mpp2 - mbuf chain pointers handed to nfsrv_readdsrpc(); mpp is also
 *             handed to nfsrv_writedsrpc()
 * cp - handed to nfsrv_writedsrpc()
 * nap - attributes; filled in for Getattr and handed to the Setattr and
 *       Getattr RPC functions
 * aclp - the ACL for NFSPROC_SETACL
 * offp, content, eofp - handed to nfsrv_seekdsrpc(); *offp is also handed
 *                       to nfsrv_allocatedsrpc()
 * Returns ENOENT when vp is not an exported regular file on a pNFS server
 * or when the DS information cannot be acquired, otherwise the result of
 * the operation.
 */
static int
nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
    struct thread *p, int ioproc, struct mbuf **mpp, char *cp,
    struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp,
    off_t *offp, int content, bool *eofp)
{
	struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp;
	fhandle_t fh[NFSDEV_MAXMIRRORS];
	struct vnode *dvp[NFSDEV_MAXMIRRORS];
	struct nfsdevice *ds;
	struct pnfsdsattr dsattr;
	struct opnfsdsattr odsattr;
	char *buf;
	int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt;

	NFSD_DEBUG(4, "in nfsrv_proxyds\n");
	/*
	 * If not a regular file, not exported or not a pNFS server,
	 * just return ENOENT.
	 */
	if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
	    nfsrv_devidcnt == 0)
		return (ENOENT);

	buflen = 1024;
	buf = malloc(buflen, M_TEMP, M_WAITOK);
	error = 0;

	/*
	 * For Getattr, get the Change attribute (va_filerev) and size (va_size)
	 * from the MetaData file's extended attribute.
	 */
	if (ioproc == NFSPROC_GETATTR) {
		error = vn_extattr_get(vp, IO_NODELOCKED,
		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf,
		    p);
		if (error == 0) {
			/* The attribute's size tells which format it is in. */
			if (buflen == sizeof(odsattr)) {
				NFSBCOPY(buf, &odsattr, buflen);
				nap->na_filerev = odsattr.dsa_filerev;
				nap->na_size = odsattr.dsa_size;
				nap->na_atime = odsattr.dsa_atime;
				nap->na_mtime = odsattr.dsa_mtime;
				/*
				 * Fake na_bytes by rounding up na_size.
				 * Since we don't know the block size, just
				 * use BLKDEV_IOSIZE.
				 */
				nap->na_bytes = (odsattr.dsa_size +
				    BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1);
			} else if (buflen == sizeof(dsattr)) {
				NFSBCOPY(buf, &dsattr, buflen);
				nap->na_filerev = dsattr.dsa_filerev;
				nap->na_size = dsattr.dsa_size;
				nap->na_atime = dsattr.dsa_atime;
				nap->na_mtime = dsattr.dsa_mtime;
				nap->na_bytes = dsattr.dsa_bytes;
			} else
				error = ENXIO;
		}
		if (error == 0) {
			/*
			 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr()
			 * returns 0, just return now.  nfsrv_checkdsattr()
			 * returns 0 if there is no Read/Write layout
			 * plus either an Open/Write_access or Write
			 * delegation issued to a client for the file.
			 */
			if (nfsrv_pnfsgetdsattr == 0 ||
			    nfsrv_checkdsattr(vp, p) == 0) {
				free(buf, M_TEMP);
				return (error);
			}
		}

		/*
		 * Clear ENOATTR so the code below will attempt to do a
		 * nfsrv_getattrdsrpc() to get the attributes and (re)create
		 * the extended attribute.
		 */
		if (error == ENOATTR)
			error = 0;
	}

	origmircnt = -1;
	trycnt = 0;
tryagain:
	/*
	 * NOTE(review): the two printf() messages below look swapped, since
	 * the first one reports a nfsrv_dsgetsockmnt() failure and the
	 * second one reports the earlier extended attribute failure.
	 */
	if (error == 0) {
		buflen = 1024;
		if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) ==
		    LK_EXCLUSIVE)
			printf("nfsrv_proxyds: Readds vp exclusively locked\n");
		error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen,
		    &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL,
		    NULL, NULL);
		if (error == 0) {
			for (i = 0; i < mirrorcnt; i++)
				nmp[i] = VFSTONFS(dvp[i]->v_mount);
		} else
			printf("pNFS: proxy getextattr sockaddr=%d\n", error);
	} else
		printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error);
	if (error == 0) {
		failpos = -1;
		/* Remember the mirror count of the first try. */
		if (origmircnt == -1)
			origmircnt = mirrorcnt;
		/*
		 * If failpos is set to a mirror#, then that mirror has
		 * failed and will be disabled. For Read, Getattr and Seek, the
		 * function only tries one mirror, so if that mirror has
		 * failed, it will need to be retried. As such, increment
		 * trycnt for these cases.
		 * For Write, Setattr and Setacl, the function tries all
		 * mirrors and will not return an error for the case where
		 * one mirror has failed. For these cases, the functioning
		 * mirror(s) will have been modified, so a retry isn't
		 * necessary. These functions will set failpos for the
		 * failed mirror#.
		 */
		if (ioproc == NFSPROC_READDS) {
			error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0],
			    mpp, mpp2);
			if (nfsds_failerr(error) && mirrorcnt > 1) {
				/*
				 * Setting failpos will cause the mirror
				 * to be disabled and then a retry of this
				 * read is required.
				 */
				failpos = 0;
				error = 0;
				trycnt++;
			}
		} else if (ioproc == NFSPROC_WRITEDS)
			error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp,
			    &nmp[0], mirrorcnt, mpp, cp, &failpos);
		else if (ioproc == NFSPROC_SETATTR)
			error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0],
			    mirrorcnt, nap, &failpos);
		else if (ioproc == NFSPROC_SETACL)
			error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0],
			    mirrorcnt, aclp, &failpos);
		else if (ioproc == NFSPROC_SEEKDS) {
			error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred,
			    p, nmp[0]);
			if (nfsds_failerr(error) && mirrorcnt > 1) {
				/*
				 * Setting failpos will cause the mirror
				 * to be disabled and then a retry of this
				 * seek is required.
				 */
				failpos = 0;
				error = 0;
				trycnt++;
			}
		} else if (ioproc == NFSPROC_ALLOCATE)
			error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp,
			    &nmp[0], mirrorcnt, &failpos);
		else {
			/* Getattr is done against the last mirror. */
			error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p,
			    vp, nmp[mirrorcnt - 1], nap);
			if (nfsds_failerr(error) && mirrorcnt > 1) {
				/*
				 * Setting failpos will cause the mirror
				 * to be disabled and then a retry of this
				 * getattr is required.
				 */
				failpos = mirrorcnt - 1;
				error = 0;
				trycnt++;
			}
		}
		ds = NULL;
		if (failpos >= 0) {
			failnmp = nmp[failpos];
			NFSLOCKMNT(failnmp);
			/* Skip this if the flags show it is being handled. */
			if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM |
			     NFSMNTP_CANCELRPCS)) == 0) {
				failnmp->nm_privflag |= NFSMNTP_CANCELRPCS;
				NFSUNLOCKMNT(failnmp);
				ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER,
				    failnmp, p);
				NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n",
				    failpos, ds);
				if (ds != NULL)
					nfsrv_killrpcs(failnmp);
				NFSLOCKMNT(failnmp);
				failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
				wakeup(failnmp);
			}
			NFSUNLOCKMNT(failnmp);
		}
		/* Release the locks acquired by nfsrv_dsgetsockmnt(). */
		for (i = 0; i < mirrorcnt; i++)
			NFSVOPUNLOCK(dvp[i]);
		NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error,
		    trycnt);
		/* Try the Read/Getattr again if a mirror was deleted. */
		if (ds != NULL && trycnt > 0 && trycnt < origmircnt)
			goto tryagain;
	} else {
		/* Return ENOENT for any Extended Attribute error. */
		error = ENOENT;
	}
	free(buf, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error);
	return (error);
}
4974
/*
 * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended
 * attribute.
 * vp - vnode of the MDS file; asserted to be locked
 * lktype - lock type for the DS directory vnodes returned via dvpp; when 0
 *          a directory vnode is only locked around the Lookup done here
 * buf, buflenp - buffer the extended attribute is read into and its length
 * mirrorcntp - set to 1 on entry and to the number of mirrors found upon
 *              success
 * dvpp - if non-NULL, array filled in with the DS directory vnodes
 * fhp - if non-NULL, array filled in with the DS file handles
 * devid - if non-NULL, filled in with NFSX_V4DEVICEID bytes per mirror
 * fnamep - if non-NULL, filled in with the file name of the first mirror
 *          found
 * nvpp - if non-NULL, *nvpp is set to the vnode from the first successful
 *        nfsrv_pnfslookupds() and *dsdirp to that mirror's directory index
 * newnmpp - If it points to a non-NULL nmp, that is the destination and needs
 *	     to be checked.  If it points to a NULL nmp, then it returns
 *	     a suitable destination.
 * curnmp - If non-NULL, it is the source mount for the copy.
 * ippos - if non-NULL, set to -1 on entry, then to the position of curnmp
 *         in the mirror list or, when curnmp is NULL, to the position of
 *         an entry with an AF_INET address of 0.0.0.0
 * dsdirp - see nvpp; dereferenced whenever *nvpp gets set
 * Returns 0 upon success.  ENOATTR is returned for a malformed attribute,
 * ENXIO if curnmp is not in the mirror list, EEXIST if the destination
 * already is a mirror and ENOENT if no mirror was found.
 */
int
nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp,
    int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp,
    char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp,
    struct nfsmount *curnmp, int *ippos, int *dsdirp)
{
	struct vnode *dvp, *nvp = NULL, **tdvpp;
	struct mount *mp;
	struct nfsmount *nmp, *newnmp;
	struct sockaddr *sad;
	struct sockaddr_in *sin;
	struct nfsdevice *ds, *tds, *fndds;
	struct pnfsdsfile *pf;
	uint32_t dsdir;
	int error, fhiszero, fnd, gotone, i, mirrorcnt;

	ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp");
	*mirrorcntp = 1;
	tdvpp = dvpp;
	if (nvpp != NULL)
		*nvpp = NULL;
	if (dvpp != NULL)
		*dvpp = NULL;
	if (ippos != NULL)
		*ippos = -1;
	if (newnmpp != NULL)
		newnmp = *newnmpp;
	else
		newnmp = NULL;
	mp = vp->v_mount;
	error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
	    "pnfsd.dsfile", buflenp, buf, p);
	/* The attribute must be a whole number of pnfsdsfile entries. */
	mirrorcnt = *buflenp / sizeof(*pf);
	if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS ||
	    *buflenp != sizeof(*pf) * mirrorcnt))
		error = ENOATTR;

	pf = (struct pnfsdsfile *)buf;
	/* If curnmp != NULL, check for a match in the mirror list. */
	if (curnmp != NULL && error == 0) {
		fnd = 0;
		for (i = 0; i < mirrorcnt; i++, pf++) {
			sad = (struct sockaddr *)&pf->dsf_sin;
			if (nfsaddr2_match(sad, curnmp->nm_nam)) {
				if (ippos != NULL)
					*ippos = i;
				fnd = 1;
				break;
			}
		}
		if (fnd == 0)
			error = ENXIO;
	}

	gotone = 0;
	pf = (struct pnfsdsfile *)buf;
	NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt,
	    error);
	for (i = 0; i < mirrorcnt && error == 0; i++, pf++) {
		fhiszero = 0;
		sad = (struct sockaddr *)&pf->dsf_sin;
		sin = &pf->dsf_sin;
		dsdir = pf->dsf_dir;
		if (dsdir >= nfsrv_dsdirsize) {
			printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir);
			error = ENOATTR;
		} else if (nvpp != NULL && newnmp != NULL &&
		    nfsaddr2_match(sad, newnmp->nm_nam))
			error = EEXIST;
		if (error == 0) {
			if (ippos != NULL && curnmp == NULL &&
			    sad->sa_family == AF_INET &&
			    sin->sin_addr.s_addr == 0)
				*ippos = i;
			if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0)
				fhiszero = 1;
			/* Use the socket address to find the mount point. */
			fndds = NULL;
			NFSDDSLOCK();
			/* Find a match for the IP address. */
			TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
				if (ds->nfsdev_nmp != NULL) {
					dvp = ds->nfsdev_dvp;
					nmp = VFSTONFS(dvp->v_mount);
					if (nmp != ds->nfsdev_nmp)
						printf("different2 nmp %p %p\n",
						    nmp, ds->nfsdev_nmp);
					if (nfsaddr2_match(sad, nmp->nm_nam)) {
						fndds = ds;
						break;
					}
				}
			}
			if (fndds != NULL && newnmpp != NULL &&
			    newnmp == NULL) {
				/* Search for a place to make a mirror copy. */
				TAILQ_FOREACH(tds, &nfsrv_devidhead,
				    nfsdev_list) {
					if (tds->nfsdev_nmp != NULL &&
					    fndds != tds &&
					    ((tds->nfsdev_mdsisset == 0 &&
					      fndds->nfsdev_mdsisset == 0) ||
					     (tds->nfsdev_mdsisset != 0 &&
					      fndds->nfsdev_mdsisset != 0 &&
					      fsidcmp(&tds->nfsdev_mdsfsid,
					      &mp->mnt_stat.f_fsid) == 0))) {
						*newnmpp = tds->nfsdev_nmp;
						break;
					}
				}
				if (tds != NULL) {
					/*
					 * Move this entry to the end of the
					 * list, so it won't be selected as
					 * easily the next time.
					 */
					TAILQ_REMOVE(&nfsrv_devidhead, tds,
					    nfsdev_list);
					TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds,
					    nfsdev_list);
				}
			}
			NFSDDSUNLOCK();
			/* An entry without a matching DS is skipped. */
			if (fndds != NULL) {
				dvp = fndds->nfsdev_dsdir[dsdir];
				if (lktype != 0 || fhiszero != 0 ||
				    (nvpp != NULL && *nvpp == NULL)) {
					if (fhiszero != 0)
						error = vn_lock(dvp,
						    LK_EXCLUSIVE);
					else if (lktype != 0)
						error = vn_lock(dvp, lktype);
					else
						error = vn_lock(dvp, LK_SHARED);
					/*
					 * If the file handle is all 0's, try to
					 * do a Lookup against the DS to acquire
					 * it.
					 * If lktype == 0 or the Lookup fails,
					 * unlock dvp after the call.
					 */
					if (error == 0 && (fhiszero != 0 ||
					    (nvpp != NULL && *nvpp == NULL))) {
						error = nfsrv_pnfslookupds(vp,
						    dvp, pf, &nvp, p);
						if (error == 0) {
							if (fhiszero != 0)
								nfsrv_pnfssetfh(
								    vp, pf,
								    devid,
								    fnamep,
								    nvp, p);
							if (nvpp != NULL &&
							    *nvpp == NULL) {
								*nvpp = nvp;
								*dsdirp = dsdir;
							} else
								vput(nvp);
						}
						if (error != 0 || lktype == 0)
							NFSVOPUNLOCK(dvp);
					}
				}
				if (error == 0) {
					gotone++;
					NFSD_DEBUG(4, "gotone=%d\n", gotone);
					if (devid != NULL) {
						NFSBCOPY(fndds->nfsdev_deviceid,
						    devid, NFSX_V4DEVICEID);
						devid += NFSX_V4DEVICEID;
					}
					if (dvpp != NULL)
						*tdvpp++ = dvp;
					if (fhp != NULL)
						NFSBCOPY(&pf->dsf_fh, fhp++,
						    NFSX_MYFH);
					if (fnamep != NULL && gotone == 1)
						strlcpy(fnamep,
						    pf->dsf_filename,
						    sizeof(pf->dsf_filename));
				} else
					NFSD_DEBUG(4, "nfsrv_dsgetsockmnt "
					    "err=%d\n", error);
			}
		}
	}
	if (error == 0 && gotone == 0)
		error = ENOENT;

	NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone,
	    error);
	if (error == 0)
		*mirrorcntp = gotone;
	else {
		if (gotone > 0 && dvpp != NULL) {
			/*
			 * If the error didn't occur on the first one and
			 * dvpp != NULL, the one(s) prior to the failure will
			 * have locked dvp's that need to be unlocked.
			 * NOTE(review): this unlocks without checking lktype,
			 * although the dvp's look to be returned unlocked
			 * when lktype == 0 -- confirm against the callers
			 * that pass lktype == 0 with a non-NULL dvpp.
			 */
			for (i = 0; i < gotone; i++) {
				NFSVOPUNLOCK(*dvpp);
				*dvpp++ = NULL;
			}
		}
		/*
		 * If it found the vnode to be copied from before a failure,
		 * it needs to be vput()'d.
		 */
		if (nvpp != NULL && *nvpp != NULL) {
			vput(*nvpp);
			*nvpp = NULL;
		}
	}
	return (error);
}
5199
5200 /*
5201 * Set the extended attribute for the Change attribute.
5202 */
5203 static int
5204 nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
5205 {
5206 struct pnfsdsattr dsattr;
5207 int error;
5208
5209 ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
5210 dsattr.dsa_filerev = nap->na_filerev;
5211 dsattr.dsa_size = nap->na_size;
5212 dsattr.dsa_atime = nap->na_atime;
5213 dsattr.dsa_mtime = nap->na_mtime;
5214 dsattr.dsa_bytes = nap->na_bytes;
5215 error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
5216 "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p);
5217 if (error != 0)
5218 printf("pNFS: setextattr=%d\n", error);
5219 return (error);
5220 }
5221
/*
 * Do a read RPC (NFSPROC_READDS) against a DS data file.
 * fhp - file handle of the DS data file
 * off, len - offset and number of bytes to read
 * cred, p - credentials and thread for the RPC
 * nmp - the mount of the DS
 * mpp - set to NULL on entry and, when data was read, to the start of the
 *       mbuf chain holding the data
 * mpendp - when data was read, set to the last mbuf of that chain
 * The reply's mbuf chain is trimmed in place to just the data plus XDR
 * padding, so ownership of it passes to the caller via *mpp.
 * Returns 0 upon success (with *mpp left NULL when no data was read), the
 * error from newnfs_request(), the reply status or ENOENT for a mangled
 * reply mbuf list.
 * NOTE(review): NFSM_DISSECT() and NFSM_STRSIZ() presumably set error and
 * jump to nfsmout upon a parse failure -- their definitions are not
 * visible here.
 */
static int
nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
    NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp)
{
	uint32_t *tl;
	struct nfsrv_descript *nd;
	nfsv4stateid_t st;
	struct mbuf *m, *m2;
	int error = 0, retlen, tlen, trimlen;

	NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	*mpp = NULL;
	/*
	 * Use a stateid where other is an alternating 01010 pattern and
	 * seqid is 0xffffffff.  This value is not defined as special by
	 * the RFC and is used by the FreeBSD NFS server to indicate an
	 * MDS->DS proxy operation.
	 */
	st.other[0] = 0x55555555;
	st.other[1] = 0x55555555;
	st.other[2] = 0x55555555;
	st.seqid = 0xffffffff;
	nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
	    NULL, NULL, 0, 0, cred);
	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
	/* The 64bit offset followed by the 32bit byte count. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
	txdr_hyper(off, tl);
	*(tl + 2) = txdr_unsigned(len);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0) {
		free(nd, M_TEMP);
		return (error);
	}
	if (nd->nd_repstat == 0) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		NFSM_STRSIZ(retlen, len);
		if (retlen > 0) {
			/* Trim off the pre-data XDR from the mbuf chain. */
			m = nd->nd_mrep;
			while (m != NULL && m != nd->nd_md) {
				if (m->m_next == nd->nd_md) {
					m->m_next = NULL;
					m_freem(nd->nd_mrep);
					nd->nd_mrep = m = nd->nd_md;
				} else
					m = m->m_next;
			}
			if (m == NULL) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}

			/*
			 * Now, adjust first mbuf so that any XDR before the
			 * read data is skipped over.
			 */
			trimlen = nd->nd_dpos - mtod(m, char *);
			if (trimlen > 0) {
				m->m_len -= trimlen;
				NFSM_DATAP(m, trimlen);
			}

			/*
			 * Truncate the mbuf chain at retlen bytes of data,
			 * plus XDR padding that brings the length up to a
			 * multiple of 4.
			 */
			tlen = NFSM_RNDUP(retlen);
			do {
				if (m->m_len >= tlen) {
					m->m_len = tlen;
					tlen = 0;
					m2 = m->m_next;
					m->m_next = NULL;
					m_freem(m2);
					break;
				}
				tlen -= m->m_len;
				m = m->m_next;
			} while (m != NULL);
			/* The chain ended before tlen bytes were found. */
			if (tlen > 0) {
				printf("nfsrv_readdsrpc: busted mbuf list\n");
				error = ENOENT;
				goto nfsmout;
			}
			/* Hand the chain to the caller, so it isn't freed. */
			*mpp = nd->nd_mrep;
			*mpendp = m;
			nd->nd_mrep = NULL;
		}
	} else
		error = nd->nd_repstat;
nfsmout:
	/* If nd->nd_mrep is already NULL, this is a no-op. */
	m_freem(nd->nd_mrep);
	free(nd, M_TEMP);
	NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error);
	return (error);
}
5323
/*
 * Do a write RPC on a DS data file, using this structure for the arguments,
 * so that this function can be executed by a separate kernel process.
 * NOTE(review): done, inprog and tsk presumably serve the same completion
 * hand-shake as in struct nfsrvdsremove -- the users of this structure
 * are not visible here.
 */
struct nfsrvwritedsdorpc {
	int			done;
	int			inprog;
	struct task		tsk;
	fhandle_t		fh;	/* File handle of the DS data file. */
	off_t			off;	/* Offset of the write. */
	int			len;	/* Length of the write. */
	struct nfsmount		*nmp;	/* Mount of the DS. */
	struct ucred		*cred;	/* Credentials for the RPC. */
	NFSPROC_T		*p;	/* Thread for the RPC. */
	struct mbuf		*m;	/* mbuf chain of data to write. */
	int			err;	/* Presumably the RPC's result. */
};
5341
5342 static int
5343 nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
5344 struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p)
5345 {
5346 uint32_t *tl;
5347 struct nfsrv_descript *nd;
5348 nfsattrbit_t attrbits;
5349 nfsv4stateid_t st;
5350 int commit, error, retlen;
5351
5352 nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
5353 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp,
5354 sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
5355
5356 /*
5357 * Use a stateid where other is an alternating 01010 pattern and
5358 * seqid is 0xffffffff. This value is not defined as special by
5359 * the RFC and is used by the FreeBSD NFS server to indicate an
5360 * MDS->DS proxy operation.
5361 */
5362 st.other[0] = 0x55555555;
5363 st.other[1] = 0x55555555;
5364 st.other[2] = 0x55555555;
5365 st.seqid = 0xffffffff;
5366 nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
5367 NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
5368 txdr_hyper(off, tl);
5369 tl += 2;
5370 /*
5371 * Do all writes FileSync, since the server doesn't hold onto dirty
5372 * buffers. Since clients should be accessing the DS servers directly
5373 * using the pNFS layouts, this just needs to work correctly as a
5374 * fallback.
5375 */
5376 *tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
5377 *tl = txdr_unsigned(len);
5378 NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len);
5379
5380 /* Put data in mbuf chain. */
5381 nd->nd_mb->m_next = m;
5382
5383 /* Set nd_mb and nd_bpos to end of data. */
5384 while (m->m_next != NULL)
5385 m = m->m_next;
5386 nd->nd_mb = m;
5387 nfsm_set(nd, m->m_len);
5388 NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len);
5389
5390 /* Do a Getattr for the attributes that change upon writing. */
5391 NFSZERO_ATTRBIT(&attrbits);
5392 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
5393 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
5394 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
5395 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
5396 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
5397 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
5398 *tl = txdr_unsigned(NFSV4OP_GETATTR);
5399 (void) nfsrv_putattrbit(nd, &attrbits);
5400 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
5401 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5402 if (error != 0) {
5403 free(nd, M_TEMP);
5404 return (error);
5405 }
5406 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat);
5407 /* Get rid of weak cache consistency data for now. */
5408 if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
5409 (ND_NFSV4 | ND_V4WCCATTR)) {
5410 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
5411 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
5412 NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
5413 if (error != 0)
5414 goto nfsmout;
5415 /*
5416 * Get rid of Op# and status for next op.
5417 */
5418 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5419 if (*++tl != 0)
5420 nd->nd_flag |= ND_NOMOREDATA;
5421 }
5422 if (nd->nd_repstat == 0) {
5423 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
5424 retlen = fxdr_unsigned(int, *tl++);
5425 commit = fxdr_unsigned(int, *tl);
5426 if (commit != NFSWRITE_FILESYNC)
5427 error = NFSERR_IO;
5428 NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n",
5429 retlen, commit, error);
5430 } else
5431 error = nd->nd_repstat;
5432 /* We have no use for the Write Verifier since we use FileSync. */
5433
5434 /*
5435 * Get the Change, Size, Access Time and Modify Time attributes and set
5436 * on the Metadata file, so its attributes will be what the file's
5437 * would be if it had been written.
5438 */
5439 if (error == 0) {
5440 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5441 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
5442 NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
5443 }
5444 NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
5445 nfsmout:
5446 m_freem(nd->nd_mrep);
5447 free(nd, M_TEMP);
5448 NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error);
5449 return (error);
5450 }
5451
5452 /*
5453 * Start up the thread that will execute nfsrv_writedsdorpc().
5454 */
5455 static void
5456 start_writedsdorpc(void *arg, int pending)
5457 {
5458 struct nfsrvwritedsdorpc *drpc;
5459
5460 drpc = (struct nfsrvwritedsdorpc *)arg;
5461 drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
5462 drpc->len, NULL, drpc->m, drpc->cred, drpc->p);
5463 drpc->done = 1;
5464 NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err);
5465 }
5466
5467 static int
5468 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
5469 NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
5470 struct mbuf **mpp, char *cp, int *failposp)
5471 {
5472 struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL;
5473 struct nfsvattr na;
5474 struct mbuf *m;
5475 int error, i, offs, ret, timo;
5476
5477 NFSD_DEBUG(4, "in nfsrv_writedsrpc\n");
5478 KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain"));
5479 drpc = NULL;
5480 if (mirrorcnt > 1)
5481 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
|