1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39
40 #include "opt_bootp.h"
41 #include "opt_nfsroot.h"
42 #include "opt_kern_tls.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/bio.h>
48 #include <sys/buf.h>
49 #include <sys/clock.h>
50 #include <sys/jail.h>
51 #include <sys/limits.h>
52 #include <sys/lock.h>
53 #include <sys/malloc.h>
54 #include <sys/mbuf.h>
55 #include <sys/module.h>
56 #include <sys/mount.h>
57 #include <sys/proc.h>
58 #include <sys/socket.h>
59 #include <sys/socketvar.h>
60 #include <sys/sockio.h>
61 #include <sys/sysctl.h>
62 #include <sys/vnode.h>
63 #include <sys/signalvar.h>
64
65 #include <vm/vm.h>
66 #include <vm/vm_extern.h>
67 #include <vm/uma.h>
68
69 #include <net/if.h>
70 #include <net/route.h>
71 #include <net/route/route_ctl.h>
72 #include <netinet/in.h>
73
74 #include <fs/nfs/nfsport.h>
75 #include <fs/nfsclient/nfsnode.h>
76 #include <fs/nfsclient/nfsmount.h>
77 #include <fs/nfsclient/nfs.h>
78 #include <nfs/nfsdiskless.h>
79
80 #include <rpc/rpcsec_tls.h>
81
82 FEATURE(nfscl, "NFSv4 client");
83
84 extern int nfscl_ticks;
85 extern struct timeval nfsboottime;
86 extern int nfsrv_useacl;
87 extern int nfscl_debuglevel;
88 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
89 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
90 extern struct mtx ncl_iod_mutex;
91 NFSCLSTATEMUTEX;
92 extern struct mtx nfsrv_dslock_mtx;
93
94 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
95 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
96
97 SYSCTL_DECL(_vfs_nfs);
98 static int nfs_ip_paranoia = 1;
99 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
100 &nfs_ip_paranoia, 0, "");
101 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
102 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
103 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
104 /* how long between console messages "nfs server foo not responding" */
105 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
106 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
107 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
108 #ifdef NFS_DEBUG
109 int nfs_debug;
110 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
111 "Toggle debug flag");
112 #endif
113
114 static int nfs_mountroot(struct mount *);
115 static void nfs_sec_name(char *, int *);
116 static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
117 struct nfs_args *argp, const char *, struct ucred *,
118 struct thread *);
119 static int mountnfs(struct nfs_args *, struct mount *,
120 struct sockaddr *, char *, u_char *, int, u_char *, int,
121 u_char *, int, struct vnode **, struct ucred *,
122 struct thread *, int, int, int, uint32_t, char *, int);
123 static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
124 struct sockaddr_storage *, int *, off_t *,
125 struct timeval *);
126 static vfs_mount_t nfs_mount;
127 static vfs_cmount_t nfs_cmount;
128 static vfs_unmount_t nfs_unmount;
129 static vfs_root_t nfs_root;
130 static vfs_statfs_t nfs_statfs;
131 static vfs_sync_t nfs_sync;
132 static vfs_sysctl_t nfs_sysctl;
133 static vfs_purge_t nfs_purge;
134
135 /*
136 * nfs vfs operations.
137 */
138 static struct vfsops nfs_vfsops = {
139 .vfs_init = ncl_init,
140 .vfs_mount = nfs_mount,
141 .vfs_cmount = nfs_cmount,
142 .vfs_root = vfs_cache_root,
143 .vfs_cachedroot = nfs_root,
144 .vfs_statfs = nfs_statfs,
145 .vfs_sync = nfs_sync,
146 .vfs_uninit = ncl_uninit,
147 .vfs_unmount = nfs_unmount,
148 .vfs_sysctl = nfs_sysctl,
149 .vfs_purge = nfs_purge,
150 };
151 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
152
153 /* So that loader and kldload(2) can find us, wherever we are.. */
154 MODULE_VERSION(nfs, 1);
155 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
156 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
157 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
158 MODULE_DEPEND(nfs, xdr, 1, 1, 1);
159
160 /*
161 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
162 * can be shared by both NFS clients. It is declared here so that it
163 * will be defined for kernels built without NFS_ROOT, although it
164 * isn't used in that case.
165 */
166 #if !defined(NFS_ROOT)
167 struct nfs_diskless nfs_diskless = { { { 0 } } };
168 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
169 int nfs_diskless_valid = 0;
170 #endif
171
172 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
173 &nfs_diskless_valid, 0,
174 "Has the diskless struct been filled correctly");
175
176 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
177 nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
178
179 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
180 &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
181 "%Ssockaddr_in", "Diskless root nfs address");
182
/* Non-static: declared here for use outside this file. */
void		newnfsargs_ntoh(struct nfs_args *);

/* Helpers for the diskless (NFS root) mount path. */
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
190
191 int
192 newnfs_iosize(struct nfsmount *nmp)
193 {
194 int iosize, maxio;
195
196 /* First, set the upper limit for iosize */
197 if (nmp->nm_flag & NFSMNT_NFSV4) {
198 maxio = NFS_MAXBSIZE;
199 } else if (nmp->nm_flag & NFSMNT_NFSV3) {
200 if (nmp->nm_sotype == SOCK_DGRAM)
201 maxio = NFS_MAXDGRAMDATA;
202 else
203 maxio = NFS_MAXBSIZE;
204 } else {
205 maxio = NFS_V2MAXDATA;
206 }
207 if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
208 nmp->nm_rsize = maxio;
209 if (nmp->nm_rsize > NFS_MAXBSIZE)
210 nmp->nm_rsize = NFS_MAXBSIZE;
211 if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
212 nmp->nm_readdirsize = maxio;
213 if (nmp->nm_readdirsize > nmp->nm_rsize)
214 nmp->nm_readdirsize = nmp->nm_rsize;
215 if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
216 nmp->nm_wsize = maxio;
217 if (nmp->nm_wsize > NFS_MAXBSIZE)
218 nmp->nm_wsize = NFS_MAXBSIZE;
219
220 /*
221 * Calculate the size used for io buffers. Use the larger
222 * of the two sizes to minimise nfs requests but make sure
223 * that it is at least one VM page to avoid wasting buffer
224 * space. It must also be at least NFS_DIRBLKSIZ, since
225 * that is the buffer size used for directories.
226 */
227 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
228 iosize = imax(iosize, PAGE_SIZE);
229 iosize = imax(iosize, NFS_DIRBLKSIZ);
230 nmp->nm_mountp->mnt_stat.f_iosize = iosize;
231 return (iosize);
232 }
233
234 static void
235 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
236 {
237
238 args->version = NFS_ARGSVERSION;
239 args->addr = oargs->addr;
240 args->addrlen = oargs->addrlen;
241 args->sotype = oargs->sotype;
242 args->proto = oargs->proto;
243 args->fh = oargs->fh;
244 args->fhsize = oargs->fhsize;
245 args->flags = oargs->flags;
246 args->wsize = oargs->wsize;
247 args->rsize = oargs->rsize;
248 args->readdirsize = oargs->readdirsize;
249 args->timeo = oargs->timeo;
250 args->retrans = oargs->retrans;
251 args->readahead = oargs->readahead;
252 args->hostname = oargs->hostname;
253 }
254
255 static void
256 nfs_convert_diskless(void)
257 {
258
259 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
260 sizeof(struct ifaliasreq));
261 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
262 sizeof(struct sockaddr_in));
263 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
264 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
265 nfsv3_diskless.root_fhsize = NFSX_MYFH;
266 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
267 } else {
268 nfsv3_diskless.root_fhsize = NFSX_V2FH;
269 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
270 }
271 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
272 sizeof(struct sockaddr_in));
273 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
274 nfsv3_diskless.root_time = nfs_diskless.root_time;
275 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
276 MAXHOSTNAMELEN);
277 nfs_diskless_valid = 3;
278 }
279
280 /*
281 * nfs statfs call
282 */
283 static int
284 nfs_statfs(struct mount *mp, struct statfs *sbp)
285 {
286 struct vnode *vp;
287 struct thread *td;
288 struct nfsmount *nmp = VFSTONFS(mp);
289 struct nfsvattr nfsva;
290 struct nfsfsinfo fs;
291 struct nfsstatfs sb;
292 int error = 0, attrflag, gotfsinfo = 0, ret;
293 struct nfsnode *np;
294
295 td = curthread;
296
297 error = vfs_busy(mp, MBF_NOWAIT);
298 if (error)
299 return (error);
300 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
301 if (error) {
302 vfs_unbusy(mp);
303 return (error);
304 }
305 vp = NFSTOV(np);
306 mtx_lock(&nmp->nm_mtx);
307 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
308 mtx_unlock(&nmp->nm_mtx);
309 error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
310 &attrflag, NULL);
311 if (!error)
312 gotfsinfo = 1;
313 } else
314 mtx_unlock(&nmp->nm_mtx);
315 if (!error)
316 error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
317 &attrflag, NULL);
318 if (error != 0)
319 NFSCL_DEBUG(2, "statfs=%d\n", error);
320 if (attrflag == 0) {
321 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
322 td->td_ucred, td, &nfsva, NULL, NULL);
323 if (ret) {
324 /*
325 * Just set default values to get things going.
326 */
327 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
328 nfsva.na_vattr.va_type = VDIR;
329 nfsva.na_vattr.va_mode = 0777;
330 nfsva.na_vattr.va_nlink = 100;
331 nfsva.na_vattr.va_uid = (uid_t)0;
332 nfsva.na_vattr.va_gid = (gid_t)0;
333 nfsva.na_vattr.va_fileid = 2;
334 nfsva.na_vattr.va_gen = 1;
335 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
336 nfsva.na_vattr.va_size = 512 * 1024;
337 }
338 }
339 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
340 if (!error) {
341 mtx_lock(&nmp->nm_mtx);
342 if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
343 nfscl_loadfsinfo(nmp, &fs);
344 nfscl_loadsbinfo(nmp, &sb, sbp);
345 sbp->f_iosize = newnfs_iosize(nmp);
346 mtx_unlock(&nmp->nm_mtx);
347 if (sbp != &mp->mnt_stat) {
348 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
349 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
350 }
351 strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
352 } else if (NFS_ISV4(vp)) {
353 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
354 }
355 vput(vp);
356 vfs_unbusy(mp);
357 return (error);
358 }
359
360 /*
361 * nfs version 3 fsinfo rpc call
362 */
363 int
364 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
365 struct thread *td)
366 {
367 struct nfsfsinfo fs;
368 struct nfsvattr nfsva;
369 int error, attrflag;
370
371 error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
372 if (!error) {
373 if (attrflag)
374 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
375 1);
376 mtx_lock(&nmp->nm_mtx);
377 nfscl_loadfsinfo(nmp, &fs);
378 mtx_unlock(&nmp->nm_mtx);
379 }
380 return (error);
381 }
382
383 /*
384 * Mount a remote root fs via. nfs. This depends on the info in the
385 * nfs_diskless structure that has been filled in properly by some primary
386 * bootstrap.
387 * It goes something like this:
388 * - do enough of "ifconfig" by calling ifioctl() so that the system
389 * can talk to the server
390 * - If nfs_diskless.mygateway is filled in, use that address as
391 * a default gateway.
392 * - build the rootfs mount point and call mountnfs() to do the rest.
393 *
394 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
395 * structure, as well as other global NFS client variables here, as
396 * nfs_mountroot() will be called once in the boot before any other NFS
397 * client activity occurs.
398 */
399 static int
400 nfs_mountroot(struct mount *mp)
401 {
402 struct thread *td = curthread;
403 struct nfsv3_diskless *nd = &nfsv3_diskless;
404 struct socket *so;
405 struct vnode *vp;
406 struct ifreq ir;
407 int error;
408 u_long l;
409 char buf[128];
410 char *cp;
411
412 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
413 bootpc_init(); /* use bootp to get nfs_diskless filled in */
414 #elif defined(NFS_ROOT)
415 nfs_setup_diskless();
416 #endif
417
418 if (nfs_diskless_valid == 0)
419 return (-1);
420 if (nfs_diskless_valid == 1)
421 nfs_convert_diskless();
422
423 /*
424 * Do enough of ifconfig(8) so that the critical net interface can
425 * talk to the server.
426 */
427 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
428 td->td_ucred, td);
429 if (error)
430 panic("nfs_mountroot: socreate(%04x): %d",
431 nd->myif.ifra_addr.sa_family, error);
432
433 #if 0 /* XXX Bad idea */
434 /*
435 * We might not have been told the right interface, so we pass
436 * over the first ten interfaces of the same kind, until we get
437 * one of them configured.
438 */
439
440 for (i = strlen(nd->myif.ifra_name) - 1;
441 nd->myif.ifra_name[i] >= '' &&
442 nd->myif.ifra_name[i] <= '9';
443 nd->myif.ifra_name[i] ++) {
444 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
445 if(!error)
446 break;
447 }
448 #endif
449 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
450 if (error)
451 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
452 if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
453 ir.ifr_mtu = strtol(cp, NULL, 10);
454 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
455 freeenv(cp);
456 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
457 if (error)
458 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
459 }
460 soclose(so);
461
462 /*
463 * If the gateway field is filled in, set it as the default route.
464 * Note that pxeboot will set a default route of 0 if the route
465 * is not set by the DHCP server. Check also for a value of 0
466 * to avoid panicking inappropriately in that situation.
467 */
468 if (nd->mygateway.sin_len != 0 &&
469 nd->mygateway.sin_addr.s_addr != 0) {
470 struct sockaddr_in mask, sin;
471 struct epoch_tracker et;
472 struct rt_addrinfo info;
473 struct rib_cmd_info rc;
474
475 bzero((caddr_t)&mask, sizeof(mask));
476 sin = mask;
477 sin.sin_family = AF_INET;
478 sin.sin_len = sizeof(sin);
479 /* XXX MRT use table 0 for this sort of thing */
480 NET_EPOCH_ENTER(et);
481 CURVNET_SET(TD_TO_VNET(td));
482
483 bzero((caddr_t)&info, sizeof(info));
484 info.rti_flags = RTF_UP | RTF_GATEWAY;
485 info.rti_info[RTAX_DST] = (struct sockaddr *)&sin;
486 info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&nd->mygateway;
487 info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
488
489 error = rib_action(RT_DEFAULT_FIB, RTM_ADD, &info, &rc);
490 CURVNET_RESTORE();
491 NET_EPOCH_EXIT(et);
492 if (error)
493 panic("nfs_mountroot: RTM_ADD: %d", error);
494 }
495
496 /*
497 * Create the rootfs mount point.
498 */
499 nd->root_args.fh = nd->root_fh;
500 nd->root_args.fhsize = nd->root_fhsize;
501 l = ntohl(nd->root_saddr.sin_addr.s_addr);
502 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
503 (l >> 24) & 0xff, (l >> 16) & 0xff,
504 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
505 printf("NFS ROOT: %s\n", buf);
506 nd->root_args.hostname = buf;
507 if ((error = nfs_mountdiskless(buf,
508 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
509 return (error);
510 }
511
512 /*
513 * This is not really an nfs issue, but it is much easier to
514 * set hostname here and then let the "/etc/rc.xxx" files
515 * mount the right /var based upon its preset value.
516 */
517 mtx_lock(&prison0.pr_mtx);
518 strlcpy(prison0.pr_hostname, nd->my_hostnam,
519 sizeof(prison0.pr_hostname));
520 mtx_unlock(&prison0.pr_mtx);
521 inittodr(ntohl(nd->root_time));
522 return (0);
523 }
524
525 /*
526 * Internal version of mount system call for diskless setup.
527 */
528 static int
529 nfs_mountdiskless(char *path,
530 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
531 struct vnode **vpp, struct mount *mp)
532 {
533 struct sockaddr *nam;
534 int dirlen, error;
535 char *dirpath;
536
537 /*
538 * Find the directory path in "path", which also has the server's
539 * name/ip address in it.
540 */
541 dirpath = strchr(path, ':');
542 if (dirpath != NULL)
543 dirlen = strlen(++dirpath);
544 else
545 dirlen = 0;
546 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
547 if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
548 NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
549 NFS_DEFAULT_NEGNAMETIMEO, 0, 0, NULL, 0)) != 0) {
550 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
551 return (error);
552 }
553 return (0);
554 }
555
556 static void
557 nfs_sec_name(char *sec, int *flagsp)
558 {
559 if (!strcmp(sec, "krb5"))
560 *flagsp |= NFSMNT_KERB;
561 else if (!strcmp(sec, "krb5i"))
562 *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
563 else if (!strcmp(sec, "krb5p"))
564 *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
565 }
566
567 static void
568 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
569 const char *hostname, struct ucred *cred, struct thread *td)
570 {
571 int adjsock;
572 char *p;
573
574 /*
575 * Set read-only flag if requested; otherwise, clear it if this is
576 * an update. If this is not an update, then either the read-only
577 * flag is already clear, or this is a root mount and it was set
578 * intentionally at some previous point.
579 */
580 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
581 MNT_ILOCK(mp);
582 mp->mnt_flag |= MNT_RDONLY;
583 MNT_IUNLOCK(mp);
584 } else if (mp->mnt_flag & MNT_UPDATE) {
585 MNT_ILOCK(mp);
586 mp->mnt_flag &= ~MNT_RDONLY;
587 MNT_IUNLOCK(mp);
588 }
589
590 /*
591 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
592 * no sense in that context. Also, set up appropriate retransmit
593 * and soft timeout behavior.
594 */
595 if (argp->sotype == SOCK_STREAM) {
596 nmp->nm_flag &= ~NFSMNT_NOCONN;
597 nmp->nm_timeo = NFS_MAXTIMEO;
598 if ((argp->flags & NFSMNT_NFSV4) != 0)
599 nmp->nm_retry = INT_MAX;
600 else
601 nmp->nm_retry = NFS_RETRANS_TCP;
602 }
603
604 /* Also clear RDIRPLUS if NFSv2, it crashes some servers */
605 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
606 argp->flags &= ~NFSMNT_RDIRPLUS;
607 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
608 }
609
610 /* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
611 if (nmp->nm_minorvers == 0) {
612 argp->flags &= ~NFSMNT_ONEOPENOWN;
613 nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
614 }
615
616 /* Re-bind if rsrvd port requested and wasn't on one */
617 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
618 && (argp->flags & NFSMNT_RESVPORT);
619 /* Also re-bind if we're switching to/from a connected UDP socket */
620 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
621 (argp->flags & NFSMNT_NOCONN));
622
623 /* Update flags atomically. Don't change the lock bits. */
624 nmp->nm_flag = argp->flags | nmp->nm_flag;
625
626 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
627 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
628 if (nmp->nm_timeo < NFS_MINTIMEO)
629 nmp->nm_timeo = NFS_MINTIMEO;
630 else if (nmp->nm_timeo > NFS_MAXTIMEO)
631 nmp->nm_timeo = NFS_MAXTIMEO;
632 }
633
634 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
635 nmp->nm_retry = argp->retrans;
636 if (nmp->nm_retry > NFS_MAXREXMIT)
637 nmp->nm_retry = NFS_MAXREXMIT;
638 }
639
640 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
641 nmp->nm_wsize = argp->wsize;
642 /*
643 * Clip at the power of 2 below the size. There is an
644 * issue (not isolated) that causes intermittent page
645 * faults if this is not done.
646 */
647 if (nmp->nm_wsize > NFS_FABLKSIZE)
648 nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
649 else
650 nmp->nm_wsize = NFS_FABLKSIZE;
651 }
652
653 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
654 nmp->nm_rsize = argp->rsize;
655 /*
656 * Clip at the power of 2 below the size. There is an
657 * issue (not isolated) that causes intermittent page
658 * faults if this is not done.
659 */
660 if (nmp->nm_rsize > NFS_FABLKSIZE)
661 nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
662 else
663 nmp->nm_rsize = NFS_FABLKSIZE;
664 }
665
666 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
667 nmp->nm_readdirsize = argp->readdirsize;
668 }
669
670 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
671 nmp->nm_acregmin = argp->acregmin;
672 else
673 nmp->nm_acregmin = NFS_MINATTRTIMO;
674 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
675 nmp->nm_acregmax = argp->acregmax;
676 else
677 nmp->nm_acregmax = NFS_MAXATTRTIMO;
678 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
679 nmp->nm_acdirmin = argp->acdirmin;
680 else
681 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
682 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
683 nmp->nm_acdirmax = argp->acdirmax;
684 else
685 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
686 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
687 nmp->nm_acdirmin = nmp->nm_acdirmax;
688 if (nmp->nm_acregmin > nmp->nm_acregmax)
689 nmp->nm_acregmin = nmp->nm_acregmax;
690
691 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
692 if (argp->readahead <= NFS_MAXRAHEAD)
693 nmp->nm_readahead = argp->readahead;
694 else
695 nmp->nm_readahead = NFS_MAXRAHEAD;
696 }
697 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
698 if (argp->wcommitsize < nmp->nm_wsize)
699 nmp->nm_wcommitsize = nmp->nm_wsize;
700 else
701 nmp->nm_wcommitsize = argp->wcommitsize;
702 }
703
704 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
705 (nmp->nm_soproto != argp->proto));
706
707 if (nmp->nm_client != NULL && adjsock) {
708 int haslock = 0, error = 0;
709
710 if (nmp->nm_sotype == SOCK_STREAM) {
711 error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
712 if (!error)
713 haslock = 1;
714 }
715 if (!error) {
716 newnfs_disconnect(nmp, &nmp->nm_sockreq);
717 if (haslock)
718 newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
719 nmp->nm_sotype = argp->sotype;
720 nmp->nm_soproto = argp->proto;
721 if (nmp->nm_sotype == SOCK_DGRAM)
722 while (newnfs_connect(nmp, &nmp->nm_sockreq,
723 cred, td, 0, false, &nmp->nm_sockreq.nr_client)) {
724 printf("newnfs_args: retrying connect\n");
725 (void) nfs_catnap(PSOCK, 0, "nfscon");
726 }
727 }
728 } else {
729 nmp->nm_sotype = argp->sotype;
730 nmp->nm_soproto = argp->proto;
731 }
732
733 if (hostname != NULL) {
734 strlcpy(nmp->nm_hostname, hostname,
735 sizeof(nmp->nm_hostname));
736 p = strchr(nmp->nm_hostname, ':');
737 if (p != NULL)
738 *p = '\0';
739 }
740 }
741
/*
 * NULL-terminated list of option names passed to vfs_filteropt() when
 * an NFS mount request is checked.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel", "acls", "force",
	"update", "async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout",
	"addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "minorversion",
	"nametimeo", "negnametimeo", "nocto", "noncontigwr",
	"pnfs", "wcommitsize", "oneopenown",
	"tls", "tlscertname", "nconnect",
	NULL
};
753
754 /*
755 * Parse the "from" mountarg, passed by the generic mount(8) program
756 * or the mountroot code. This is used when rerooting into NFS.
757 *
758 * Note that the "hostname" is actually a "hostname:/share/path" string.
759 */
760 static int
761 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
762 struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
763 {
764 char *nam, *delimp, *hostp, *spec;
765 int error, have_bracket = 0, offset, rv, speclen;
766 struct sockaddr_in *sin;
767 size_t len;
768
769 error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
770 if (error != 0)
771 return (error);
772 nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
773
774 /*
775 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
776 */
777 if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
778 *(delimp + 1) == ':') {
779 hostp = spec + 1;
780 spec = delimp + 2;
781 have_bracket = 1;
782 } else if ((delimp = strrchr(spec, ':')) != NULL) {
783 hostp = spec;
784 spec = delimp + 1;
785 } else if ((delimp = strrchr(spec, '@')) != NULL) {
786 printf("%s: path@server syntax is deprecated, "
787 "use server:path\n", __func__);
788 hostp = delimp + 1;
789 } else {
790 printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
791 free(nam, M_TEMP);
792 return (EINVAL);
793 }
794 *delimp = '\0';
795
796 /*
797 * If there has been a trailing slash at mounttime it seems
798 * that some mountd implementations fail to remove the mount
799 * entries from their mountlist while unmounting.
800 */
801 for (speclen = strlen(spec);
802 speclen > 1 && spec[speclen - 1] == '/';
803 speclen--)
804 spec[speclen - 1] = '\0';
805 if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
806 printf("%s: %s:%s: name too long", __func__, hostp, spec);
807 free(nam, M_TEMP);
808 return (EINVAL);
809 }
810 /* Make both '@' and ':' notations equal */
811 if (*hostp != '\0') {
812 len = strlen(hostp);
813 offset = 0;
814 if (have_bracket)
815 nam[offset++] = '[';
816 memmove(nam + offset, hostp, len);
817 if (have_bracket)
818 nam[len + offset++] = ']';
819 nam[len + offset++] = ':';
820 memmove(nam + len + offset, spec, speclen);
821 nam[len + speclen + offset] = '\0';
822 } else
823 nam[0] = '\0';
824
825 /*
826 * XXX: IPv6
827 */
828 sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
829 rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
830 if (rv != 1) {
831 printf("%s: cannot parse '%s', inet_pton() returned %d\n",
832 __func__, hostp, rv);
833 free(nam, M_TEMP);
834 free(sin, M_SONAME);
835 return (EINVAL);
836 }
837
838 sin->sin_len = sizeof(*sin);
839 sin->sin_family = AF_INET;
840 /*
841 * XXX: hardcoded port number.
842 */
843 sin->sin_port = htons(2049);
844
845 *hostnamep = strdup(nam, M_NEWNFSMNT);
846 *sinp = sin;
847 strlcpy(dirpath, spec, dirpathsize);
848 *dirlenp = strlen(dirpath);
849
850 free(nam, M_TEMP);
851 return (0);
852 }
853
854 /*
855 * VFS Operations.
856 *
857 * mount system call
858 * It seems a bit dumb to copyinstr() the host and path here and then
859 * bcopy() them in mountnfs(), but I wanted to detect errors before
860 * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
861 * an error after that means that I have to release the mbuf.
862 */
863 /* ARGSUSED */
864 static int
865 nfs_mount(struct mount *mp)
866 {
867 struct nfs_args args = {
868 .version = NFS_ARGSVERSION,
869 .addr = NULL,
870 .addrlen = sizeof (struct sockaddr_in),
871 .sotype = SOCK_STREAM,
872 .proto = 0,
873 .fh = NULL,
874 .fhsize = 0,
875 .flags = NFSMNT_RESVPORT,
876 .wsize = NFS_WSIZE,
877 .rsize = NFS_RSIZE,
878 .readdirsize = NFS_READDIRSIZE,
879 .timeo = 10,
880 .retrans = NFS_RETRANS,
881 .readahead = NFS_DEFRAHEAD,
882 .wcommitsize = 0, /* was: NQ_DEFLEASE */
883 .hostname = NULL,
884 .acregmin = NFS_MINATTRTIMO,
885 .acregmax = NFS_MAXATTRTIMO,
886 .acdirmin = NFS_MINDIRATTRTIMO,
887 .acdirmax = NFS_MAXDIRATTRTIMO,
888 };
889 int error = 0, ret, len;
890 struct sockaddr *nam = NULL;
891 struct vnode *vp;
892 struct thread *td;
893 char *hst;
894 u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
895 char *cp, *opt, *name, *secname, *tlscertname;
896 int nametimeo = NFS_DEFAULT_NAMETIMEO;
897 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
898 int minvers = -1;
899 int dirlen, has_nfs_args_opt, has_nfs_from_opt,
900 krbnamelen, srvkrbnamelen;
901 size_t hstlen;
902 uint32_t newflag;
903 int aconn = 0;
904
905 has_nfs_args_opt = 0;
906 has_nfs_from_opt = 0;
907 newflag = 0;
908 tlscertname = NULL;
909 hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
910 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
911 error = EINVAL;
912 goto out;
913 }
914
915 td = curthread;
916 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
917 nfs_diskless_valid != 0) {
918 error = nfs_mountroot(mp);
919 goto out;
920 }
921
922 nfscl_init();
923
924 /*
925 * The old mount_nfs program passed the struct nfs_args
926 * from userspace to kernel. The new mount_nfs program
927 * passes string options via nmount() from userspace to kernel
928 * and we populate the struct nfs_args in the kernel.
929 */
930 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
931 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
932 sizeof(args));
933 if (error != 0)
934 goto out;
935
936 if (args.version != NFS_ARGSVERSION) {
937 error = EPROGMISMATCH;
938 goto out;
939 }
940 has_nfs_args_opt = 1;
941 }
942
943 /* Handle the new style options. */
944 if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
945 args.acdirmin = args.acdirmax =
946 args.acregmin = args.acregmax = 0;
947 args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
948 NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
949 }
950 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
951 args.flags |= NFSMNT_NOCONN;
952 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
953 args.flags &= ~NFSMNT_NOCONN;
954 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
955 args.flags |= NFSMNT_NOLOCKD;
956 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
957 args.flags &= ~NFSMNT_NOLOCKD;
958 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
959 args.flags |= NFSMNT_INT;
960 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
961 args.flags |= NFSMNT_RDIRPLUS;
962 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
963 args.flags |= NFSMNT_RESVPORT;
964 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
965 args.flags &= ~NFSMNT_RESVPORT;
966 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
967 args.flags |= NFSMNT_SOFT;
968 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
969 args.flags &= ~NFSMNT_SOFT;
970 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
971 args.sotype = SOCK_DGRAM;
972 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
973 args.sotype = SOCK_DGRAM;
974 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
975 args.sotype = SOCK_STREAM;
976 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
977 args.flags |= NFSMNT_NFSV3;
978 if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
979 args.flags |= NFSMNT_NFSV4;
980 args.sotype = SOCK_STREAM;
981 }
982 if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
983 args.flags |= NFSMNT_ALLGSSNAME;
984 if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
985 args.flags |= NFSMNT_NOCTO;
986 if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
987 args.flags |= NFSMNT_NONCONTIGWR;
988 if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
989 args.flags |= NFSMNT_PNFS;
990 if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
991 args.flags |= NFSMNT_ONEOPENOWN;
992 if (vfs_getopt(mp->mnt_optnew, "tls", NULL, NULL) == 0)
993 newflag |= NFSMNT_TLS;
994 if (vfs_getopt(mp->mnt_optnew, "tlscertname", (void **)&opt, &len) ==
995 0) {
996 /*
997 * tlscertname with "key.pem" appended to it forms a file
998 * name. As such, the maximum allowable strlen(tlscertname) is
999 * NAME_MAX - 7. However, "len" includes the nul termination
1000 * byte so it can be up to NAME_MAX - 6.
1001 */
1002 if (opt == NULL || len <= 1 || len > NAME_MAX - 6) {
1003 vfs_mount_error(mp, "invalid tlscertname");
1004 error = EINVAL;
1005 goto out;
1006 }
1007 tlscertname = malloc(len, M_NEWNFSMNT, M_WAITOK);
1008 strlcpy(tlscertname, opt, len);
1009 }
1010 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
1011 if (opt == NULL) {
1012 vfs_mount_error(mp, "illegal readdirsize");
1013 error = EINVAL;
1014 goto out;
1015 }
1016 ret = sscanf(opt, "%d", &args.readdirsize);
1017 if (ret != 1 || args.readdirsize <= 0) {
1018 vfs_mount_error(mp, "illegal readdirsize: %s",
1019 opt);
1020 error = EINVAL;
1021 goto out;
1022 }
1023 args.flags |= NFSMNT_READDIRSIZE;
1024 }
1025 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
1026 if (opt == NULL) {
1027 vfs_mount_error(mp, "illegal readahead");
1028 error = EINVAL;
1029 goto out;
1030 }
1031 ret = sscanf(opt, "%d", &args.readahead);
1032 if (ret != 1 || args.readahead <= 0) {
1033 vfs_mount_error(mp, "illegal readahead: %s",
1034 opt);
1035 error = EINVAL;
1036 goto out;
1037 }
1038 args.flags |= NFSMNT_READAHEAD;
1039 }
1040 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1041 if (opt == NULL) {
1042 vfs_mount_error(mp, "illegal wsize");
1043 error = EINVAL;
1044 goto out;
1045 }
1046 ret = sscanf(opt, "%d", &args.wsize);
1047 if (ret != 1 || args.wsize <= 0) {
1048 vfs_mount_error(mp, "illegal wsize: %s",
1049 opt);
1050 error = EINVAL;
1051 goto out;
1052 }
1053 args.flags |= NFSMNT_WSIZE;
1054 }
1055 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1056 if (opt == NULL) {
1057 vfs_mount_error(mp, "illegal rsize");
1058 error = EINVAL;
1059 goto out;
1060 }
1061 ret = sscanf(opt, "%d", &args.rsize);
1062 if (ret != 1 || args.rsize <= 0) {
1063 vfs_mount_error(mp, "illegal wsize: %s",
1064 opt);
1065 error = EINVAL;
1066 goto out;
1067 }
1068 args.flags |= NFSMNT_RSIZE;
1069 }
1070 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1071 if (opt == NULL) {
1072 vfs_mount_error(mp, "illegal retrans");
1073 error = EINVAL;
1074 goto out;
1075 }
1076 ret = sscanf(opt, "%d", &args.retrans);
1077 if (ret != 1 || args.retrans <= 0) {
1078 vfs_mount_error(mp, "illegal retrans: %s",
1079 opt);
1080 error = EINVAL;
1081 goto out;
1082 }
1083 args.flags |= NFSMNT_RETRANS;
1084 }
1085 if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1086 ret = sscanf(opt, "%d", &args.acregmin);
1087 if (ret != 1 || args.acregmin < 0) {
1088 vfs_mount_error(mp, "illegal actimeo: %s",
1089 opt);
1090 error = EINVAL;
1091 goto out;
1092 }
1093 args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1094 args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1095 NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1096 }
1097 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1098 ret = sscanf(opt, "%d", &args.acregmin);
1099 if (ret != 1 || args.acregmin < 0) {
1100 vfs_mount_error(mp, "illegal acregmin: %s",
1101 opt);
1102 error = EINVAL;
1103 goto out;
1104 }
1105 args.flags |= NFSMNT_ACREGMIN;
1106 }
1107 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1108 ret = sscanf(opt, "%d", &args.acregmax);
1109 if (ret != 1 || args.acregmax < 0) {
1110 vfs_mount_error(mp, "illegal acregmax: %s",
1111 opt);
1112 error = EINVAL;
1113 goto out;
1114 }
1115 args.flags |= NFSMNT_ACREGMAX;
1116 }
1117 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1118 ret = sscanf(opt, "%d", &args.acdirmin);
1119 if (ret != 1 || args.acdirmin < 0) {
1120 vfs_mount_error(mp, "illegal acdirmin: %s",
1121 opt);
1122 error = EINVAL;
1123 goto out;
1124 }
1125 args.flags |= NFSMNT_ACDIRMIN;
1126 }
1127 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1128 ret = sscanf(opt, "%d", &args.acdirmax);
1129 if (ret != 1 || args.acdirmax < 0) {
1130 vfs_mount_error(mp, "illegal acdirmax: %s",
1131 opt);
1132 error = EINVAL;
1133 goto out;
1134 }
1135 args.flags |= NFSMNT_ACDIRMAX;
1136 }
1137 if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1138 ret = sscanf(opt, "%d", &args.wcommitsize);
1139 if (ret != 1 || args.wcommitsize < 0) {
1140 vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1141 error = EINVAL;
1142 goto out;
1143 }
1144 args.flags |= NFSMNT_WCOMMITSIZE;
1145 }
1146 if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1147 ret = sscanf(opt, "%d", &args.timeo);
1148 if (ret != 1 || args.timeo <= 0) {
1149 vfs_mount_error(mp, "illegal timeo: %s",
1150 opt);
1151 error = EINVAL;
1152 goto out;
1153 }
1154 args.flags |= NFSMNT_TIMEO;
1155 }
1156 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1157 ret = sscanf(opt, "%d", &args.timeo);
1158 if (ret != 1 || args.timeo <= 0) {
1159 vfs_mount_error(mp, "illegal timeout: %s",
1160 opt);
1161 error = EINVAL;
1162 goto out;
1163 }
1164 args.flags |= NFSMNT_TIMEO;
1165 }
1166 if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1167 ret = sscanf(opt, "%d", &nametimeo);
1168 if (ret != 1 || nametimeo < 0) {
1169 vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1170 error = EINVAL;
1171 goto out;
1172 }
1173 }
1174 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1175 == 0) {
1176 ret = sscanf(opt, "%d", &negnametimeo);
1177 if (ret != 1 || negnametimeo < 0) {
1178 vfs_mount_error(mp, "illegal negnametimeo: %s",
1179 opt);
1180 error = EINVAL;
1181 goto out;
1182 }
1183 }
1184 if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1185 0) {
1186 ret = sscanf(opt, "%d", &minvers);
1187 if (ret != 1 || minvers < 0 || minvers > 2 ||
1188 (args.flags & NFSMNT_NFSV4) == 0) {
1189 vfs_mount_error(mp, "illegal minorversion: %s", opt);
1190 error = EINVAL;
1191 goto out;
1192 }
1193 }
1194 if (vfs_getopt(mp->mnt_optnew, "nconnect", (void **)&opt, NULL) ==
1195 0) {
1196 ret = sscanf(opt, "%d", &aconn);
1197 if (ret != 1 || aconn < 1 || aconn > NFS_MAXNCONN) {
1198 vfs_mount_error(mp, "illegal nconnect: %s", opt);
1199 error = EINVAL;
1200 goto out;
1201 }
1202 /*
1203 * Setting nconnect=1 is a no-op, allowed so that
1204 * the option can be used in a Linux compatible way.
1205 */
1206 aconn--;
1207 }
1208 if (vfs_getopt(mp->mnt_optnew, "sec",
1209 (void **) &secname, NULL) == 0)
1210 nfs_sec_name(secname, &args.flags);
1211
1212 if (mp->mnt_flag & MNT_UPDATE) {
1213 struct nfsmount *nmp = VFSTONFS(mp);
1214
1215 if (nmp == NULL) {
1216 error = EIO;
1217 goto out;
1218 }
1219
1220 /*
1221 * If a change from TCP->UDP is done and there are thread(s)
1222 * that have I/O RPC(s) in progress with a transfer size
1223 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1224 * hung, retrying the RPC(s) forever. Usually these threads
1225 * will be seen doing an uninterruptible sleep on wait channel
1226 * "nfsreq".
1227 */
1228 if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1229 tprintf(td->td_proc, LOG_WARNING,
1230 "Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1231
1232 /*
1233 * When doing an update, we can't change version,
1234 * security, switch lockd strategies, change cookie
1235 * translation or switch oneopenown.
1236 */
1237 args.flags = (args.flags &
1238 ~(NFSMNT_NFSV3 |
1239 NFSMNT_NFSV4 |
1240 NFSMNT_KERB |
1241 NFSMNT_INTEGRITY |
1242 NFSMNT_PRIVACY |
1243 NFSMNT_ONEOPENOWN |
1244 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1245 (nmp->nm_flag &
1246 (NFSMNT_NFSV3 |
1247 NFSMNT_NFSV4 |
1248 NFSMNT_KERB |
1249 NFSMNT_INTEGRITY |
1250 NFSMNT_PRIVACY |
1251 NFSMNT_ONEOPENOWN |
1252 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1253 nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1254 goto out;
1255 }
1256
1257 /*
1258 * Make the nfs_ip_paranoia sysctl serve as the default connection
1259 * or no-connection mode for those protocols that support
1260 * no-connection mode (the flag will be cleared later for protocols
1261 * that do not support no-connection mode). This will allow a client
1262 * to receive replies from a different IP then the request was
1263 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1264 * not 0.
1265 */
1266 if (nfs_ip_paranoia == 0)
1267 args.flags |= NFSMNT_NOCONN;
1268
1269 if (has_nfs_args_opt != 0) {
1270 /*
1271 * In the 'nfs_args' case, the pointers in the args
1272 * structure are in userland - we copy them in here.
1273 */
1274 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1275 vfs_mount_error(mp, "Bad file handle");
1276 error = EINVAL;
1277 goto out;
1278 }
1279 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1280 args.fhsize);
1281 if (error != 0)
1282 goto out;
1283 error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1284 if (error != 0)
1285 goto out;
1286 bzero(&hst[hstlen], MNAMELEN - hstlen);
1287 args.hostname = hst;
1288 /* getsockaddr() call must be after above copyin() calls */
1289 error = getsockaddr(&nam, args.addr, args.addrlen);
1290 if (error != 0)
1291 goto out;
1292 } else if (nfs_mount_parse_from(mp->mnt_optnew,
1293 &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1294 sizeof(dirpath), &dirlen) == 0) {
1295 has_nfs_from_opt = 1;
1296 bcopy(args.hostname, hst, MNAMELEN);
1297 hst[MNAMELEN - 1] = '\0';
1298
1299 /*
1300 * This only works with NFSv4 for now.
1301 */
1302 args.fhsize = 0;
1303 args.flags |= NFSMNT_NFSV4;
1304 args.sotype = SOCK_STREAM;
1305 } else {
1306 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1307 &args.fhsize) == 0) {
1308 if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1309 vfs_mount_error(mp, "Bad file handle");
1310 error = EINVAL;
1311 goto out;
1312 }
1313 bcopy(args.fh, nfh, args.fhsize);
1314 } else {
1315 args.fhsize = 0;
1316 }
1317 (void) vfs_getopt(mp->mnt_optnew, "hostname",
1318 (void **)&args.hostname, &len);
1319 if (args.hostname == NULL) {
1320 vfs_mount_error(mp, "Invalid hostname");
1321 error = EINVAL;
1322 goto out;
1323 }
1324 if (len >= MNAMELEN) {
1325 vfs_mount_error(mp, "Hostname too long");
1326 error = EINVAL;
1327 goto out;
1328 }
1329 bcopy(args.hostname, hst, len);
1330 hst[len] = '\0';
1331 }
1332
1333 if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1334 strlcpy(srvkrbname, name, sizeof (srvkrbname));
1335 else {
1336 snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1337 cp = strchr(srvkrbname, ':');
1338 if (cp != NULL)
1339 *cp = '\0';
1340 }
1341 srvkrbnamelen = strlen(srvkrbname);
1342
1343 if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1344 strlcpy(krbname, name, sizeof (krbname));
1345 else
1346 krbname[0] = '\0';
1347 krbnamelen = strlen(krbname);
1348
1349 if (has_nfs_from_opt == 0) {
1350 if (vfs_getopt(mp->mnt_optnew,
1351 "dirpath", (void **)&name, NULL) == 0)
1352 strlcpy(dirpath, name, sizeof (dirpath));
1353 else
1354 dirpath[0] = '\0';
1355 dirlen = strlen(dirpath);
1356 }
1357
1358 if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1359 if (vfs_getopt(mp->mnt_optnew, "addr",
1360 (void **)&args.addr, &args.addrlen) == 0) {
1361 if (args.addrlen > SOCK_MAXADDRLEN) {
1362 error = ENAMETOOLONG;
1363 goto out;
1364 }
1365 nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1366 bcopy(args.addr, nam, args.addrlen);
1367 nam->sa_len = args.addrlen;
1368 } else {
1369 vfs_mount_error(mp, "No server address");
1370 error = EINVAL;
1371 goto out;
1372 }
1373 }
1374
1375 if (aconn > 0 && (args.sotype != SOCK_STREAM ||
1376 (args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
1377 /*
1378 * RFC 5661 requires that an NFSv4.1/4.2 server
1379 * send an RPC reply on the same TCP connection
1380 * as the one it received the request on.
1381 * This property in required for "nconnect" and
1382 * might not be the case for NFSv3 or NFSv4.0 servers.
1383 */
1384 vfs_mount_error(mp, "nconnect should only be used "
1385 "for NFSv4.1/4.2 mounts");
1386 error = EINVAL;
1387 goto out;
1388 }
1389
1390 args.fh = nfh;
1391 error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1392 dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1393 nametimeo, negnametimeo, minvers, newflag, tlscertname, aconn);
1394 out:
1395 if (!error) {
1396 MNT_ILOCK(mp);
1397 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1398 MNTK_USES_BCACHE;
1399 if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1400 mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1401 MNT_IUNLOCK(mp);
1402 }
1403 free(hst, M_TEMP);
1404 return (error);
1405 }
1406
1407 /*
1408 * VFS Operations.
1409 *
1410 * mount system call
1411 * It seems a bit dumb to copyinstr() the host and path here and then
1412 * bcopy() them in mountnfs(), but I wanted to detect errors before
1413 * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1414 * an error after that means that I have to release the mbuf.
1415 */
1416 /* ARGSUSED */
1417 static int
1418 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1419 {
1420 int error;
1421 struct nfs_args args;
1422
1423 error = copyin(data, &args, sizeof (struct nfs_args));
1424 if (error)
1425 return error;
1426
1427 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1428
1429 error = kernel_mount(ma, flags);
1430 return (error);
1431 }
1432
/*
 * Common code for mount and mountroot
 *
 * Allocates and fills in the struct nfsmount for "mp", connects to the
 * server with newnfs_connect(), for NFSv4 acquires the clientid and (when
 * no file handle was supplied but a directory path was) looks up the
 * mount path, and finally gets the root vnode.  On success the root vnode
 * is returned in *vpp unlocked but still referenced.
 *
 * "nam" and "tlscertname" are consumed by this function: they are either
 * stored in the new nfsmount (nm_nam, nm_tlscertname) or freed on the
 * early-return and error paths below.
 *
 * Returns 0 on success, otherwise an errno value.  EIO is returned when
 * no root file handle is available after the setup steps; EINVAL is
 * returned when TLS was requested but cannot be used, or when "aconn" is
 * set for an NFSv4.0 mount.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers, uint32_t newflag, char *tlscertname, int aconn)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	bool tryminvers;
	/*
	 * Source of nm_clval values: seeded from nfsboottime.tv_sec on
	 * first use and incremented for every mount set up here.
	 */
	static u_int64_t clval = 0;
#ifdef KERN_TLS
	u_int maxlen;
#endif

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Updates are not processed here; just release the
		 * arguments this function owns and report success.
		 */
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		free(nam, M_SONAME);
		free(tlscertname, M_NEWNFSMNT);
		return (0);
	} else {
		/* NFS-over-TLS requires that rpctls be functioning. */
		if ((newflag & NFSMNT_TLS) != 0) {
			/* Fails with EINVAL unless the check below passes. */
			error = EINVAL;
#ifdef KERN_TLS
			/* KERN_TLS is only supported for TCP. */
			if (argp->sotype == SOCK_STREAM &&
			    rpctls_getinfo(&maxlen, true, false))
				error = 0;
#endif
			if (error != 0) {
				free(nam, M_SONAME);
				free(tlscertname, M_NEWNFSMNT);
				return (error);
			}
		}
		/*
		 * The three names are stored after the fixed part of the
		 * structure, hence the extra length.
		 * NOTE(review): three terminators are written below but only
		 * 2 extra bytes are added here; presumably the nm_name array
		 * in struct nfsmount supplies the remaining byte - confirm.
		 */
		nmp = malloc(sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		nmp->nm_tlscertname = tlscertname;
		nmp->nm_newflag = newflag;
		TAILQ_INIT(&nmp->nm_bufq);
		TAILQ_INIT(&nmp->nm_sess);
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* The reference taken here is dropped by crfree() at "bad". */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;

	/* This is empirical approximation of sqrt(hibufspace) * 256. */
	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
		nmp->nm_wcommitsize *= 2;
	nmp->nm_wcommitsize *= 256;

	/*
	 * For NFSv4, a negative "minvers" is replaced by
	 * NFSV42_MINORVERSION and tryminvers is set; tryminvers is handed
	 * to nfscl_getcl() below (presumably so that lower minor versions
	 * can be tried - confirm against nfscl_getcl()).
	 */
	tryminvers = false;
	if ((argp->flags & NFSMNT_NFSV4) != 0) {
		if (minvers < 0) {
			tryminvers = true;
			minvers = NFSV42_MINORVERSION;
		}
		nmp->nm_minorvers = minvers;
	} else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere. 2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/*
	 * Neither the V3 nor the V4 flag is set (the case that selects
	 * NFS_VER2 further down): use the default transfer sizes.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative tprintf delays are clamped to 0. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	strlcpy(mp->mnt_stat.f_mntfromname, hst, MNAMELEN);
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;

	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false,
	    &nmp->nm_sockreq.nr_client)))
		goto bad;
	/* For NFSv4, get the clientid now. */
	if ((argp->flags & NFSMNT_NFSV4) != 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, tryminvers, true, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
		/*
		 * nm_minorvers is checked again after nfscl_getcl();
		 * additional connections are refused for minor version 0.
		 */
		if (aconn > 0 && nmp->nm_minorvers == 0) {
			vfs_mount_error(mp, "nconnect should only be used "
			    "for NFSv4.1/4.2 mounts");
			error = EINVAL;
			goto bad;
		}
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 */
		/* Up to 3 attempts, with nfs_catnap() after each failure. */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error)
			goto bad;
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root. If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == UFS_ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint. This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		if ((argp->flags & NFSMNT_NFSV4) != 0) {
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Bump the revision, skipping over 0 on wrap. */
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/* Mark if the mount point supports NFSv4 ACLs. */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/* Can now allow additional connections. */
		if (aconn > 0)
			nmp->nm_aconnect = aconn;

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp);
		vfs_cache_root_set(mp, *vpp);
		return (0);
	}
	/* No file handle for the root is available: fail the mount. */
	error = EIO;

bad:
	/*
	 * Error exit: undo the setup done above and free the nfsmount
	 * together with the arguments this function owns.
	 */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(NULL, &nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	/*
	 * Every session entry is freed, but only entries other than the
	 * first one that have a socket are disconnected here (presumably
	 * the first entry uses nm_sockreq, handled above - confirm).
	 */
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
		    dsp->nfsclds_sockp != NULL)
			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
		nfscl_freenfsclds(dsp);
	}
	free(nmp->nm_tlscertname, M_NEWNFSMNT);
	free(nmp, M_NEWNFSMNT);
	free(nam, M_SONAME);
	return (error);
}
1757
/*
 * unmount system call
 *
 * Flushes the vnodes of "mp", tears down the connection(s) and frees the
 * struct nfsmount.  When MNT_FORCE is set in "mntflags", outstanding
 * requests are cancelled first and vflush() is done with FORCECLOSE.
 *
 * Returns 0 on success.  In the forced case ENXIO is returned when
 * nfsv4_findmirror() finds an entry for this mount, and an error from
 * newnfs_nmcancelreqs() is passed back; in all cases a failing vflush()
 * aborts the unmount with its error.
 */
static int
nfs_unmount(struct mount *mp, int mntflags)
{
	struct thread *td;
	struct nfsmount *nmp;
	int error, flags = 0, i, trycnt = 0;
	struct nfsclds *dsp, *tdsp;
	struct nfscldeleg *dp, *ndp;
	/*
	 * Delegation list handed to nfscl_umount() for forced dismounts;
	 * whatever is on it is freed at the end of this function.
	 */
	struct nfscldeleghead dh;

	td = curthread;
	TAILQ_INIT(&dh);

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	nmp = VFSTONFS(mp);
	error = 0;
	/*
	 * Goes something like this..
	 * - Call vflush() to clear out vnodes for this filesystem
	 * - Close the socket
	 * - Free up the data structures
	 */
	/* In the forced case, cancel any outstanding requests. */
	if (mntflags & MNT_FORCE) {
		/* Refuse the forced dismount if a mirror entry is found. */
		NFSDDSLOCK();
		if (nfsv4_findmirror(nmp) != NULL)
			error = ENXIO;
		NFSDDSUNLOCK();
		if (error)
			goto out;
		error = newnfs_nmcancelreqs(nmp);
		if (error)
			goto out;
		/* For a forced close, get rid of the renew thread now */
		nfscl_umount(nmp, td, &dh);
	}
	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
	/*
	 * A non-forced unmount tries vflush() once.  A forced one retries
	 * on failure, calling nfs_catnap() between attempts, and gives up
	 * after 30 tries.
	 */
	do {
		error = vflush(mp, 1, flags, td);
		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
			(void) nfs_catnap(PSOCK, error, "newndm");
	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
	if (error)
		goto out;

	/*
	 * We are now committed to the unmount.
	 */
	if ((mntflags & MNT_FORCE) == 0)
		nfscl_umount(nmp, td, NULL);
	else {
		mtx_lock(&nmp->nm_mtx);
		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
		mtx_unlock(&nmp->nm_mtx);
	}
	/* Make sure no nfsiods are assigned to this mount. */
	NFSLOCKIOD();
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (ncl_iodmount[i] == nmp) {
			ncl_iodwant[i] = NFSIOD_AVAILABLE;
			ncl_iodmount[i] = NULL;
		}
	NFSUNLOCKIOD();

	/*
	 * We can now set mnt_data to NULL and wait for
	 * nfssvc(NFSSVC_FORCEDISM) to complete.
	 */
	/*
	 * mnt_data is cleared with both mountlist_mtx and nm_mtx held;
	 * nm_mtx stays held for the msleep() loop, which waits until
	 * NFSMNTP_CANCELRPCS is no longer set in nm_privflag.
	 */
	mtx_lock(&mountlist_mtx);
	mtx_lock(&nmp->nm_mtx);
	mp->mnt_data = NULL;
	mtx_unlock(&mountlist_mtx);
	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
	mtx_unlock(&nmp->nm_mtx);

	/* Tear down the connection and release what the mount owns. */
	newnfs_disconnect(nmp, &nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	free(nmp->nm_nam, M_SONAME);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	/*
	 * Every session entry is freed; only entries other than the first
	 * one that have a socket are disconnected here.
	 */
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
		    dsp->nfsclds_sockp != NULL)
			newnfs_disconnect(NULL, dsp->nfsclds_sockp);
		nfscl_freenfsclds(dsp);
	}
	free(nmp->nm_tlscertname, M_NEWNFSMNT);
	free(nmp, M_NEWNFSMNT);

	/* Free up the delegation structures for forced dismounts. */
	TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
		TAILQ_REMOVE(&dh, dp, nfsdl_list);
		free(dp, M_NFSCLDELEG);
	}
out:
	return (error);
}
1862
1863 /*
1864 * Return root of a filesystem
1865 */
1866 static int
1867 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1868 {
1869 struct vnode *vp;
1870 struct nfsmount *nmp;
1871 struct nfsnode *np;
1872 int error;
1873
1874 nmp = VFSTONFS(mp);
1875 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1876 if (error)
1877 return error;
1878 vp = NFSTOV(np);
1879 /*
1880 * Get transfer parameters and attributes for root vnode once.
1881 */
1882 mtx_lock(&nmp->nm_mtx);
1883 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1884 mtx_unlock(&nmp->nm_mtx);
1885 ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1886 } else
1887 mtx_unlock(&nmp->nm_mtx);
1888 if (vp->v_type == VNON)
1889 vp->v_type = VDIR;
1890 vp->v_vflag |= VV_ROOT;
1891 *vpp = vp;
1892 return (0);
1893 }
1894
1895 /*
1896 * Flush out the buffer cache
1897 */
1898 /* ARGSUSED */
1899 static int
1900 nfs_sync(struct mount *mp, int waitfor)
1901 {
1902 struct vnode *vp, *mvp;
1903 struct thread *td;
1904 int error, allerror = 0;
1905
1906 td = curthread;
1907
1908 MNT_ILOCK(mp);
1909 /*
1910 * If a forced dismount is in progress, return from here so that
1911 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1912 * calling VFS_UNMOUNT().
1913 */
1914 if (NFSCL_FORCEDISM(mp)) {
1915 MNT_IUNLOCK(mp);
1916 return (EBADF);
1917 }
1918 MNT_IUNLOCK(mp);
1919
1920 /*
1921 * Force stale buffer cache information to be flushed.
1922 */
1923 loop:
1924 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1925 /* XXX Racy bv_cnt check. */
1926 if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1927 waitfor == MNT_LAZY) {
1928 VI_UNLOCK(vp);
1929 continue;
1930 }
1931 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
1932 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1933 goto loop;
1934 }
1935 error = VOP_FSYNC(vp, waitfor, td);
1936 if (error)
1937 allerror = error;
1938 NFSVOPUNLOCK(vp);
1939 vrele(vp);
1940 }
1941 return (allerror);
1942 }
1943
1944 static int
1945 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1946 {
1947 struct nfsmount *nmp = VFSTONFS(mp);
1948 struct vfsquery vq;
1949 int error;
1950
1951 bzero(&vq, sizeof(vq));
1952 switch (op) {
1953 #if 0
1954 case VFS_CTL_NOLOCKS:
1955 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1956 if (req->oldptr != NULL) {
1957 error = SYSCTL_OUT(req, &val, sizeof(val));
1958 if (error)
1959 return (error);
1960 }
1961 if (req->newptr != NULL) {
1962 error = SYSCTL_IN(req, &val, sizeof(val));
1963 if (error)
1964 return (error);
1965 if (val)
1966 nmp->nm_flag |= NFSMNT_NOLOCKS;
1967 else
1968 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1969 }
1970 break;
1971 #endif
1972 case VFS_CTL_QUERY:
1973 mtx_lock(&nmp->nm_mtx);
1974 if (nmp->nm_state & NFSSTA_TIMEO)
1975 vq.vq_flags |= VQ_NOTRESP;
1976 mtx_unlock(&nmp->nm_mtx);
1977 #if 0
1978 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1979 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1980 vq.vq_flags |= VQ_NOTRESPLOCK;
1981 #endif
1982 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1983 break;
1984 case VFS_CTL_TIMEO:
1985 if (req->oldptr != NULL) {
1986 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1987 sizeof(nmp->nm_tprintf_initial_delay));
1988 if (error)
1989 return (error);
1990 }
1991 if (req->newptr != NULL) {
1992 error = vfs_suser(mp, req->td);
1993 if (error)
1994 return (error);
1995 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1996 sizeof(nmp->nm_tprintf_initial_delay));
1997 if (error)
1998 return (error);
1999 if (nmp->nm_tprintf_initial_delay < 0)
2000 nmp->nm_tprintf_initial_delay = 0;
2001 }
2002 break;
2003 default:
2004 return (ENOTSUP);
2005 }
2006 return (0);
2007 }
2008
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The result of newnfs_nmcancelreqs() is not examined.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
2021
2022 /*
2023 * Extract the information needed by the nlm from the nfs vnode.
2024 */
2025 static void
2026 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
2027 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
2028 struct timeval *timeop)
2029 {
2030 struct nfsmount *nmp;
2031 struct nfsnode *np = VTONFS(vp);
2032
2033 nmp = VFSTONFS(vp->v_mount);
2034 if (fhlenp != NULL)
2035 *fhlenp = (size_t)np->n_fhp->nfh_len;
2036 if (fhp != NULL)
2037 bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
2038 if (sp != NULL)
2039 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
2040 if (is_v3p != NULL)
2041 *is_v3p = NFS_ISV3(vp);
2042 if (sizep != NULL)
2043 *sizep = np->n_size;
2044 if (timeop != NULL) {
2045 timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
2046 timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
2047 }
2048 }
2049
/*
 * Append the option name "opt" at *buf when "testval" is non-zero and
 * the name plus its terminating NUL fits in the *blen bytes that remain.
 * On success *buf is moved past the text and *blen is reduced by the
 * same amount; otherwise nothing is touched.  "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* The size check above means this should never fire. */
	if ((size_t)written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
2067
/*
 * Append "opt=optval" (optval printed as a decimal integer) at *buf.
 *
 * Nothing is attempted unless more than strlen(opt) + 1 bytes remain.
 * When the whole "opt=value" text fits, *buf is moved past it and *blen
 * is reduced by its length.  When it does not fit, the partial text that
 * snprintf() left behind is erased again by restoring the terminating
 * NUL at *buf, so an option with a cut-off (and therefore wrong) value
 * is never left in the caller's string; *buf and *blen stay unchanged.
 * "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len < 0 || (size_t)len >= *blen) {
		/* Did not fit: drop the truncated option entirely. */
		**buf = '\0';
		return;
	}
	*buf += len;
	*blen -= len;
}
2085
2086 /*
2087 * Load the option flags and values into the buffer.
2088 */
2089 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
2090 {
2091 char *buf;
2092 size_t blen;
2093
2094 buf = buffer;
2095 blen = buflen;
2096 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
2097 &blen);
2098 if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
2099 nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
2100 &blen);
2101 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
2102 &buf, &blen);
2103 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
2104 nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
2105 }
2106 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
2107 &blen);
2108 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
2109 "nfsv2", &buf, &blen);
2110 nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
2111 nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
2112 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
2113 &buf, &blen);
2114 nfscl_printopt(nmp, (nmp->nm_newflag & NFSMNT_TLS) != 0, ",tls", &buf,
2115 &blen);
2116 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
2117 &buf, &blen);
2118 nfscl_printoptval(nmp, nmp->nm_aconnect + 1, ",nconnect", &buf, &blen);
2119 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2120 &blen);
2121 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2122 &blen);
2123 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2124 &blen);
2125 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2126 &blen);
2127 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2128 &blen);
2129 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2130 ",noncontigwr", &buf, &blen);
2131 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2132 0, ",lockd", &buf, &blen);
2133 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOLOCKD) != 0, ",nolockd",
2134 &buf, &blen);
2135 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2136 &buf, &blen);
2137 nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2138 &buf, &blen);
2139 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2140 NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2141 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2142 NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2143 &buf, &blen);
2144 nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2145 NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2146 &buf, &blen);
2147 nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2148 nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2149 nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2150 nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2151 nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2152 nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2153 &blen);
2154 nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2155 nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2156 nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2157 &blen);
2158 nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2159 nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2160 &blen);
2161 nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2162 nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2163 }
Cache object: 88377a4250c5d8ca8cd00a22eb2aae10
|