1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/9.1/sys/fs/nfsclient/nfs_clvfsops.c 235626 2012-05-18 19:48:38Z mckusick $");
37
38
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76
/* Register the kernel feature name "nfscl" (see FEATURE(9)). */
FEATURE(nfscl, "NFSv4 client");

/* Globals defined outside this file. */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats newnfsstats;
extern int nfsrv_useacl;

/* malloc(9) type definitions for NFS request headers and mount structures. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");

SYSCTL_DECL(_vfs_nfs);
/*
 * vfs.nfs.nfs_ip_paranoia: when this is 0, nfs_mount() adds NFSMNT_NOCONN
 * to the flags of a new mount.  The default is 1.
 */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * Read/write knob initialized from NFS_TPRINTF_INITIAL_DELAY; it is not
 * referenced in the visible portion of this file.
 * NOTE(review): the OID number argument here (and for downdelayinterval
 * below) is the delay constant itself rather than OID_AUTO -- presumably
 * historical; confirm before changing, since it fixes the numeric MIB.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
    downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
    downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
98
/* Forward declarations of helpers private to this file. */
static int nfs_mountroot(struct mount *);
static void nfs_sec_name(char *, int *);
static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
    struct nfs_args *argp, const char *, struct ucred *,
    struct thread *);
static int mountnfs(struct nfs_args *, struct mount *,
    struct sockaddr *, char *, u_char *, int, u_char *, int,
    u_char *, int, struct vnode **, struct ucred *,
    struct thread *, int, int);
static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
    struct sockaddr_storage *, int *, off_t *,
    struct timeval *);
/*
 * VFS operation entry points, declared through the vfs_*_t function
 * typedefs so that their signatures match what struct vfsops expects.
 */
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
118
/*
 * nfs vfs operations.
 *
 * Operation table for this file system; VFS_SET() below registers it under
 * the name "nfs" with the VFCF_NETWORK flag.  ncl_init and ncl_uninit are
 * not defined in the visible portion of this file.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
134
/*
 * So that loader and kldload(2) can find us, wherever we are..
 *
 * The module is published as "nfs" version 1 and declares dependencies on
 * version 1 of the nfscommon, krpc, nfssvc and nfslock modules.
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
141
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients.  It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 */
#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
struct nfs_diskless nfs_diskless = { { { 0 } } };
struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
int nfs_diskless_valid = 0;
#endif

/*
 * Read-only sysctls exposing the diskless boot state: the validity
 * indicator, the root_hostnam string and the raw root server address.
 */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
164
165
/*
 * Prototypes for the diskless-boot helpers.  newnfsargs_ntoh() has external
 * linkage and is not defined in the visible portion of this file; the
 * remaining functions are file-local.
 */
void newnfsargs_ntoh(struct nfs_args *);
static int nfs_mountdiskless(char *,
    struct sockaddr_in *, struct nfs_args *,
    struct thread *, struct vnode **, struct mount *);
static void nfs_convert_diskless(void);
static void nfs_convert_oargs(struct nfs_args *args,
    struct onfs_args *oargs);
173
174 int
175 newnfs_iosize(struct nfsmount *nmp)
176 {
177 int iosize, maxio;
178
179 /* First, set the upper limit for iosize */
180 if (nmp->nm_flag & NFSMNT_NFSV4) {
181 maxio = NFS_MAXBSIZE;
182 } else if (nmp->nm_flag & NFSMNT_NFSV3) {
183 if (nmp->nm_sotype == SOCK_DGRAM)
184 maxio = NFS_MAXDGRAMDATA;
185 else
186 maxio = NFS_MAXBSIZE;
187 } else {
188 maxio = NFS_V2MAXDATA;
189 }
190 if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
191 nmp->nm_rsize = maxio;
192 if (nmp->nm_rsize > MAXBSIZE)
193 nmp->nm_rsize = MAXBSIZE;
194 if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
195 nmp->nm_readdirsize = maxio;
196 if (nmp->nm_readdirsize > nmp->nm_rsize)
197 nmp->nm_readdirsize = nmp->nm_rsize;
198 if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
199 nmp->nm_wsize = maxio;
200 if (nmp->nm_wsize > MAXBSIZE)
201 nmp->nm_wsize = MAXBSIZE;
202
203 /*
204 * Calculate the size used for io buffers. Use the larger
205 * of the two sizes to minimise nfs requests but make sure
206 * that it is at least one VM page to avoid wasting buffer
207 * space.
208 */
209 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
210 iosize = imax(iosize, PAGE_SIZE);
211 nmp->nm_mountp->mnt_stat.f_iosize = iosize;
212 return (iosize);
213 }
214
215 static void
216 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
217 {
218
219 args->version = NFS_ARGSVERSION;
220 args->addr = oargs->addr;
221 args->addrlen = oargs->addrlen;
222 args->sotype = oargs->sotype;
223 args->proto = oargs->proto;
224 args->fh = oargs->fh;
225 args->fhsize = oargs->fhsize;
226 args->flags = oargs->flags;
227 args->wsize = oargs->wsize;
228 args->rsize = oargs->rsize;
229 args->readdirsize = oargs->readdirsize;
230 args->timeo = oargs->timeo;
231 args->retrans = oargs->retrans;
232 args->readahead = oargs->readahead;
233 args->hostname = oargs->hostname;
234 }
235
236 static void
237 nfs_convert_diskless(void)
238 {
239
240 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
241 sizeof(struct ifaliasreq));
242 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
243 sizeof(struct sockaddr_in));
244 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
245 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
246 nfsv3_diskless.root_fhsize = NFSX_MYFH;
247 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
248 } else {
249 nfsv3_diskless.root_fhsize = NFSX_V2FH;
250 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
251 }
252 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
253 sizeof(struct sockaddr_in));
254 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
255 nfsv3_diskless.root_time = nfs_diskless.root_time;
256 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
257 MAXHOSTNAMELEN);
258 nfs_diskless_valid = 3;
259 }
260
261 /*
262 * nfs statfs call
263 */
264 static int
265 nfs_statfs(struct mount *mp, struct statfs *sbp)
266 {
267 struct vnode *vp;
268 struct thread *td;
269 struct nfsmount *nmp = VFSTONFS(mp);
270 struct nfsvattr nfsva;
271 struct nfsfsinfo fs;
272 struct nfsstatfs sb;
273 int error = 0, attrflag, gotfsinfo = 0, ret;
274 struct nfsnode *np;
275
276 td = curthread;
277
278 error = vfs_busy(mp, MBF_NOWAIT);
279 if (error)
280 return (error);
281 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
282 if (error) {
283 vfs_unbusy(mp);
284 return (error);
285 }
286 vp = NFSTOV(np);
287 mtx_lock(&nmp->nm_mtx);
288 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
289 mtx_unlock(&nmp->nm_mtx);
290 error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
291 &attrflag, NULL);
292 if (!error)
293 gotfsinfo = 1;
294 } else
295 mtx_unlock(&nmp->nm_mtx);
296 if (!error)
297 error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
298 &attrflag, NULL);
299 if (attrflag == 0) {
300 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
301 td->td_ucred, td, &nfsva, NULL);
302 if (ret) {
303 /*
304 * Just set default values to get things going.
305 */
306 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
307 nfsva.na_vattr.va_type = VDIR;
308 nfsva.na_vattr.va_mode = 0777;
309 nfsva.na_vattr.va_nlink = 100;
310 nfsva.na_vattr.va_uid = (uid_t)0;
311 nfsva.na_vattr.va_gid = (gid_t)0;
312 nfsva.na_vattr.va_fileid = 2;
313 nfsva.na_vattr.va_gen = 1;
314 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
315 nfsva.na_vattr.va_size = 512 * 1024;
316 }
317 }
318 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
319 if (!error) {
320 mtx_lock(&nmp->nm_mtx);
321 if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
322 nfscl_loadfsinfo(nmp, &fs);
323 nfscl_loadsbinfo(nmp, &sb, sbp);
324 sbp->f_iosize = newnfs_iosize(nmp);
325 mtx_unlock(&nmp->nm_mtx);
326 if (sbp != &mp->mnt_stat) {
327 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
328 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
329 }
330 strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
331 } else if (NFS_ISV4(vp)) {
332 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
333 }
334 vput(vp);
335 vfs_unbusy(mp);
336 return (error);
337 }
338
339 /*
340 * nfs version 3 fsinfo rpc call
341 */
342 int
343 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
344 struct thread *td)
345 {
346 struct nfsfsinfo fs;
347 struct nfsvattr nfsva;
348 int error, attrflag;
349
350 error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
351 if (!error) {
352 if (attrflag)
353 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
354 1);
355 mtx_lock(&nmp->nm_mtx);
356 nfscl_loadfsinfo(nmp, &fs);
357 mtx_unlock(&nmp->nm_mtx);
358 }
359 return (error);
360 }
361
362 /*
363 * Mount a remote root fs via. nfs. This depends on the info in the
364 * nfs_diskless structure that has been filled in properly by some primary
365 * bootstrap.
366 * It goes something like this:
367 * - do enough of "ifconfig" by calling ifioctl() so that the system
368 * can talk to the server
369 * - If nfs_diskless.mygateway is filled in, use that address as
370 * a default gateway.
371 * - build the rootfs mount point and call mountnfs() to do the rest.
372 *
373 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
374 * structure, as well as other global NFS client variables here, as
375 * nfs_mountroot() will be called once in the boot before any other NFS
376 * client activity occurs.
377 */
378 static int
379 nfs_mountroot(struct mount *mp)
380 {
381 struct thread *td = curthread;
382 struct nfsv3_diskless *nd = &nfsv3_diskless;
383 struct socket *so;
384 struct vnode *vp;
385 struct ifreq ir;
386 int error;
387 u_long l;
388 char buf[128];
389 char *cp;
390
391 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
392 bootpc_init(); /* use bootp to get nfs_diskless filled in */
393 #elif defined(NFS_ROOT)
394 nfs_setup_diskless();
395 #endif
396
397 if (nfs_diskless_valid == 0)
398 return (-1);
399 if (nfs_diskless_valid == 1)
400 nfs_convert_diskless();
401
402 /*
403 * XXX splnet, so networks will receive...
404 */
405 splnet();
406
407 /*
408 * Do enough of ifconfig(8) so that the critical net interface can
409 * talk to the server.
410 */
411 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
412 td->td_ucred, td);
413 if (error)
414 panic("nfs_mountroot: socreate(%04x): %d",
415 nd->myif.ifra_addr.sa_family, error);
416
417 #if 0 /* XXX Bad idea */
418 /*
419 * We might not have been told the right interface, so we pass
420 * over the first ten interfaces of the same kind, until we get
421 * one of them configured.
422 */
423
424 for (i = strlen(nd->myif.ifra_name) - 1;
425 nd->myif.ifra_name[i] >= '' &&
426 nd->myif.ifra_name[i] <= '9';
427 nd->myif.ifra_name[i] ++) {
428 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
429 if(!error)
430 break;
431 }
432 #endif
433 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
434 if (error)
435 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
436 if ((cp = getenv("boot.netif.mtu")) != NULL) {
437 ir.ifr_mtu = strtol(cp, NULL, 10);
438 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
439 freeenv(cp);
440 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
441 if (error)
442 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
443 }
444 soclose(so);
445
446 /*
447 * If the gateway field is filled in, set it as the default route.
448 * Note that pxeboot will set a default route of 0 if the route
449 * is not set by the DHCP server. Check also for a value of 0
450 * to avoid panicking inappropriately in that situation.
451 */
452 if (nd->mygateway.sin_len != 0 &&
453 nd->mygateway.sin_addr.s_addr != 0) {
454 struct sockaddr_in mask, sin;
455
456 bzero((caddr_t)&mask, sizeof(mask));
457 sin = mask;
458 sin.sin_family = AF_INET;
459 sin.sin_len = sizeof(sin);
460 /* XXX MRT use table 0 for this sort of thing */
461 CURVNET_SET(TD_TO_VNET(td));
462 error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
463 (struct sockaddr *)&nd->mygateway,
464 (struct sockaddr *)&mask,
465 RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
466 CURVNET_RESTORE();
467 if (error)
468 panic("nfs_mountroot: RTM_ADD: %d", error);
469 }
470
471 /*
472 * Create the rootfs mount point.
473 */
474 nd->root_args.fh = nd->root_fh;
475 nd->root_args.fhsize = nd->root_fhsize;
476 l = ntohl(nd->root_saddr.sin_addr.s_addr);
477 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
478 (l >> 24) & 0xff, (l >> 16) & 0xff,
479 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
480 printf("NFS ROOT: %s\n", buf);
481 nd->root_args.hostname = buf;
482 if ((error = nfs_mountdiskless(buf,
483 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
484 return (error);
485 }
486
487 /*
488 * This is not really an nfs issue, but it is much easier to
489 * set hostname here and then let the "/etc/rc.xxx" files
490 * mount the right /var based upon its preset value.
491 */
492 mtx_lock(&prison0.pr_mtx);
493 strlcpy(prison0.pr_hostname, nd->my_hostnam,
494 sizeof(prison0.pr_hostname));
495 mtx_unlock(&prison0.pr_mtx);
496 inittodr(ntohl(nd->root_time));
497 return (0);
498 }
499
500 /*
501 * Internal version of mount system call for diskless setup.
502 */
503 static int
504 nfs_mountdiskless(char *path,
505 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
506 struct vnode **vpp, struct mount *mp)
507 {
508 struct sockaddr *nam;
509 int dirlen, error;
510 char *dirpath;
511
512 /*
513 * Find the directory path in "path", which also has the server's
514 * name/ip address in it.
515 */
516 dirpath = strchr(path, ':');
517 if (dirpath != NULL)
518 dirlen = strlen(++dirpath);
519 else
520 dirlen = 0;
521 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
522 if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
523 NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
524 NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
525 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
526 return (error);
527 }
528 return (0);
529 }
530
531 static void
532 nfs_sec_name(char *sec, int *flagsp)
533 {
534 if (!strcmp(sec, "krb5"))
535 *flagsp |= NFSMNT_KERB;
536 else if (!strcmp(sec, "krb5i"))
537 *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
538 else if (!strcmp(sec, "krb5p"))
539 *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
540 }
541
/*
 * Apply the mount arguments in *argp to the NFS mount *nmp.
 *
 * The steps, in order: set or clear MNT_RDONLY on mp; normalise flag
 * combinations that make no sense for the chosen transport or protocol
 * version; merge argp->flags into nmp->nm_flag; copy each tunable whose
 * NFSMNT_* "present" bit is set, clamping to the permitted range; tear
 * down (and for UDP re-establish) the socket when a transport-related
 * setting changed on an existing connection; finally record the server
 * host name.
 *
 * mp		mount whose MNT_RDONLY flag may be updated
 * nmp		NFS mount being configured
 * argp		requested arguments; a few flag bits are cleared in place
 * hostname	"host:path" style string or NULL; only the part before the
 *		first ':' is kept in nm_hostname
 * cred, td	passed to newnfs_connect() when reconnecting
 */
static void
nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
    const char *hostname, struct ucred *cred, struct thread *td)
{
	int s;
	int adjsock;
	char *p;

	s = splnet();

	/*
	 * Set read-only flag if requested; otherwise, clear it if this is
	 * an update.  If this is not an update, then either the read-only
	 * flag is already clear, or this is a root mount and it was set
	 * intentionally at some previous point.
	 */
	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_RDONLY;
		MNT_IUNLOCK(mp);
	} else if (mp->mnt_flag & MNT_UPDATE) {
		MNT_ILOCK(mp);
		mp->mnt_flag &= ~MNT_RDONLY;
		MNT_IUNLOCK(mp);
	}

	/*
	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
	 * no sense in that context.  Also, set up appropriate retransmit
	 * and soft timeout behavior.
	 */
	if (argp->sotype == SOCK_STREAM) {
		nmp->nm_flag &= ~NFSMNT_NOCONN;
		nmp->nm_timeo = NFS_MAXTIMEO;
		if ((argp->flags & NFSMNT_NFSV4) != 0)
			nmp->nm_retry = INT_MAX;
		else
			nmp->nm_retry = NFS_RETRANS_TCP;
	}

	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		argp->flags &= ~NFSMNT_RDIRPLUS;
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
	}

	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
	if ((argp->flags & NFSMNT_NFSV4) != 0) {
		argp->flags &= ~NFSMNT_RESVPORT;
		nmp->nm_flag &= ~NFSMNT_RESVPORT;
	}

	/* Re-bind if rsrvd port requested and wasn't on one */
	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
	    && (argp->flags & NFSMNT_RESVPORT);
	/* Also re-bind if we're switching to/from a connected UDP socket */
	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
	    (argp->flags & NFSMNT_NOCONN));

	/*
	 * Update flags atomically.  Don't change the lock bits.
	 * The merge is a bitwise OR, so this statement never clears a bit
	 * that is already set in nm_flag.
	 */
	nmp->nm_flag = argp->flags | nmp->nm_flag;
	splx(s);

	/*
	 * Scale the requested timeout into nm_timeo and clamp it to
	 * [NFS_MINTIMEO, NFS_MAXTIMEO].
	 * NOTE(review): the (x * NFS_HZ + 5) / 10 form suggests argp->timeo
	 * is in tenths of a second, rounded to NFS_HZ ticks -- confirm.
	 */
	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
		if (nmp->nm_timeo < NFS_MINTIMEO)
			nmp->nm_timeo = NFS_MINTIMEO;
		else if (nmp->nm_timeo > NFS_MAXTIMEO)
			nmp->nm_timeo = NFS_MAXTIMEO;
	}

	/* A retrans value of 1 or less is ignored; larger ones are capped. */
	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
		nmp->nm_retry = argp->retrans;
		if (nmp->nm_retry > NFS_MAXREXMIT)
			nmp->nm_retry = NFS_MAXREXMIT;
	}

	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
		nmp->nm_wsize = argp->wsize;
		/* Round down to multiple of blocksize */
		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
		if (nmp->nm_wsize <= 0)
			nmp->nm_wsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
		nmp->nm_rsize = argp->rsize;
		/* Round down to multiple of blocksize */
		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
		if (nmp->nm_rsize <= 0)
			nmp->nm_rsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
		nmp->nm_readdirsize = argp->readdirsize;
	}

	/*
	 * Attribute cache timeouts: take the caller's value when its flag
	 * is set and the value is non-negative, otherwise fall back to the
	 * compiled-in default.  Afterwards each minimum is lowered to its
	 * maximum if the pair came out inverted.
	 */
	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
		nmp->nm_acregmin = argp->acregmin;
	else
		nmp->nm_acregmin = NFS_MINATTRTIMO;
	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
		nmp->nm_acregmax = argp->acregmax;
	else
		nmp->nm_acregmax = NFS_MAXATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
		nmp->nm_acdirmin = argp->acdirmin;
	else
		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
		nmp->nm_acdirmax = argp->acdirmax;
	else
		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
		nmp->nm_acdirmin = nmp->nm_acdirmax;
	if (nmp->nm_acregmin > nmp->nm_acregmax)
		nmp->nm_acregmin = nmp->nm_acregmax;

	/* Read-ahead is capped at NFS_MAXRAHEAD. */
	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
		if (argp->readahead <= NFS_MAXRAHEAD)
			nmp->nm_readahead = argp->readahead;
		else
			nmp->nm_readahead = NFS_MAXRAHEAD;
	}
	/* The write commit size is never set below the write size. */
	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
		if (argp->wcommitsize < nmp->nm_wsize)
			nmp->nm_wcommitsize = nmp->nm_wsize;
		else
			nmp->nm_wcommitsize = argp->wcommitsize;
	}

	/* A change of socket type or protocol also forces a reconnect. */
	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
	    (nmp->nm_soproto != argp->proto));

	if (nmp->nm_client != NULL && adjsock) {
		int haslock = 0, error = 0;

		/* For a stream socket, take the send lock before disconnecting. */
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
			if (!error)
				haslock = 1;
		}
		/*
		 * If the lock could not be taken, the socket and the
		 * recorded sotype/soproto are left as they were.
		 */
		if (!error) {
			newnfs_disconnect(&nmp->nm_sockreq);
			if (haslock)
				newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
			nmp->nm_sotype = argp->sotype;
			nmp->nm_soproto = argp->proto;
			/*
			 * Only a datagram socket is reconnected here, and
			 * the loop retries until newnfs_connect() succeeds.
			 */
			if (nmp->nm_sotype == SOCK_DGRAM)
				while (newnfs_connect(nmp, &nmp->nm_sockreq,
				    cred, td, 0)) {
					printf("newnfs_args: retrying connect\n");
					(void) nfs_catnap(PSOCK, 0, "newnfscon");
				}
		}
	} else {
		nmp->nm_sotype = argp->sotype;
		nmp->nm_soproto = argp->proto;
	}

	/* Keep only the server part of "host:path" as the mount's host name. */
	if (hostname != NULL) {
		strlcpy(nmp->nm_hostname, hostname,
		    sizeof(nmp->nm_hostname));
		p = strchr(nmp->nm_hostname, ':');
		if (p != NULL)
			*p = '\0';
	}
}
710
/*
 * NULL-terminated list of option names that nfs_mount() hands to
 * vfs_filteropt() as the set of options accepted for an NFS mount.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"noconn", "nolockd", "conn", "lockd",
	"intr", "rdirplus", "readdirsize",
	"soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout",
	"addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4",
	"gssname", "allgssname", "dirpath",
	"nametimeo", "negnametimeo", "nocto", "wcommitsize",
	NULL
};
721
722 /*
723 * VFS Operations.
724 *
725 * mount system call
726 * It seems a bit dumb to copyinstr() the host and path here and then
727 * bcopy() them in mountnfs(), but I wanted to detect errors before
728 * doing the sockargs() call because sockargs() allocates an mbuf and
729 * an error after that means that I have to release the mbuf.
730 */
731 /* ARGSUSED */
732 static int
733 nfs_mount(struct mount *mp)
734 {
735 struct nfs_args args = {
736 .version = NFS_ARGSVERSION,
737 .addr = NULL,
738 .addrlen = sizeof (struct sockaddr_in),
739 .sotype = SOCK_STREAM,
740 .proto = 0,
741 .fh = NULL,
742 .fhsize = 0,
743 .flags = NFSMNT_RESVPORT,
744 .wsize = NFS_WSIZE,
745 .rsize = NFS_RSIZE,
746 .readdirsize = NFS_READDIRSIZE,
747 .timeo = 10,
748 .retrans = NFS_RETRANS,
749 .readahead = NFS_DEFRAHEAD,
750 .wcommitsize = 0, /* was: NQ_DEFLEASE */
751 .hostname = NULL,
752 .acregmin = NFS_MINATTRTIMO,
753 .acregmax = NFS_MAXATTRTIMO,
754 .acdirmin = NFS_MINDIRATTRTIMO,
755 .acdirmax = NFS_MAXDIRATTRTIMO,
756 };
757 int error = 0, ret, len;
758 struct sockaddr *nam = NULL;
759 struct vnode *vp;
760 struct thread *td;
761 char hst[MNAMELEN];
762 u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
763 char *opt, *name, *secname;
764 int nametimeo = NFS_DEFAULT_NAMETIMEO;
765 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
766 int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
767 size_t hstlen;
768
769 has_nfs_args_opt = 0;
770 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
771 error = EINVAL;
772 goto out;
773 }
774
775 td = curthread;
776 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
777 error = nfs_mountroot(mp);
778 goto out;
779 }
780
781 nfscl_init();
782
783 /*
784 * The old mount_nfs program passed the struct nfs_args
785 * from userspace to kernel. The new mount_nfs program
786 * passes string options via nmount() from userspace to kernel
787 * and we populate the struct nfs_args in the kernel.
788 */
789 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
790 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
791 sizeof(args));
792 if (error != 0)
793 goto out;
794
795 if (args.version != NFS_ARGSVERSION) {
796 error = EPROGMISMATCH;
797 goto out;
798 }
799 has_nfs_args_opt = 1;
800 }
801
802 /* Handle the new style options. */
803 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
804 args.flags |= NFSMNT_NOCONN;
805 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
806 args.flags |= NFSMNT_NOCONN;
807 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
808 args.flags |= NFSMNT_NOLOCKD;
809 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
810 args.flags &= ~NFSMNT_NOLOCKD;
811 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
812 args.flags |= NFSMNT_INT;
813 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
814 args.flags |= NFSMNT_RDIRPLUS;
815 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
816 args.flags |= NFSMNT_RESVPORT;
817 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
818 args.flags &= ~NFSMNT_RESVPORT;
819 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
820 args.flags |= NFSMNT_SOFT;
821 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
822 args.flags &= ~NFSMNT_SOFT;
823 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
824 args.sotype = SOCK_DGRAM;
825 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
826 args.sotype = SOCK_DGRAM;
827 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
828 args.sotype = SOCK_STREAM;
829 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
830 args.flags |= NFSMNT_NFSV3;
831 if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
832 args.flags |= NFSMNT_NFSV4;
833 args.sotype = SOCK_STREAM;
834 }
835 if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
836 args.flags |= NFSMNT_ALLGSSNAME;
837 if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
838 args.flags |= NFSMNT_NOCTO;
839 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
840 if (opt == NULL) {
841 vfs_mount_error(mp, "illegal readdirsize");
842 error = EINVAL;
843 goto out;
844 }
845 ret = sscanf(opt, "%d", &args.readdirsize);
846 if (ret != 1 || args.readdirsize <= 0) {
847 vfs_mount_error(mp, "illegal readdirsize: %s",
848 opt);
849 error = EINVAL;
850 goto out;
851 }
852 args.flags |= NFSMNT_READDIRSIZE;
853 }
854 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
855 if (opt == NULL) {
856 vfs_mount_error(mp, "illegal readahead");
857 error = EINVAL;
858 goto out;
859 }
860 ret = sscanf(opt, "%d", &args.readahead);
861 if (ret != 1 || args.readahead <= 0) {
862 vfs_mount_error(mp, "illegal readahead: %s",
863 opt);
864 error = EINVAL;
865 goto out;
866 }
867 args.flags |= NFSMNT_READAHEAD;
868 }
869 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
870 if (opt == NULL) {
871 vfs_mount_error(mp, "illegal wsize");
872 error = EINVAL;
873 goto out;
874 }
875 ret = sscanf(opt, "%d", &args.wsize);
876 if (ret != 1 || args.wsize <= 0) {
877 vfs_mount_error(mp, "illegal wsize: %s",
878 opt);
879 error = EINVAL;
880 goto out;
881 }
882 args.flags |= NFSMNT_WSIZE;
883 }
884 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
885 if (opt == NULL) {
886 vfs_mount_error(mp, "illegal rsize");
887 error = EINVAL;
888 goto out;
889 }
890 ret = sscanf(opt, "%d", &args.rsize);
891 if (ret != 1 || args.rsize <= 0) {
892 vfs_mount_error(mp, "illegal wsize: %s",
893 opt);
894 error = EINVAL;
895 goto out;
896 }
897 args.flags |= NFSMNT_RSIZE;
898 }
899 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
900 if (opt == NULL) {
901 vfs_mount_error(mp, "illegal retrans");
902 error = EINVAL;
903 goto out;
904 }
905 ret = sscanf(opt, "%d", &args.retrans);
906 if (ret != 1 || args.retrans <= 0) {
907 vfs_mount_error(mp, "illegal retrans: %s",
908 opt);
909 error = EINVAL;
910 goto out;
911 }
912 args.flags |= NFSMNT_RETRANS;
913 }
914 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
915 ret = sscanf(opt, "%d", &args.acregmin);
916 if (ret != 1 || args.acregmin < 0) {
917 vfs_mount_error(mp, "illegal acregmin: %s",
918 opt);
919 error = EINVAL;
920 goto out;
921 }
922 args.flags |= NFSMNT_ACREGMIN;
923 }
924 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
925 ret = sscanf(opt, "%d", &args.acregmax);
926 if (ret != 1 || args.acregmax < 0) {
927 vfs_mount_error(mp, "illegal acregmax: %s",
928 opt);
929 error = EINVAL;
930 goto out;
931 }
932 args.flags |= NFSMNT_ACREGMAX;
933 }
934 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
935 ret = sscanf(opt, "%d", &args.acdirmin);
936 if (ret != 1 || args.acdirmin < 0) {
937 vfs_mount_error(mp, "illegal acdirmin: %s",
938 opt);
939 error = EINVAL;
940 goto out;
941 }
942 args.flags |= NFSMNT_ACDIRMIN;
943 }
944 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
945 ret = sscanf(opt, "%d", &args.acdirmax);
946 if (ret != 1 || args.acdirmax < 0) {
947 vfs_mount_error(mp, "illegal acdirmax: %s",
948 opt);
949 error = EINVAL;
950 goto out;
951 }
952 args.flags |= NFSMNT_ACDIRMAX;
953 }
954 if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
955 ret = sscanf(opt, "%d", &args.wcommitsize);
956 if (ret != 1 || args.wcommitsize < 0) {
957 vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
958 error = EINVAL;
959 goto out;
960 }
961 args.flags |= NFSMNT_WCOMMITSIZE;
962 }
963 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
964 ret = sscanf(opt, "%d", &args.timeo);
965 if (ret != 1 || args.timeo <= 0) {
966 vfs_mount_error(mp, "illegal timeout: %s",
967 opt);
968 error = EINVAL;
969 goto out;
970 }
971 args.flags |= NFSMNT_TIMEO;
972 }
973 if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
974 ret = sscanf(opt, "%d", &nametimeo);
975 if (ret != 1 || nametimeo < 0) {
976 vfs_mount_error(mp, "illegal nametimeo: %s", opt);
977 error = EINVAL;
978 goto out;
979 }
980 }
981 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
982 == 0) {
983 ret = sscanf(opt, "%d", &negnametimeo);
984 if (ret != 1 || negnametimeo < 0) {
985 vfs_mount_error(mp, "illegal negnametimeo: %s",
986 opt);
987 error = EINVAL;
988 goto out;
989 }
990 }
991 if (vfs_getopt(mp->mnt_optnew, "sec",
992 (void **) &secname, NULL) == 0)
993 nfs_sec_name(secname, &args.flags);
994
995 if (mp->mnt_flag & MNT_UPDATE) {
996 struct nfsmount *nmp = VFSTONFS(mp);
997
998 if (nmp == NULL) {
999 error = EIO;
1000 goto out;
1001 }
1002
1003 /*
1004 * If a change from TCP->UDP is done and there are thread(s)
1005 * that have I/O RPC(s) in progress with a transfer size
1006 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1007 * hung, retrying the RPC(s) forever. Usually these threads
1008 * will be seen doing an uninterruptible sleep on wait channel
1009 * "newnfsreq" (truncated to "newnfsre" by procstat).
1010 */
1011 if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1012 tprintf(td->td_proc, LOG_WARNING,
1013 "Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1014
1015 /*
1016 * When doing an update, we can't change version,
1017 * security, switch lockd strategies or change cookie
1018 * translation
1019 */
1020 args.flags = (args.flags &
1021 ~(NFSMNT_NFSV3 |
1022 NFSMNT_NFSV4 |
1023 NFSMNT_KERB |
1024 NFSMNT_INTEGRITY |
1025 NFSMNT_PRIVACY |
1026 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1027 (nmp->nm_flag &
1028 (NFSMNT_NFSV3 |
1029 NFSMNT_NFSV4 |
1030 NFSMNT_KERB |
1031 NFSMNT_INTEGRITY |
1032 NFSMNT_PRIVACY |
1033 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1034 nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1035 goto out;
1036 }
1037
1038 /*
1039 * Make the nfs_ip_paranoia sysctl serve as the default connection
1040 * or no-connection mode for those protocols that support
1041 * no-connection mode (the flag will be cleared later for protocols
1042 * that do not support no-connection mode). This will allow a client
1043 * to receive replies from a different IP than the request was
1044 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1045 * not 0.
1046 */
1047 if (nfs_ip_paranoia == 0)
1048 args.flags |= NFSMNT_NOCONN;
1049
1050 if (has_nfs_args_opt != 0) {
1051 /*
1052 * In the 'nfs_args' case, the pointers in the args
1053 * structure are in userland - we copy them in here.
1054 */
1055 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1056 vfs_mount_error(mp, "Bad file handle");
1057 error = EINVAL;
1058 goto out;
1059 }
1060 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1061 args.fhsize);
1062 if (error != 0)
1063 goto out;
1064 error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1065 if (error != 0)
1066 goto out;
1067 bzero(&hst[hstlen], MNAMELEN - hstlen);
1068 args.hostname = hst;
1069 /* sockargs() call must be after above copyin() calls */
1070 error = getsockaddr(&nam, (caddr_t)args.addr,
1071 args.addrlen);
1072 if (error != 0)
1073 goto out;
1074 } else {
1075 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1076 &args.fhsize) == 0) {
1077 if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1078 vfs_mount_error(mp, "Bad file handle");
1079 error = EINVAL;
1080 goto out;
1081 }
1082 bcopy(args.fh, nfh, args.fhsize);
1083 } else {
1084 args.fhsize = 0;
1085 }
1086 (void) vfs_getopt(mp->mnt_optnew, "hostname",
1087 (void **)&args.hostname, &len);
1088 if (args.hostname == NULL) {
1089 vfs_mount_error(mp, "Invalid hostname");
1090 error = EINVAL;
1091 goto out;
1092 }
1093 bcopy(args.hostname, hst, MNAMELEN);
1094 hst[MNAMELEN - 1] = '\0';
1095 }
1096
1097 if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1098 strlcpy(srvkrbname, name, sizeof (srvkrbname));
1099 else
1100 snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1101 srvkrbnamelen = strlen(srvkrbname);
1102
1103 if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1104 strlcpy(krbname, name, sizeof (krbname));
1105 else
1106 krbname[0] = '\0';
1107 krbnamelen = strlen(krbname);
1108
1109 if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1110 strlcpy(dirpath, name, sizeof (dirpath));
1111 else
1112 dirpath[0] = '\0';
1113 dirlen = strlen(dirpath);
1114
1115 if (has_nfs_args_opt == 0) {
1116 if (vfs_getopt(mp->mnt_optnew, "addr",
1117 (void **)&args.addr, &args.addrlen) == 0) {
1118 if (args.addrlen > SOCK_MAXADDRLEN) {
1119 error = ENAMETOOLONG;
1120 goto out;
1121 }
1122 nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1123 bcopy(args.addr, nam, args.addrlen);
1124 nam->sa_len = args.addrlen;
1125 } else {
1126 vfs_mount_error(mp, "No server address");
1127 error = EINVAL;
1128 goto out;
1129 }
1130 }
1131
1132 args.fh = nfh;
1133 error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1134 dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1135 nametimeo, negnametimeo);
1136 out:
1137 if (!error) {
1138 MNT_ILOCK(mp);
1139 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1140 MNT_IUNLOCK(mp);
1141 }
1142 return (error);
1143 }
1144
1145
1146 /*
1147 * VFS Operations.
1148 *
1149 * mount system call
1150 * It seems a bit dumb to copyinstr() the host and path here and then
1151 * bcopy() them in mountnfs(), but I wanted to detect errors before
1152 * doing the sockargs() call because sockargs() allocates an mbuf and
1153 * an error after that means that I have to release the mbuf.
1154 */
1155 /* ARGSUSED */
1156 static int
1157 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1158 {
1159 int error;
1160 struct nfs_args args;
1161
1162 error = copyin(data, &args, sizeof (struct nfs_args));
1163 if (error)
1164 return error;
1165
1166 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1167
1168 error = kernel_mount(ma, flags);
1169 return (error);
1170 }
1171
1172 /*
1173 * Common code for mount and mountroot
1174 */
1175 static int
1176 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1177 char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1178 u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1179 struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo)
1180 {
1181 struct nfsmount *nmp;
1182 struct nfsnode *np;
1183 int error, trycnt, ret;
1184 struct nfsvattr nfsva;
1185 static u_int64_t clval = 0;
1186
1187 if (mp->mnt_flag & MNT_UPDATE) {
1188 nmp = VFSTONFS(mp);
1189 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1190 FREE(nam, M_SONAME);
1191 return (0);
1192 } else {
1193 MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1194 krbnamelen + dirlen + srvkrbnamelen + 2,
1195 M_NEWNFSMNT, M_WAITOK | M_ZERO);
1196 TAILQ_INIT(&nmp->nm_bufq);
1197 if (clval == 0)
1198 clval = (u_int64_t)nfsboottime.tv_sec;
1199 nmp->nm_clval = clval++;
1200 nmp->nm_krbnamelen = krbnamelen;
1201 nmp->nm_dirpathlen = dirlen;
1202 nmp->nm_srvkrbnamelen = srvkrbnamelen;
1203 if (td->td_ucred->cr_uid != (uid_t)0) {
1204 /*
1205 * nm_uid is used to get KerberosV credentials for
1206 * the nfsv4 state handling operations if there is
1207 * no host based principal set. Use the uid of
1208 * this user if not root, since they are doing the
1209 * mount. I don't think setting this for root will
1210 * work, since root normally does not have user
1211 * credentials in a credentials cache.
1212 */
1213 nmp->nm_uid = td->td_ucred->cr_uid;
1214 } else {
1215 /*
1216 * Just set to -1, so it won't be used.
1217 */
1218 nmp->nm_uid = (uid_t)-1;
1219 }
1220
1221 /* Copy and null terminate all the names */
1222 if (nmp->nm_krbnamelen > 0) {
1223 bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1224 nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1225 }
1226 if (nmp->nm_dirpathlen > 0) {
1227 bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1228 nmp->nm_dirpathlen);
1229 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1230 + 1] = '\0';
1231 }
1232 if (nmp->nm_srvkrbnamelen > 0) {
1233 bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1234 nmp->nm_srvkrbnamelen);
1235 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1236 + nmp->nm_srvkrbnamelen + 2] = '\0';
1237 }
1238 nmp->nm_sockreq.nr_cred = crhold(cred);
1239 mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1240 mp->mnt_data = nmp;
1241 nmp->nm_getinfo = nfs_getnlminfo;
1242 nmp->nm_vinvalbuf = ncl_vinvalbuf;
1243 }
1244 vfs_getnewfsid(mp);
1245 nmp->nm_mountp = mp;
1246 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1247
1248 /*
1249 * Since nfs_decode_args() might optionally set them, these
1250 * need to be set to defaults before the call, so that the
1251 * optional settings aren't overwritten.
1252 */
1253 nmp->nm_nametimeo = nametimeo;
1254 nmp->nm_negnametimeo = negnametimeo;
1255 nmp->nm_timeo = NFS_TIMEO;
1256 nmp->nm_retry = NFS_RETRANS;
1257 nmp->nm_readahead = NFS_DEFRAHEAD;
1258 if (desiredvnodes >= 11000)
1259 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1260 else
1261 nmp->nm_wcommitsize = hibufspace / 10;
1262
1263 nfs_decode_args(mp, nmp, argp, hst, cred, td);
1264
1265 /*
1266 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1267 * high, depending on whether we end up with negative offsets in
1268 * the client or server somewhere. 2GB-1 may be safer.
1269 *
1270 * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum
1271 * that we can handle until we find out otherwise.
1272 * XXX Our "safe" limit on the client is what we can store in our
1273 * buffer cache using signed(!) block numbers.
1274 */
1275 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1276 nmp->nm_maxfilesize = 0xffffffffLL;
1277 else
1278 nmp->nm_maxfilesize = OFF_MAX;
1279
1280 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1281 nmp->nm_wsize = NFS_WSIZE;
1282 nmp->nm_rsize = NFS_RSIZE;
1283 nmp->nm_readdirsize = NFS_READDIRSIZE;
1284 }
1285 nmp->nm_numgrps = NFS_MAXGRPS;
1286 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1287 if (nmp->nm_tprintf_delay < 0)
1288 nmp->nm_tprintf_delay = 0;
1289 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1290 if (nmp->nm_tprintf_initial_delay < 0)
1291 nmp->nm_tprintf_initial_delay = 0;
1292 nmp->nm_fhsize = argp->fhsize;
1293 if (nmp->nm_fhsize > 0)
1294 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1295 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1296 nmp->nm_nam = nam;
1297 /* Set up the sockets and per-host congestion */
1298 nmp->nm_sotype = argp->sotype;
1299 nmp->nm_soproto = argp->proto;
1300 nmp->nm_sockreq.nr_prog = NFS_PROG;
1301 if ((argp->flags & NFSMNT_NFSV4))
1302 nmp->nm_sockreq.nr_vers = NFS_VER4;
1303 else if ((argp->flags & NFSMNT_NFSV3))
1304 nmp->nm_sockreq.nr_vers = NFS_VER3;
1305 else
1306 nmp->nm_sockreq.nr_vers = NFS_VER2;
1307
1308
1309 if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1310 goto bad;
1311
1312 /*
1313 * A reference count is needed on the nfsnode representing the
1314 * remote root. If this object is not persistent, then backward
1315 * traversals of the mount point (i.e. "..") will not work if
1316 * the nfsnode gets flushed out of the cache. Ufs does not have
1317 * this problem, because one can identify root inodes by their
1318 * number == ROOTINO (2).
1319 */
1320 if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1321 nmp->nm_dirpathlen > 0) {
1322 /*
1323 * If the fhsize on the mount point == 0 for V4, the mount
1324 * path needs to be looked up.
1325 */
1326 trycnt = 3;
1327 do {
1328 error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1329 cred, td);
1330 if (error)
1331 (void) nfs_catnap(PZERO, error, "nfsgetdirp");
1332 } while (error && --trycnt > 0);
1333 if (error) {
1334 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1335 goto bad;
1336 }
1337 }
1338 if (nmp->nm_fhsize > 0) {
1339 /*
1340 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1341 * non-zero for the root vnode. f_iosize will be set correctly
1342 * by nfs_statfs() before any I/O occurs.
1343 */
1344 mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1345 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1346 LK_EXCLUSIVE);
1347 if (error)
1348 goto bad;
1349 *vpp = NFSTOV(np);
1350
1351 /*
1352 * Get file attributes and transfer parameters for the
1353 * mountpoint. This has the side effect of filling in
1354 * (*vpp)->v_type with the correct value.
1355 */
1356 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1357 cred, td, &nfsva, NULL);
1358 if (ret) {
1359 /*
1360 * Just set default values to get things going.
1361 */
1362 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1363 nfsva.na_vattr.va_type = VDIR;
1364 nfsva.na_vattr.va_mode = 0777;
1365 nfsva.na_vattr.va_nlink = 100;
1366 nfsva.na_vattr.va_uid = (uid_t)0;
1367 nfsva.na_vattr.va_gid = (gid_t)0;
1368 nfsva.na_vattr.va_fileid = 2;
1369 nfsva.na_vattr.va_gen = 1;
1370 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1371 nfsva.na_vattr.va_size = 512 * 1024;
1372 }
1373 (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1374 if (argp->flags & NFSMNT_NFSV3)
1375 ncl_fsinfo(nmp, *vpp, cred, td);
1376
1377 /* Mark if the mount point supports NFSv4 ACLs. */
1378 if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1379 ret == 0 &&
1380 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1381 MNT_ILOCK(mp);
1382 mp->mnt_flag |= MNT_NFS4ACLS;
1383 MNT_IUNLOCK(mp);
1384 }
1385
1386 /*
1387 * Lose the lock but keep the ref.
1388 */
1389 NFSVOPUNLOCK(*vpp, 0);
1390 return (0);
1391 }
1392 error = EIO;
1393
1394 bad:
1395 newnfs_disconnect(&nmp->nm_sockreq);
1396 crfree(nmp->nm_sockreq.nr_cred);
1397 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1398 mtx_destroy(&nmp->nm_mtx);
1399 FREE(nmp, M_NEWNFSMNT);
1400 FREE(nam, M_SONAME);
1401 return (error);
1402 }
1403
1404 /*
1405 * unmount system call
1406 */
1407 static int
1408 nfs_unmount(struct mount *mp, int mntflags)
1409 {
1410 struct thread *td;
1411 struct nfsmount *nmp;
1412 int error, flags = 0, trycnt = 0;
1413
1414 td = curthread;
1415
1416 if (mntflags & MNT_FORCE)
1417 flags |= FORCECLOSE;
1418 nmp = VFSTONFS(mp);
1419 /*
1420 * Goes something like this..
1421 * - Call vflush() to clear out vnodes for this filesystem
1422 * - Close the socket
1423 * - Free up the data structures
1424 */
1425 /* In the forced case, cancel any outstanding requests. */
1426 if (mntflags & MNT_FORCE) {
1427 error = newnfs_nmcancelreqs(nmp);
1428 if (error)
1429 goto out;
1430 /* For a forced close, get rid of the renew thread now */
1431 nfscl_umount(nmp, td);
1432 }
1433 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1434 do {
1435 error = vflush(mp, 1, flags, td);
1436 if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1437 (void) nfs_catnap(PSOCK, error, "newndm");
1438 } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1439 if (error)
1440 goto out;
1441
1442 /*
1443 * We are now committed to the unmount.
1444 */
1445 if ((mntflags & MNT_FORCE) == 0)
1446 nfscl_umount(nmp, td);
1447 newnfs_disconnect(&nmp->nm_sockreq);
1448 crfree(nmp->nm_sockreq.nr_cred);
1449 FREE(nmp->nm_nam, M_SONAME);
1450
1451 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1452 mtx_destroy(&nmp->nm_mtx);
1453 FREE(nmp, M_NEWNFSMNT);
1454 out:
1455 return (error);
1456 }
1457
1458 /*
1459 * Return root of a filesystem
1460 */
1461 static int
1462 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1463 {
1464 struct vnode *vp;
1465 struct nfsmount *nmp;
1466 struct nfsnode *np;
1467 int error;
1468
1469 nmp = VFSTONFS(mp);
1470 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1471 if (error)
1472 return error;
1473 vp = NFSTOV(np);
1474 /*
1475 * Get transfer parameters and attributes for root vnode once.
1476 */
1477 mtx_lock(&nmp->nm_mtx);
1478 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1479 mtx_unlock(&nmp->nm_mtx);
1480 ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1481 } else
1482 mtx_unlock(&nmp->nm_mtx);
1483 if (vp->v_type == VNON)
1484 vp->v_type = VDIR;
1485 vp->v_vflag |= VV_ROOT;
1486 *vpp = vp;
1487 return (0);
1488 }
1489
1490 /*
1491 * Flush out the buffer cache
1492 */
1493 /* ARGSUSED */
1494 static int
1495 nfs_sync(struct mount *mp, int waitfor)
1496 {
1497 struct vnode *vp, *mvp;
1498 struct thread *td;
1499 int error, allerror = 0;
1500
1501 td = curthread;
1502
1503 MNT_ILOCK(mp);
1504 /*
1505 * If a forced dismount is in progress, return from here so that
1506 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1507 * calling VFS_UNMOUNT().
1508 */
1509 if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1510 MNT_IUNLOCK(mp);
1511 return (EBADF);
1512 }
1513 MNT_IUNLOCK(mp);
1514
1515 /*
1516 * Force stale buffer cache information to be flushed.
1517 */
1518 loop:
1519 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1520 /* XXX Racy bv_cnt check. */
1521 if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1522 waitfor == MNT_LAZY) {
1523 VI_UNLOCK(vp);
1524 continue;
1525 }
1526 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1527 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1528 goto loop;
1529 }
1530 error = VOP_FSYNC(vp, waitfor, td);
1531 if (error)
1532 allerror = error;
1533 NFSVOPUNLOCK(vp, 0);
1534 vrele(vp);
1535 }
1536 return (allerror);
1537 }
1538
1539 static int
1540 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1541 {
1542 struct nfsmount *nmp = VFSTONFS(mp);
1543 struct vfsquery vq;
1544 int error;
1545
1546 bzero(&vq, sizeof(vq));
1547 switch (op) {
1548 #if 0
1549 case VFS_CTL_NOLOCKS:
1550 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1551 if (req->oldptr != NULL) {
1552 error = SYSCTL_OUT(req, &val, sizeof(val));
1553 if (error)
1554 return (error);
1555 }
1556 if (req->newptr != NULL) {
1557 error = SYSCTL_IN(req, &val, sizeof(val));
1558 if (error)
1559 return (error);
1560 if (val)
1561 nmp->nm_flag |= NFSMNT_NOLOCKS;
1562 else
1563 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1564 }
1565 break;
1566 #endif
1567 case VFS_CTL_QUERY:
1568 mtx_lock(&nmp->nm_mtx);
1569 if (nmp->nm_state & NFSSTA_TIMEO)
1570 vq.vq_flags |= VQ_NOTRESP;
1571 mtx_unlock(&nmp->nm_mtx);
1572 #if 0
1573 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1574 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1575 vq.vq_flags |= VQ_NOTRESPLOCK;
1576 #endif
1577 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1578 break;
1579 case VFS_CTL_TIMEO:
1580 if (req->oldptr != NULL) {
1581 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1582 sizeof(nmp->nm_tprintf_initial_delay));
1583 if (error)
1584 return (error);
1585 }
1586 if (req->newptr != NULL) {
1587 error = vfs_suser(mp, req->td);
1588 if (error)
1589 return (error);
1590 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1591 sizeof(nmp->nm_tprintf_initial_delay));
1592 if (error)
1593 return (error);
1594 if (nmp->nm_tprintf_initial_delay < 0)
1595 nmp->nm_tprintf_initial_delay = 0;
1596 }
1597 break;
1598 default:
1599 return (ENOTSUP);
1600 }
1601 return (0);
1602 }
1603
1604 /*
1605 * Extract the information needed by the nlm from the nfs vnode.
1606 */
1607 static void
1608 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1609 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1610 struct timeval *timeop)
1611 {
1612 struct nfsmount *nmp;
1613 struct nfsnode *np = VTONFS(vp);
1614
1615 nmp = VFSTONFS(vp->v_mount);
1616 if (fhlenp != NULL)
1617 *fhlenp = (size_t)np->n_fhp->nfh_len;
1618 if (fhp != NULL)
1619 bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1620 if (sp != NULL)
1621 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1622 if (is_v3p != NULL)
1623 *is_v3p = NFS_ISV3(vp);
1624 if (sizep != NULL)
1625 *sizep = np->n_size;
1626 if (timeop != NULL) {
1627 timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1628 timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1629 }
1630 }
1631
Cache object: 0f490d88f5f9f2372ec18d5ec5247d03
|