1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76
77 extern int nfscl_ticks;
78 extern struct timeval nfsboottime;
79 extern struct nfsstats newnfsstats;
80 extern int nfsrv_useacl;
81
82 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
83 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
84
85 SYSCTL_DECL(_vfs_newnfs);
86 SYSCTL_STRUCT(_vfs_newnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
87 &newnfsstats, nfsstats, "S,nfsstats");
88 static int nfs_ip_paranoia = 1;
89 SYSCTL_INT(_vfs_newnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
90 &nfs_ip_paranoia, 0, "");
91 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
92 SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_INITIAL_DELAY,
93 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
94 /* how long between console messages "nfs server foo not responding" */
95 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
96 SYSCTL_INT(_vfs_newnfs, NFS_TPRINTF_DELAY,
97 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
98
99 static int nfs_mountroot(struct mount *);
100 static void nfs_sec_name(char *, int *);
101 static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
102 struct nfs_args *argp, const char *, struct ucred *,
103 struct thread *);
104 static int mountnfs(struct nfs_args *, struct mount *,
105 struct sockaddr *, char *, u_char *, int, u_char *, int,
106 u_char *, int, struct vnode **, struct ucred *,
107 struct thread *, int, int);
108 static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
109 struct sockaddr_storage *, int *, off_t *,
110 struct timeval *);
111 static vfs_mount_t nfs_mount;
112 static vfs_cmount_t nfs_cmount;
113 static vfs_unmount_t nfs_unmount;
114 static vfs_root_t nfs_root;
115 static vfs_statfs_t nfs_statfs;
116 static vfs_sync_t nfs_sync;
117 static vfs_sysctl_t nfs_sysctl;
118
119 /*
120 * nfs vfs operations.
121 */
122 static struct vfsops nfs_vfsops = {
123 .vfs_init = ncl_init,
124 .vfs_mount = nfs_mount,
125 .vfs_cmount = nfs_cmount,
126 .vfs_root = nfs_root,
127 .vfs_statfs = nfs_statfs,
128 .vfs_sync = nfs_sync,
129 .vfs_uninit = ncl_uninit,
130 .vfs_unmount = nfs_unmount,
131 .vfs_sysctl = nfs_sysctl,
132 };
133 VFS_SET(nfs_vfsops, newnfs, VFCF_NETWORK | VFCF_SBDRY);
134
135 /* So that loader and kldload(2) can find us, wherever we are.. */
136 MODULE_VERSION(newnfs, 1);
137
138 /*
139 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
140 * can be shared by both NFS clients. It is declared here so that it
141 * will be defined for kernels built without NFS_ROOT, although it
142 * isn't used in that case.
143 */
144 #if !defined(NFS_ROOT) && !defined(NFSCLIENT)
145 struct nfs_diskless nfs_diskless = { { { 0 } } };
146 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
147 int nfs_diskless_valid = 0;
148 #endif
149
150 SYSCTL_INT(_vfs_newnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
151 &nfs_diskless_valid, 0,
152 "Has the diskless struct been filled correctly");
153
154 SYSCTL_STRING(_vfs_newnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
155 nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
156
157 SYSCTL_OPAQUE(_vfs_newnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
158 &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
159 "%Ssockaddr_in", "Diskless root nfs address");
160
161
/* Externally visible; not defined in this part of the file. */
void		newnfsargs_ntoh(struct nfs_args *);

/* File-local helpers used by the diskless (NFS root) mount path. */
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
169
170 int
171 newnfs_iosize(struct nfsmount *nmp)
172 {
173 int iosize, maxio;
174
175 /* First, set the upper limit for iosize */
176 if (nmp->nm_flag & NFSMNT_NFSV4) {
177 maxio = NFS_MAXBSIZE;
178 } else if (nmp->nm_flag & NFSMNT_NFSV3) {
179 if (nmp->nm_sotype == SOCK_DGRAM)
180 maxio = NFS_MAXDGRAMDATA;
181 else
182 maxio = NFS_MAXBSIZE;
183 } else {
184 maxio = NFS_V2MAXDATA;
185 }
186 if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
187 nmp->nm_rsize = maxio;
188 if (nmp->nm_rsize > MAXBSIZE)
189 nmp->nm_rsize = MAXBSIZE;
190 if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
191 nmp->nm_readdirsize = maxio;
192 if (nmp->nm_readdirsize > nmp->nm_rsize)
193 nmp->nm_readdirsize = nmp->nm_rsize;
194 if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
195 nmp->nm_wsize = maxio;
196 if (nmp->nm_wsize > MAXBSIZE)
197 nmp->nm_wsize = MAXBSIZE;
198
199 /*
200 * Calculate the size used for io buffers. Use the larger
201 * of the two sizes to minimise nfs requests but make sure
202 * that it is at least one VM page to avoid wasting buffer
203 * space.
204 */
205 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
206 iosize = imax(iosize, PAGE_SIZE);
207 nmp->nm_mountp->mnt_stat.f_iosize = iosize;
208 return (iosize);
209 }
210
211 static void
212 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
213 {
214
215 args->version = NFS_ARGSVERSION;
216 args->addr = oargs->addr;
217 args->addrlen = oargs->addrlen;
218 args->sotype = oargs->sotype;
219 args->proto = oargs->proto;
220 args->fh = oargs->fh;
221 args->fhsize = oargs->fhsize;
222 args->flags = oargs->flags;
223 args->wsize = oargs->wsize;
224 args->rsize = oargs->rsize;
225 args->readdirsize = oargs->readdirsize;
226 args->timeo = oargs->timeo;
227 args->retrans = oargs->retrans;
228 args->readahead = oargs->readahead;
229 args->hostname = oargs->hostname;
230 }
231
232 static void
233 nfs_convert_diskless(void)
234 {
235
236 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
237 sizeof(struct ifaliasreq));
238 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
239 sizeof(struct sockaddr_in));
240 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
241 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
242 nfsv3_diskless.root_fhsize = NFSX_MYFH;
243 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
244 } else {
245 nfsv3_diskless.root_fhsize = NFSX_V2FH;
246 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
247 }
248 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
249 sizeof(struct sockaddr_in));
250 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
251 nfsv3_diskless.root_time = nfs_diskless.root_time;
252 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
253 MAXHOSTNAMELEN);
254 nfs_diskless_valid = 3;
255 }
256
257 /*
258 * nfs statfs call
259 */
260 static int
261 nfs_statfs(struct mount *mp, struct statfs *sbp)
262 {
263 struct vnode *vp;
264 struct thread *td;
265 struct nfsmount *nmp = VFSTONFS(mp);
266 struct nfsvattr nfsva;
267 struct nfsfsinfo fs;
268 struct nfsstatfs sb;
269 int error = 0, attrflag, gotfsinfo = 0, ret;
270 struct nfsnode *np;
271
272 td = curthread;
273
274 error = vfs_busy(mp, MBF_NOWAIT);
275 if (error)
276 return (error);
277 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
278 if (error) {
279 vfs_unbusy(mp);
280 return (error);
281 }
282 vp = NFSTOV(np);
283 mtx_lock(&nmp->nm_mtx);
284 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
285 mtx_unlock(&nmp->nm_mtx);
286 error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
287 &attrflag, NULL);
288 if (!error)
289 gotfsinfo = 1;
290 } else
291 mtx_unlock(&nmp->nm_mtx);
292 if (!error)
293 error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
294 &attrflag, NULL);
295 if (attrflag == 0) {
296 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
297 td->td_ucred, td, &nfsva, NULL);
298 if (ret) {
299 /*
300 * Just set default values to get things going.
301 */
302 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
303 nfsva.na_vattr.va_type = VDIR;
304 nfsva.na_vattr.va_mode = 0777;
305 nfsva.na_vattr.va_nlink = 100;
306 nfsva.na_vattr.va_uid = (uid_t)0;
307 nfsva.na_vattr.va_gid = (gid_t)0;
308 nfsva.na_vattr.va_fileid = 2;
309 nfsva.na_vattr.va_gen = 1;
310 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
311 nfsva.na_vattr.va_size = 512 * 1024;
312 }
313 }
314 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
315 if (!error) {
316 mtx_lock(&nmp->nm_mtx);
317 if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
318 nfscl_loadfsinfo(nmp, &fs);
319 nfscl_loadsbinfo(nmp, &sb, sbp);
320 sbp->f_iosize = newnfs_iosize(nmp);
321 mtx_unlock(&nmp->nm_mtx);
322 if (sbp != &mp->mnt_stat) {
323 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
324 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
325 }
326 strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
327 } else if (NFS_ISV4(vp)) {
328 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
329 }
330 vput(vp);
331 vfs_unbusy(mp);
332 return (error);
333 }
334
335 /*
336 * nfs version 3 fsinfo rpc call
337 */
338 int
339 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
340 struct thread *td)
341 {
342 struct nfsfsinfo fs;
343 struct nfsvattr nfsva;
344 int error, attrflag;
345
346 error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
347 if (!error) {
348 if (attrflag)
349 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
350 1);
351 mtx_lock(&nmp->nm_mtx);
352 nfscl_loadfsinfo(nmp, &fs);
353 mtx_unlock(&nmp->nm_mtx);
354 }
355 return (error);
356 }
357
358 /*
359 * Mount a remote root fs via. nfs. This depends on the info in the
360 * nfs_diskless structure that has been filled in properly by some primary
361 * bootstrap.
362 * It goes something like this:
363 * - do enough of "ifconfig" by calling ifioctl() so that the system
364 * can talk to the server
365 * - If nfs_diskless.mygateway is filled in, use that address as
366 * a default gateway.
367 * - build the rootfs mount point and call mountnfs() to do the rest.
368 *
369 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
370 * structure, as well as other global NFS client variables here, as
371 * nfs_mountroot() will be called once in the boot before any other NFS
372 * client activity occurs.
373 */
374 static int
375 nfs_mountroot(struct mount *mp)
376 {
377 struct thread *td = curthread;
378 struct nfsv3_diskless *nd = &nfsv3_diskless;
379 struct socket *so;
380 struct vnode *vp;
381 struct ifreq ir;
382 int error;
383 u_long l;
384 char buf[128];
385 char *cp;
386
387 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
388 bootpc_init(); /* use bootp to get nfs_diskless filled in */
389 #elif defined(NFS_ROOT)
390 nfs_setup_diskless();
391 #endif
392
393 if (nfs_diskless_valid == 0)
394 return (-1);
395 if (nfs_diskless_valid == 1)
396 nfs_convert_diskless();
397
398 /*
399 * XXX splnet, so networks will receive...
400 */
401 splnet();
402
403 /*
404 * Do enough of ifconfig(8) so that the critical net interface can
405 * talk to the server.
406 */
407 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
408 td->td_ucred, td);
409 if (error)
410 panic("nfs_mountroot: socreate(%04x): %d",
411 nd->myif.ifra_addr.sa_family, error);
412
413 #if 0 /* XXX Bad idea */
414 /*
415 * We might not have been told the right interface, so we pass
416 * over the first ten interfaces of the same kind, until we get
417 * one of them configured.
418 */
419
420 for (i = strlen(nd->myif.ifra_name) - 1;
421 nd->myif.ifra_name[i] >= '' &&
422 nd->myif.ifra_name[i] <= '9';
423 nd->myif.ifra_name[i] ++) {
424 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
425 if(!error)
426 break;
427 }
428 #endif
429 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
430 if (error)
431 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
432 if ((cp = getenv("boot.netif.mtu")) != NULL) {
433 ir.ifr_mtu = strtol(cp, NULL, 10);
434 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
435 freeenv(cp);
436 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
437 if (error)
438 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
439 }
440 soclose(so);
441
442 /*
443 * If the gateway field is filled in, set it as the default route.
444 * Note that pxeboot will set a default route of 0 if the route
445 * is not set by the DHCP server. Check also for a value of 0
446 * to avoid panicking inappropriately in that situation.
447 */
448 if (nd->mygateway.sin_len != 0 &&
449 nd->mygateway.sin_addr.s_addr != 0) {
450 struct sockaddr_in mask, sin;
451
452 bzero((caddr_t)&mask, sizeof(mask));
453 sin = mask;
454 sin.sin_family = AF_INET;
455 sin.sin_len = sizeof(sin);
456 /* XXX MRT use table 0 for this sort of thing */
457 CURVNET_SET(TD_TO_VNET(td));
458 error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
459 (struct sockaddr *)&nd->mygateway,
460 (struct sockaddr *)&mask,
461 RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
462 CURVNET_RESTORE();
463 if (error)
464 panic("nfs_mountroot: RTM_ADD: %d", error);
465 }
466
467 /*
468 * Create the rootfs mount point.
469 */
470 nd->root_args.fh = nd->root_fh;
471 nd->root_args.fhsize = nd->root_fhsize;
472 l = ntohl(nd->root_saddr.sin_addr.s_addr);
473 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
474 (l >> 24) & 0xff, (l >> 16) & 0xff,
475 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
476 printf("NFS ROOT: %s\n", buf);
477 nd->root_args.hostname = buf;
478 if ((error = nfs_mountdiskless(buf,
479 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
480 return (error);
481 }
482
483 /*
484 * This is not really an nfs issue, but it is much easier to
485 * set hostname here and then let the "/etc/rc.xxx" files
486 * mount the right /var based upon its preset value.
487 */
488 mtx_lock(&prison0.pr_mtx);
489 strlcpy(prison0.pr_hostname, nd->my_hostnam,
490 sizeof(prison0.pr_hostname));
491 mtx_unlock(&prison0.pr_mtx);
492 inittodr(ntohl(nd->root_time));
493 return (0);
494 }
495
496 /*
497 * Internal version of mount system call for diskless setup.
498 */
499 static int
500 nfs_mountdiskless(char *path,
501 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
502 struct vnode **vpp, struct mount *mp)
503 {
504 struct sockaddr *nam;
505 int dirlen, error;
506 char *dirpath;
507
508 /*
509 * Find the directory path in "path", which also has the server's
510 * name/ip address in it.
511 */
512 dirpath = strchr(path, ':');
513 if (dirpath != NULL)
514 dirlen = strlen(++dirpath);
515 else
516 dirlen = 0;
517 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
518 if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
519 NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
520 NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
521 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
522 return (error);
523 }
524 return (0);
525 }
526
527 static void
528 nfs_sec_name(char *sec, int *flagsp)
529 {
530 if (!strcmp(sec, "krb5"))
531 *flagsp |= NFSMNT_KERB;
532 else if (!strcmp(sec, "krb5i"))
533 *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
534 else if (!strcmp(sec, "krb5p"))
535 *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
536 }
537
538 static void
539 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
540 const char *hostname, struct ucred *cred, struct thread *td)
541 {
542 int s;
543 int adjsock;
544 char *p;
545
546 s = splnet();
547
548 /*
549 * Set read-only flag if requested; otherwise, clear it if this is
550 * an update. If this is not an update, then either the read-only
551 * flag is already clear, or this is a root mount and it was set
552 * intentionally at some previous point.
553 */
554 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
555 MNT_ILOCK(mp);
556 mp->mnt_flag |= MNT_RDONLY;
557 MNT_IUNLOCK(mp);
558 } else if (mp->mnt_flag & MNT_UPDATE) {
559 MNT_ILOCK(mp);
560 mp->mnt_flag &= ~MNT_RDONLY;
561 MNT_IUNLOCK(mp);
562 }
563
564 /*
565 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
566 * no sense in that context. Also, set up appropriate retransmit
567 * and soft timeout behavior.
568 */
569 if (argp->sotype == SOCK_STREAM) {
570 nmp->nm_flag &= ~NFSMNT_NOCONN;
571 nmp->nm_timeo = NFS_MAXTIMEO;
572 if ((argp->flags & NFSMNT_NFSV4) != 0)
573 nmp->nm_retry = INT_MAX;
574 else
575 nmp->nm_retry = NFS_RETRANS_TCP;
576 }
577
578 /* Also clear RDIRPLUS if NFSv2, it crashes some servers */
579 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
580 argp->flags &= ~NFSMNT_RDIRPLUS;
581 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
582 }
583
584 /* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
585 if ((argp->flags & NFSMNT_NFSV4) != 0) {
586 argp->flags &= ~NFSMNT_RESVPORT;
587 nmp->nm_flag &= ~NFSMNT_RESVPORT;
588 }
589
590 /* Re-bind if rsrvd port requested and wasn't on one */
591 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
592 && (argp->flags & NFSMNT_RESVPORT);
593 /* Also re-bind if we're switching to/from a connected UDP socket */
594 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
595 (argp->flags & NFSMNT_NOCONN));
596
597 /* Update flags atomically. Don't change the lock bits. */
598 nmp->nm_flag = argp->flags | nmp->nm_flag;
599 splx(s);
600
601 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
602 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
603 if (nmp->nm_timeo < NFS_MINTIMEO)
604 nmp->nm_timeo = NFS_MINTIMEO;
605 else if (nmp->nm_timeo > NFS_MAXTIMEO)
606 nmp->nm_timeo = NFS_MAXTIMEO;
607 }
608
609 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
610 nmp->nm_retry = argp->retrans;
611 if (nmp->nm_retry > NFS_MAXREXMIT)
612 nmp->nm_retry = NFS_MAXREXMIT;
613 }
614
615 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
616 nmp->nm_wsize = argp->wsize;
617 /* Round down to multiple of blocksize */
618 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
619 if (nmp->nm_wsize <= 0)
620 nmp->nm_wsize = NFS_FABLKSIZE;
621 }
622
623 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
624 nmp->nm_rsize = argp->rsize;
625 /* Round down to multiple of blocksize */
626 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
627 if (nmp->nm_rsize <= 0)
628 nmp->nm_rsize = NFS_FABLKSIZE;
629 }
630
631 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
632 nmp->nm_readdirsize = argp->readdirsize;
633 }
634
635 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
636 nmp->nm_acregmin = argp->acregmin;
637 else
638 nmp->nm_acregmin = NFS_MINATTRTIMO;
639 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
640 nmp->nm_acregmax = argp->acregmax;
641 else
642 nmp->nm_acregmax = NFS_MAXATTRTIMO;
643 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
644 nmp->nm_acdirmin = argp->acdirmin;
645 else
646 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
647 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
648 nmp->nm_acdirmax = argp->acdirmax;
649 else
650 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
651 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
652 nmp->nm_acdirmin = nmp->nm_acdirmax;
653 if (nmp->nm_acregmin > nmp->nm_acregmax)
654 nmp->nm_acregmin = nmp->nm_acregmax;
655
656 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
657 if (argp->readahead <= NFS_MAXRAHEAD)
658 nmp->nm_readahead = argp->readahead;
659 else
660 nmp->nm_readahead = NFS_MAXRAHEAD;
661 }
662 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
663 if (argp->wcommitsize < nmp->nm_wsize)
664 nmp->nm_wcommitsize = nmp->nm_wsize;
665 else
666 nmp->nm_wcommitsize = argp->wcommitsize;
667 }
668
669 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
670 (nmp->nm_soproto != argp->proto));
671
672 if (nmp->nm_client != NULL && adjsock) {
673 int haslock = 0, error = 0;
674
675 if (nmp->nm_sotype == SOCK_STREAM) {
676 error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
677 if (!error)
678 haslock = 1;
679 }
680 if (!error) {
681 newnfs_disconnect(&nmp->nm_sockreq);
682 if (haslock)
683 newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
684 nmp->nm_sotype = argp->sotype;
685 nmp->nm_soproto = argp->proto;
686 if (nmp->nm_sotype == SOCK_DGRAM)
687 while (newnfs_connect(nmp, &nmp->nm_sockreq,
688 cred, td, 0)) {
689 printf("newnfs_args: retrying connect\n");
690 (void) nfs_catnap(PSOCK, 0, "newnfscon");
691 }
692 }
693 } else {
694 nmp->nm_sotype = argp->sotype;
695 nmp->nm_soproto = argp->proto;
696 }
697
698 if (hostname != NULL) {
699 strlcpy(nmp->nm_hostname, hostname,
700 sizeof(nmp->nm_hostname));
701 p = strchr(nmp->nm_hostname, ':');
702 if (p != NULL)
703 *p = '\0';
704 }
705 }
706
/*
 * NULL-terminated list of mount option names; nfs_mount() hands it to
 * vfs_filteropt() to validate the options it was given.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"noconn", "nolockd", "conn", "lockd",
	"intr", "rdirplus", "readdirsize",
	"soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeout",
	"addr", "fh", "nfsv3",
	"sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
	"nametimeo", "negnametimeo", "nocto", "wcommitsize",
	NULL
};
717
718 /*
719 * VFS Operations.
720 *
721 * mount system call
722 * It seems a bit dumb to copyinstr() the host and path here and then
723 * bcopy() them in mountnfs(), but I wanted to detect errors before
724 * doing the sockargs() call because sockargs() allocates an mbuf and
725 * an error after that means that I have to release the mbuf.
726 */
727 /* ARGSUSED */
728 static int
729 nfs_mount(struct mount *mp)
730 {
731 struct nfs_args args = {
732 .version = NFS_ARGSVERSION,
733 .addr = NULL,
734 .addrlen = sizeof (struct sockaddr_in),
735 .sotype = SOCK_STREAM,
736 .proto = 0,
737 .fh = NULL,
738 .fhsize = 0,
739 .flags = NFSMNT_RESVPORT,
740 .wsize = NFS_WSIZE,
741 .rsize = NFS_RSIZE,
742 .readdirsize = NFS_READDIRSIZE,
743 .timeo = 10,
744 .retrans = NFS_RETRANS,
745 .readahead = NFS_DEFRAHEAD,
746 .wcommitsize = 0, /* was: NQ_DEFLEASE */
747 .hostname = NULL,
748 .acregmin = NFS_MINATTRTIMO,
749 .acregmax = NFS_MAXATTRTIMO,
750 .acdirmin = NFS_MINDIRATTRTIMO,
751 .acdirmax = NFS_MAXDIRATTRTIMO,
752 };
753 int error = 0, ret, len;
754 struct sockaddr *nam = NULL;
755 struct vnode *vp;
756 struct thread *td;
757 char hst[MNAMELEN];
758 u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
759 char *opt, *name, *secname;
760 int nametimeo = NFS_DEFAULT_NAMETIMEO;
761 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
762 int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
763 size_t hstlen;
764
765 has_nfs_args_opt = 0;
766 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
767 error = EINVAL;
768 goto out;
769 }
770
771 td = curthread;
772 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
773 error = nfs_mountroot(mp);
774 goto out;
775 }
776
777 nfscl_init();
778
779 /*
780 * The old mount_nfs program passed the struct nfs_args
781 * from userspace to kernel. The new mount_nfs program
782 * passes string options via nmount() from userspace to kernel
783 * and we populate the struct nfs_args in the kernel.
784 */
785 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
786 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
787 sizeof(args));
788 if (error != 0)
789 goto out;
790
791 if (args.version != NFS_ARGSVERSION) {
792 error = EPROGMISMATCH;
793 goto out;
794 }
795 has_nfs_args_opt = 1;
796 }
797
798 /* Handle the new style options. */
799 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
800 args.flags |= NFSMNT_NOCONN;
801 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
802 args.flags |= NFSMNT_NOCONN;
803 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
804 args.flags |= NFSMNT_NOLOCKD;
805 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
806 args.flags &= ~NFSMNT_NOLOCKD;
807 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
808 args.flags |= NFSMNT_INT;
809 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
810 args.flags |= NFSMNT_RDIRPLUS;
811 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
812 args.flags |= NFSMNT_RESVPORT;
813 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
814 args.flags &= ~NFSMNT_RESVPORT;
815 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
816 args.flags |= NFSMNT_SOFT;
817 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
818 args.flags &= ~NFSMNT_SOFT;
819 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
820 args.sotype = SOCK_DGRAM;
821 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
822 args.sotype = SOCK_DGRAM;
823 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
824 args.sotype = SOCK_STREAM;
825 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
826 args.flags |= NFSMNT_NFSV3;
827 if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
828 args.flags |= NFSMNT_NFSV4;
829 args.sotype = SOCK_STREAM;
830 }
831 if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
832 args.flags |= NFSMNT_ALLGSSNAME;
833 if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
834 args.flags |= NFSMNT_NOCTO;
835 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
836 if (opt == NULL) {
837 vfs_mount_error(mp, "illegal readdirsize");
838 error = EINVAL;
839 goto out;
840 }
841 ret = sscanf(opt, "%d", &args.readdirsize);
842 if (ret != 1 || args.readdirsize <= 0) {
843 vfs_mount_error(mp, "illegal readdirsize: %s",
844 opt);
845 error = EINVAL;
846 goto out;
847 }
848 args.flags |= NFSMNT_READDIRSIZE;
849 }
850 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
851 if (opt == NULL) {
852 vfs_mount_error(mp, "illegal readahead");
853 error = EINVAL;
854 goto out;
855 }
856 ret = sscanf(opt, "%d", &args.readahead);
857 if (ret != 1 || args.readahead <= 0) {
858 vfs_mount_error(mp, "illegal readahead: %s",
859 opt);
860 error = EINVAL;
861 goto out;
862 }
863 args.flags |= NFSMNT_READAHEAD;
864 }
865 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
866 if (opt == NULL) {
867 vfs_mount_error(mp, "illegal wsize");
868 error = EINVAL;
869 goto out;
870 }
871 ret = sscanf(opt, "%d", &args.wsize);
872 if (ret != 1 || args.wsize <= 0) {
873 vfs_mount_error(mp, "illegal wsize: %s",
874 opt);
875 error = EINVAL;
876 goto out;
877 }
878 args.flags |= NFSMNT_WSIZE;
879 }
880 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
881 if (opt == NULL) {
882 vfs_mount_error(mp, "illegal rsize");
883 error = EINVAL;
884 goto out;
885 }
886 ret = sscanf(opt, "%d", &args.rsize);
887 if (ret != 1 || args.rsize <= 0) {
888 vfs_mount_error(mp, "illegal wsize: %s",
889 opt);
890 error = EINVAL;
891 goto out;
892 }
893 args.flags |= NFSMNT_RSIZE;
894 }
895 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
896 if (opt == NULL) {
897 vfs_mount_error(mp, "illegal retrans");
898 error = EINVAL;
899 goto out;
900 }
901 ret = sscanf(opt, "%d", &args.retrans);
902 if (ret != 1 || args.retrans <= 0) {
903 vfs_mount_error(mp, "illegal retrans: %s",
904 opt);
905 error = EINVAL;
906 goto out;
907 }
908 args.flags |= NFSMNT_RETRANS;
909 }
910 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
911 ret = sscanf(opt, "%d", &args.acregmin);
912 if (ret != 1 || args.acregmin < 0) {
913 vfs_mount_error(mp, "illegal acregmin: %s",
914 opt);
915 error = EINVAL;
916 goto out;
917 }
918 args.flags |= NFSMNT_ACREGMIN;
919 }
920 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
921 ret = sscanf(opt, "%d", &args.acregmax);
922 if (ret != 1 || args.acregmax < 0) {
923 vfs_mount_error(mp, "illegal acregmax: %s",
924 opt);
925 error = EINVAL;
926 goto out;
927 }
928 args.flags |= NFSMNT_ACREGMAX;
929 }
930 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
931 ret = sscanf(opt, "%d", &args.acdirmin);
932 if (ret != 1 || args.acdirmin < 0) {
933 vfs_mount_error(mp, "illegal acdirmin: %s",
934 opt);
935 error = EINVAL;
936 goto out;
937 }
938 args.flags |= NFSMNT_ACDIRMIN;
939 }
940 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
941 ret = sscanf(opt, "%d", &args.acdirmax);
942 if (ret != 1 || args.acdirmax < 0) {
943 vfs_mount_error(mp, "illegal acdirmax: %s",
944 opt);
945 error = EINVAL;
946 goto out;
947 }
948 args.flags |= NFSMNT_ACDIRMAX;
949 }
950 if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
951 ret = sscanf(opt, "%d", &args.wcommitsize);
952 if (ret != 1 || args.wcommitsize < 0) {
953 vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
954 error = EINVAL;
955 goto out;
956 }
957 args.flags |= NFSMNT_WCOMMITSIZE;
958 }
959 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
960 ret = sscanf(opt, "%d", &args.timeo);
961 if (ret != 1 || args.timeo <= 0) {
962 vfs_mount_error(mp, "illegal timeout: %s",
963 opt);
964 error = EINVAL;
965 goto out;
966 }
967 args.flags |= NFSMNT_TIMEO;
968 }
969 if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
970 ret = sscanf(opt, "%d", &nametimeo);
971 if (ret != 1 || nametimeo < 0) {
972 vfs_mount_error(mp, "illegal nametimeo: %s", opt);
973 error = EINVAL;
974 goto out;
975 }
976 }
977 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
978 == 0) {
979 ret = sscanf(opt, "%d", &negnametimeo);
980 if (ret != 1 || negnametimeo < 0) {
981 vfs_mount_error(mp, "illegal negnametimeo: %s",
982 opt);
983 error = EINVAL;
984 goto out;
985 }
986 }
987 if (vfs_getopt(mp->mnt_optnew, "sec",
988 (void **) &secname, NULL) == 0)
989 nfs_sec_name(secname, &args.flags);
990
991 if (mp->mnt_flag & MNT_UPDATE) {
992 struct nfsmount *nmp = VFSTONFS(mp);
993
994 if (nmp == NULL) {
995 error = EIO;
996 goto out;
997 }
998
999 /*
1000 * If a change from TCP->UDP is done and there are thread(s)
1001 * that have I/O RPC(s) in progress with a transfer size
1002 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1003 * hung, retrying the RPC(s) forever. Usually these threads
1004 * will be seen doing an uninterruptible sleep on wait channel
1005 * "newnfsreq" (truncated to "newnfsre" by procstat).
1006 */
1007 if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1008 tprintf(td->td_proc, LOG_WARNING,
1009 "Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1010
1011 /*
1012 * When doing an update, we can't change version,
1013 * security, switch lockd strategies or change cookie
1014 * translation
1015 */
1016 args.flags = (args.flags &
1017 ~(NFSMNT_NFSV3 |
1018 NFSMNT_NFSV4 |
1019 NFSMNT_KERB |
1020 NFSMNT_INTEGRITY |
1021 NFSMNT_PRIVACY |
1022 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1023 (nmp->nm_flag &
1024 (NFSMNT_NFSV3 |
1025 NFSMNT_NFSV4 |
1026 NFSMNT_KERB |
1027 NFSMNT_INTEGRITY |
1028 NFSMNT_PRIVACY |
1029 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1030 nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1031 goto out;
1032 }
1033
1034 /*
1035 * Make the nfs_ip_paranoia sysctl serve as the default connection
1036 * or no-connection mode for those protocols that support
1037 * no-connection mode (the flag will be cleared later for protocols
1038 * that do not support no-connection mode). This will allow a client
1039 * to receive replies from a different IP than the request was
1040 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1041 * not 0.
1042 */
1043 if (nfs_ip_paranoia == 0)
1044 args.flags |= NFSMNT_NOCONN;
1045
1046 if (has_nfs_args_opt != 0) {
1047 /*
1048 * In the 'nfs_args' case, the pointers in the args
1049 * structure are in userland - we copy them in here.
1050 */
1051 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1052 vfs_mount_error(mp, "Bad file handle");
1053 error = EINVAL;
1054 goto out;
1055 }
1056 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1057 args.fhsize);
1058 if (error != 0)
1059 goto out;
1060 error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1061 if (error != 0)
1062 goto out;
1063 bzero(&hst[hstlen], MNAMELEN - hstlen);
1064 args.hostname = hst;
1065 /* sockargs() call must be after above copyin() calls */
1066 error = getsockaddr(&nam, (caddr_t)args.addr,
1067 args.addrlen);
1068 if (error != 0)
1069 goto out;
1070 } else {
1071 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1072 &args.fhsize) == 0) {
1073 if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1074 vfs_mount_error(mp, "Bad file handle");
1075 error = EINVAL;
1076 goto out;
1077 }
1078 bcopy(args.fh, nfh, args.fhsize);
1079 } else {
1080 args.fhsize = 0;
1081 }
1082 (void) vfs_getopt(mp->mnt_optnew, "hostname",
1083 (void **)&args.hostname, &len);
1084 if (args.hostname == NULL) {
1085 vfs_mount_error(mp, "Invalid hostname");
1086 error = EINVAL;
1087 goto out;
1088 }
1089 bcopy(args.hostname, hst, MNAMELEN);
1090 hst[MNAMELEN - 1] = '\0';
1091 }
1092
1093 if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1094 strlcpy(srvkrbname, name, sizeof (srvkrbname));
1095 else
1096 snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1097 srvkrbnamelen = strlen(srvkrbname);
1098
1099 if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1100 strlcpy(krbname, name, sizeof (krbname));
1101 else
1102 krbname[0] = '\0';
1103 krbnamelen = strlen(krbname);
1104
1105 if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1106 strlcpy(dirpath, name, sizeof (dirpath));
1107 else
1108 dirpath[0] = '\0';
1109 dirlen = strlen(dirpath);
1110
1111 if (has_nfs_args_opt == 0) {
1112 if (vfs_getopt(mp->mnt_optnew, "addr",
1113 (void **)&args.addr, &args.addrlen) == 0) {
1114 if (args.addrlen > SOCK_MAXADDRLEN) {
1115 error = ENAMETOOLONG;
1116 goto out;
1117 }
1118 nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1119 bcopy(args.addr, nam, args.addrlen);
1120 nam->sa_len = args.addrlen;
1121 } else {
1122 vfs_mount_error(mp, "No server address");
1123 error = EINVAL;
1124 goto out;
1125 }
1126 }
1127
1128 args.fh = nfh;
1129 error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1130 dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1131 nametimeo, negnametimeo);
1132 out:
1133 if (!error) {
1134 MNT_ILOCK(mp);
1135 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1136 MNT_IUNLOCK(mp);
1137 }
1138 return (error);
1139 }
1140
1141
1142 /*
1143 * VFS Operations.
1144 *
1145 * mount system call
1146 * It seems a bit dumb to copyinstr() the host and path here and then
1147 * bcopy() them in mountnfs(), but I wanted to detect errors before
1148 * doing the sockargs() call because sockargs() allocates an mbuf and
1149 * an error after that means that I have to release the mbuf.
1150 */
1151 /* ARGSUSED */
1152 static int
1153 nfs_cmount(struct mntarg *ma, void *data, int flags)
1154 {
1155 int error;
1156 struct nfs_args args;
1157
1158 error = copyin(data, &args, sizeof (struct nfs_args));
1159 if (error)
1160 return error;
1161
1162 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1163
1164 error = kernel_mount(ma, flags);
1165 return (error);
1166 }
1167
1168 /*
1169 * Common code for mount and mountroot
1170 */
1171 static int
1172 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1173 char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1174 u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1175 struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo)
1176 {
1177 struct nfsmount *nmp;
1178 struct nfsnode *np;
1179 int error, trycnt, ret;
1180 struct nfsvattr nfsva;
1181 static u_int64_t clval = 0;
1182
1183 if (mp->mnt_flag & MNT_UPDATE) {
1184 nmp = VFSTONFS(mp);
1185 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1186 FREE(nam, M_SONAME);
1187 return (0);
1188 } else {
1189 MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1190 krbnamelen + dirlen + srvkrbnamelen + 2,
1191 M_NEWNFSMNT, M_WAITOK | M_ZERO);
1192 TAILQ_INIT(&nmp->nm_bufq);
1193 if (clval == 0)
1194 clval = (u_int64_t)nfsboottime.tv_sec;
1195 nmp->nm_clval = clval++;
1196 nmp->nm_krbnamelen = krbnamelen;
1197 nmp->nm_dirpathlen = dirlen;
1198 nmp->nm_srvkrbnamelen = srvkrbnamelen;
1199 if (td->td_ucred->cr_uid != (uid_t)0) {
1200 /*
1201 * nm_uid is used to get KerberosV credentials for
1202 * the nfsv4 state handling operations if there is
1203 * no host based principal set. Use the uid of
1204 * this user if not root, since they are doing the
1205 * mount. I don't think setting this for root will
1206 * work, since root normally does not have user
1207 * credentials in a credentials cache.
1208 */
1209 nmp->nm_uid = td->td_ucred->cr_uid;
1210 } else {
1211 /*
1212 * Just set to -1, so it won't be used.
1213 */
1214 nmp->nm_uid = (uid_t)-1;
1215 }
1216
1217 /* Copy and null terminate all the names */
1218 if (nmp->nm_krbnamelen > 0) {
1219 bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1220 nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1221 }
1222 if (nmp->nm_dirpathlen > 0) {
1223 bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1224 nmp->nm_dirpathlen);
1225 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1226 + 1] = '\0';
1227 }
1228 if (nmp->nm_srvkrbnamelen > 0) {
1229 bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1230 nmp->nm_srvkrbnamelen);
1231 nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1232 + nmp->nm_srvkrbnamelen + 2] = '\0';
1233 }
1234 nmp->nm_sockreq.nr_cred = crhold(cred);
1235 mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1236 mp->mnt_data = nmp;
1237 nmp->nm_getinfo = nfs_getnlminfo;
1238 nmp->nm_vinvalbuf = ncl_vinvalbuf;
1239 }
1240 vfs_getnewfsid(mp);
1241 nmp->nm_mountp = mp;
1242 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1243
1244 /*
1245 * Since nfs_decode_args() might optionally set them, these
1246 * need to be set to defaults before the call, so that the
1247 * optional settings aren't overwritten.
1248 */
1249 nmp->nm_nametimeo = nametimeo;
1250 nmp->nm_negnametimeo = negnametimeo;
1251 nmp->nm_timeo = NFS_TIMEO;
1252 nmp->nm_retry = NFS_RETRANS;
1253 nmp->nm_readahead = NFS_DEFRAHEAD;
1254 if (desiredvnodes >= 11000)
1255 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1256 else
1257 nmp->nm_wcommitsize = hibufspace / 10;
1258
1259 nfs_decode_args(mp, nmp, argp, hst, cred, td);
1260
1261 /*
1262 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1263 * high, depending on whether we end up with negative offsets in
1264 * the client or server somewhere. 2GB-1 may be safer.
1265 *
1266 * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum
1267 * that we can handle until we find out otherwise.
1268 * XXX Our "safe" limit on the client is what we can store in our
1269 * buffer cache using signed(!) block numbers.
1270 */
1271 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1272 nmp->nm_maxfilesize = 0xffffffffLL;
1273 else
1274 nmp->nm_maxfilesize = OFF_MAX;
1275
1276 if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1277 nmp->nm_wsize = NFS_WSIZE;
1278 nmp->nm_rsize = NFS_RSIZE;
1279 nmp->nm_readdirsize = NFS_READDIRSIZE;
1280 }
1281 nmp->nm_numgrps = NFS_MAXGRPS;
1282 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1283 if (nmp->nm_tprintf_delay < 0)
1284 nmp->nm_tprintf_delay = 0;
1285 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1286 if (nmp->nm_tprintf_initial_delay < 0)
1287 nmp->nm_tprintf_initial_delay = 0;
1288 nmp->nm_fhsize = argp->fhsize;
1289 if (nmp->nm_fhsize > 0)
1290 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1291 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1292 nmp->nm_nam = nam;
1293 /* Set up the sockets and per-host congestion */
1294 nmp->nm_sotype = argp->sotype;
1295 nmp->nm_soproto = argp->proto;
1296 nmp->nm_sockreq.nr_prog = NFS_PROG;
1297 if ((argp->flags & NFSMNT_NFSV4))
1298 nmp->nm_sockreq.nr_vers = NFS_VER4;
1299 else if ((argp->flags & NFSMNT_NFSV3))
1300 nmp->nm_sockreq.nr_vers = NFS_VER3;
1301 else
1302 nmp->nm_sockreq.nr_vers = NFS_VER2;
1303
1304
1305 if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1306 goto bad;
1307
1308 /*
1309 * A reference count is needed on the nfsnode representing the
1310 * remote root. If this object is not persistent, then backward
1311 * traversals of the mount point (i.e. "..") will not work if
1312 * the nfsnode gets flushed out of the cache. Ufs does not have
1313 * this problem, because one can identify root inodes by their
1314 * number == ROOTINO (2).
1315 */
1316 if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1317 nmp->nm_dirpathlen > 0) {
1318 /*
1319 * If the fhsize on the mount point == 0 for V4, the mount
1320 * path needs to be looked up.
1321 */
1322 trycnt = 3;
1323 do {
1324 error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1325 cred, td);
1326 if (error)
1327 (void) nfs_catnap(PZERO, error, "nfsgetdirp");
1328 } while (error && --trycnt > 0);
1329 if (error) {
1330 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1331 goto bad;
1332 }
1333 }
1334 if (nmp->nm_fhsize > 0) {
1335 /*
1336 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1337 * non-zero for the root vnode. f_iosize will be set correctly
1338 * by nfs_statfs() before any I/O occurs.
1339 */
1340 mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1341 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1342 LK_EXCLUSIVE);
1343 if (error)
1344 goto bad;
1345 *vpp = NFSTOV(np);
1346
1347 /*
1348 * Get file attributes and transfer parameters for the
1349 * mountpoint. This has the side effect of filling in
1350 * (*vpp)->v_type with the correct value.
1351 */
1352 ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1353 cred, td, &nfsva, NULL);
1354 if (ret) {
1355 /*
1356 * Just set default values to get things going.
1357 */
1358 NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1359 nfsva.na_vattr.va_type = VDIR;
1360 nfsva.na_vattr.va_mode = 0777;
1361 nfsva.na_vattr.va_nlink = 100;
1362 nfsva.na_vattr.va_uid = (uid_t)0;
1363 nfsva.na_vattr.va_gid = (gid_t)0;
1364 nfsva.na_vattr.va_fileid = 2;
1365 nfsva.na_vattr.va_gen = 1;
1366 nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1367 nfsva.na_vattr.va_size = 512 * 1024;
1368 }
1369 (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1370 if (argp->flags & NFSMNT_NFSV3)
1371 ncl_fsinfo(nmp, *vpp, cred, td);
1372
1373 /* Mark if the mount point supports NFSv4 ACLs. */
1374 if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1375 ret == 0 &&
1376 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1377 MNT_ILOCK(mp);
1378 mp->mnt_flag |= MNT_NFS4ACLS;
1379 MNT_IUNLOCK(mp);
1380 }
1381
1382 /*
1383 * Lose the lock but keep the ref.
1384 */
1385 NFSVOPUNLOCK(*vpp, 0);
1386 return (0);
1387 }
1388 error = EIO;
1389
1390 bad:
1391 newnfs_disconnect(&nmp->nm_sockreq);
1392 crfree(nmp->nm_sockreq.nr_cred);
1393 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1394 mtx_destroy(&nmp->nm_mtx);
1395 FREE(nmp, M_NEWNFSMNT);
1396 FREE(nam, M_SONAME);
1397 return (error);
1398 }
1399
1400 /*
1401 * unmount system call
1402 */
1403 static int
1404 nfs_unmount(struct mount *mp, int mntflags)
1405 {
1406 struct thread *td;
1407 struct nfsmount *nmp;
1408 int error, flags = 0, trycnt = 0;
1409
1410 td = curthread;
1411
1412 if (mntflags & MNT_FORCE)
1413 flags |= FORCECLOSE;
1414 nmp = VFSTONFS(mp);
1415 /*
1416 * Goes something like this..
1417 * - Call vflush() to clear out vnodes for this filesystem
1418 * - Close the socket
1419 * - Free up the data structures
1420 */
1421 /* In the forced case, cancel any outstanding requests. */
1422 if (mntflags & MNT_FORCE) {
1423 error = newnfs_nmcancelreqs(nmp);
1424 if (error)
1425 goto out;
1426 /* For a forced close, get rid of the renew thread now */
1427 nfscl_umount(nmp, td);
1428 }
1429 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1430 do {
1431 error = vflush(mp, 1, flags, td);
1432 if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1433 (void) nfs_catnap(PSOCK, error, "newndm");
1434 } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1435 if (error)
1436 goto out;
1437
1438 /*
1439 * We are now committed to the unmount.
1440 */
1441 if ((mntflags & MNT_FORCE) == 0)
1442 nfscl_umount(nmp, td);
1443 newnfs_disconnect(&nmp->nm_sockreq);
1444 crfree(nmp->nm_sockreq.nr_cred);
1445 FREE(nmp->nm_nam, M_SONAME);
1446
1447 mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1448 mtx_destroy(&nmp->nm_mtx);
1449 FREE(nmp, M_NEWNFSMNT);
1450 out:
1451 return (error);
1452 }
1453
1454 /*
1455 * Return root of a filesystem
1456 */
1457 static int
1458 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1459 {
1460 struct vnode *vp;
1461 struct nfsmount *nmp;
1462 struct nfsnode *np;
1463 int error;
1464
1465 nmp = VFSTONFS(mp);
1466 error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1467 if (error)
1468 return error;
1469 vp = NFSTOV(np);
1470 /*
1471 * Get transfer parameters and attributes for root vnode once.
1472 */
1473 mtx_lock(&nmp->nm_mtx);
1474 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1475 mtx_unlock(&nmp->nm_mtx);
1476 ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1477 } else
1478 mtx_unlock(&nmp->nm_mtx);
1479 if (vp->v_type == VNON)
1480 vp->v_type = VDIR;
1481 vp->v_vflag |= VV_ROOT;
1482 *vpp = vp;
1483 return (0);
1484 }
1485
1486 /*
1487 * Flush out the buffer cache
1488 */
1489 /* ARGSUSED */
1490 static int
1491 nfs_sync(struct mount *mp, int waitfor)
1492 {
1493 struct vnode *vp, *mvp;
1494 struct thread *td;
1495 int error, allerror = 0;
1496
1497 td = curthread;
1498
1499 MNT_ILOCK(mp);
1500 /*
1501 * If a forced dismount is in progress, return from here so that
1502 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1503 * calling VFS_UNMOUNT().
1504 */
1505 if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1506 MNT_IUNLOCK(mp);
1507 return (EBADF);
1508 }
1509
1510 /*
1511 * Force stale buffer cache information to be flushed.
1512 */
1513 loop:
1514 MNT_VNODE_FOREACH(vp, mp, mvp) {
1515 VI_LOCK(vp);
1516 MNT_IUNLOCK(mp);
1517 /* XXX Racy bv_cnt check. */
1518 if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1519 waitfor == MNT_LAZY) {
1520 VI_UNLOCK(vp);
1521 MNT_ILOCK(mp);
1522 continue;
1523 }
1524 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1525 MNT_ILOCK(mp);
1526 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1527 goto loop;
1528 }
1529 error = VOP_FSYNC(vp, waitfor, td);
1530 if (error)
1531 allerror = error;
1532 NFSVOPUNLOCK(vp, 0);
1533 vrele(vp);
1534
1535 MNT_ILOCK(mp);
1536 }
1537 MNT_IUNLOCK(mp);
1538 return (allerror);
1539 }
1540
1541 static int
1542 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1543 {
1544 struct nfsmount *nmp = VFSTONFS(mp);
1545 struct vfsquery vq;
1546 int error;
1547
1548 bzero(&vq, sizeof(vq));
1549 switch (op) {
1550 #if 0
1551 case VFS_CTL_NOLOCKS:
1552 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1553 if (req->oldptr != NULL) {
1554 error = SYSCTL_OUT(req, &val, sizeof(val));
1555 if (error)
1556 return (error);
1557 }
1558 if (req->newptr != NULL) {
1559 error = SYSCTL_IN(req, &val, sizeof(val));
1560 if (error)
1561 return (error);
1562 if (val)
1563 nmp->nm_flag |= NFSMNT_NOLOCKS;
1564 else
1565 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1566 }
1567 break;
1568 #endif
1569 case VFS_CTL_QUERY:
1570 mtx_lock(&nmp->nm_mtx);
1571 if (nmp->nm_state & NFSSTA_TIMEO)
1572 vq.vq_flags |= VQ_NOTRESP;
1573 mtx_unlock(&nmp->nm_mtx);
1574 #if 0
1575 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1576 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1577 vq.vq_flags |= VQ_NOTRESPLOCK;
1578 #endif
1579 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1580 break;
1581 case VFS_CTL_TIMEO:
1582 if (req->oldptr != NULL) {
1583 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1584 sizeof(nmp->nm_tprintf_initial_delay));
1585 if (error)
1586 return (error);
1587 }
1588 if (req->newptr != NULL) {
1589 error = vfs_suser(mp, req->td);
1590 if (error)
1591 return (error);
1592 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1593 sizeof(nmp->nm_tprintf_initial_delay));
1594 if (error)
1595 return (error);
1596 if (nmp->nm_tprintf_initial_delay < 0)
1597 nmp->nm_tprintf_initial_delay = 0;
1598 }
1599 break;
1600 default:
1601 return (ENOTSUP);
1602 }
1603 return (0);
1604 }
1605
1606 /*
1607 * Extract the information needed by the nlm from the nfs vnode.
1608 */
1609 static void
1610 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1611 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1612 struct timeval *timeop)
1613 {
1614 struct nfsmount *nmp;
1615 struct nfsnode *np = VTONFS(vp);
1616
1617 nmp = VFSTONFS(vp->v_mount);
1618 if (fhlenp != NULL)
1619 *fhlenp = (size_t)np->n_fhp->nfh_len;
1620 if (fhp != NULL)
1621 bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1622 if (sp != NULL)
1623 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1624 if (is_v3p != NULL)
1625 *is_v3p = NFS_ISV3(vp);
1626 if (sizep != NULL)
1627 *sizep = np->n_size;
1628 if (timeop != NULL) {
1629 timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1630 timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1631 }
1632 }
1633
Cache object: 4b937e67de574ce3340f3a7b444520d0
|