1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: src/sys/nfsclient/nfs_vfsops.c,v 1.219 2008/11/03 10:38:00 dfr Exp $");
37
38
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/lock.h>
48 #include <sys/malloc.h>
49 #include <sys/mbuf.h>
50 #include <sys/module.h>
51 #include <sys/mount.h>
52 #include <sys/proc.h>
53 #include <sys/socket.h>
54 #include <sys/socketvar.h>
55 #include <sys/sockio.h>
56 #include <sys/sysctl.h>
57 #include <sys/vnode.h>
58 #include <sys/signalvar.h>
59 #include <sys/vimage.h>
60
61 #include <vm/vm.h>
62 #include <vm/vm_extern.h>
63 #include <vm/uma.h>
64
65 #include <net/if.h>
66 #include <net/route.h>
67 #include <netinet/in.h>
68
69 #include <rpc/rpcclnt.h>
70 #include <rpc/rpc.h>
71
72 #include <nfs/rpcv2.h>
73 #include <nfs/nfsproto.h>
74 #include <nfsclient/nfs.h>
75 #include <nfsclient/nfsnode.h>
76 #include <nfsclient/nfsmount.h>
77 #include <nfs/xdr_subs.h>
78 #include <nfsclient/nfsm_subs.h>
79 #include <nfsclient/nfsdiskless.h>
80
/*
 * Malloc types used by the NFS client.  The second argument is the short
 * name and the third the long description given to MALLOC_DEFINE.
 */
MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");

/*
 * Zone handle; it is only declared in this part of the file.
 * NOTE(review): presumably backs struct nfsmount allocations - confirm
 * where the zone is created.
 */
uma_zone_t nfsmount_zone;

/*
 * Client statistics.  The rpccnt[] counters are incremented by the RPC
 * routines in this file (see nfs_statfs() and nfs_fsinfo()).
 */
struct nfsstats nfsstats;

/* The vfs.nfs sysctl subtree and the variables exported beneath it. */
SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
    &nfsstats, nfsstats, "S,nfsstats");
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0,
    "Disallow accepting replies from IPs which differ from those sent");
#ifdef NFS_DEBUG
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
/*
 * NOTE(review): the two SYSCTL_INT entries below pass the delay constant
 * (NFS_TPRINTF_INITIAL_DELAY / NFS_TPRINTF_DELAY) in the argument position
 * where the neighbouring entries pass OID_AUTO.  The same constants are also
 * the initial values of the variables.  Confirm that this is intended.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
    downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0,
    "Delay before printing \"nfs server not responding\" messages");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
    downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0,
    "Delay between printing \"nfs server not responding\" messages");

/* Forward declarations of file-local helpers. */
static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
    struct nfs_args *argp, const char *hostname);
static int mountnfs(struct nfs_args *, struct mount *,
    struct sockaddr *, char *, struct vnode **,
    struct ucred *cred);
/*
 * Forward declarations of the VFS entry points, declared through the
 * vfs_*_t function types so that their signatures follow the VFS interface.
 */
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;

/*
 * Only the address of this variable is used: it is the wait channel handed
 * to tsleep() while nfs_decode_args() retries a connect.
 */
static int fake_wchan;
127
/*
 * nfs vfs operations.
 *
 * Operation vector for the NFS client filesystem.  nfs_init and nfs_uninit
 * are not declared in this part of the file; the remaining entries are the
 * static functions forward-declared above.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		nfs_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		nfs_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
};
/* Hook the vector up under the name "nfs", flagged as a network filesystem. */
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfs, 1);
/* The krpc dependency is only declared when the legacy RPC code is not in use. */
#ifndef NFS_LEGACYRPC
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
#endif
/* The kgssapi dependency is only declared when KGSSAPI is configured. */
#ifdef KGSSAPI
MODULE_DEPEND(nfs, kgssapi, 1, 1, 1);
#endif
152
/*
 * RPC operation table for this client; mountnfs() is declared above but the
 * place where this table is attached to a mount is not in this part of the
 * file.
 *
 * The initializer is positional, so the order of the entries must match the
 * member order of struct nfs_rpcops (declared outside this file section).
 */
static struct nfs_rpcops nfs_rpcops = {
	nfs_readrpc,
	nfs_writerpc,
	nfs_writebp,
	nfs_readlinkrpc,
	nfs_invaldir,
	nfs_commit,
};
161
/*
 * This structure must be filled in by a primary bootstrap or bootstrap
 * server for a diskless/dataless machine. It is initialized below just
 * to ensure that it is allocated to initialized data (.data not .bss).
 *
 * nfs_diskless_valid records the state of that data as used by
 * nfs_mountroot(): 0 means nothing usable, 1 means the old-format
 * nfs_diskless is filled in and must be converted, and
 * nfs_convert_diskless() sets it to 3 once nfsv3_diskless is populated.
 */
struct nfs_diskless nfs_diskless = { { { 0 } } };
struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
int nfs_diskless_valid = 0;

/* Read-only sysctls exposing the diskless boot state. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Exports the root_hostnam field under the name diskless_rootpath. */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
    "%Ssockaddr_in", "Diskless root nfs address");


/* Not defined in this part of the file. */
void		nfsargs_ntoh(struct nfs_args *);
/* Forward declarations of the diskless-boot helpers defined below. */
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
190
191 int
192 nfs_iosize(struct nfsmount *nmp)
193 {
194 int iosize;
195
196 /*
197 * Calculate the size used for io buffers. Use the larger
198 * of the two sizes to minimise nfs requests but make sure
199 * that it is at least one VM page to avoid wasting buffer
200 * space.
201 */
202 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
203 iosize = imax(iosize, PAGE_SIZE);
204 return (iosize);
205 }
206
207 static void
208 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
209 {
210
211 args->version = NFS_ARGSVERSION;
212 args->addr = oargs->addr;
213 args->addrlen = oargs->addrlen;
214 args->sotype = oargs->sotype;
215 args->proto = oargs->proto;
216 args->fh = oargs->fh;
217 args->fhsize = oargs->fhsize;
218 args->flags = oargs->flags;
219 args->wsize = oargs->wsize;
220 args->rsize = oargs->rsize;
221 args->readdirsize = oargs->readdirsize;
222 args->timeo = oargs->timeo;
223 args->retrans = oargs->retrans;
224 args->maxgrouplist = oargs->maxgrouplist;
225 args->readahead = oargs->readahead;
226 args->deadthresh = oargs->deadthresh;
227 args->hostname = oargs->hostname;
228 }
229
230 static void
231 nfs_convert_diskless(void)
232 {
233
234 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
235 sizeof(struct ifaliasreq));
236 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
237 sizeof(struct sockaddr_in));
238 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
239 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
240 nfsv3_diskless.root_fhsize = NFSX_V3FH;
241 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
242 } else {
243 nfsv3_diskless.root_fhsize = NFSX_V2FH;
244 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
245 }
246 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
247 sizeof(struct sockaddr_in));
248 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
249 nfsv3_diskless.root_time = nfs_diskless.root_time;
250 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
251 MAXHOSTNAMELEN);
252 nfs_diskless_valid = 3;
253 }
254
/*
 * nfs statfs call
 *
 * Sends an FSSTAT request for the mount's root file handle and fills *sbp
 * from the reply.
 *
 * mp:	mount to query; it is busied with vfs_busy() for the duration of
 *	the call and unbusied on every return path after that succeeds.
 * sbp:	result; f_iosize, f_bsize, f_blocks, f_bfree, f_bavail, f_files
 *	and f_ffree are written once the reply has been decoded.
 * td:	calling thread; its credentials are used for the RPCs.
 *
 * Returns 0 on success, otherwise the error from vfs_busy(), nfs_nget()
 * or the RPC processing.
 *
 * NOTE(review): the nfsm_* helpers are defined outside this part of the
 * file.  They appear to use the locals mreq, mrep, md, mb, bpos, dpos and
 * error by name and to leave through the nfsmout label on failure; confirm
 * against their definitions before renaming or reordering anything here.
 */
static int
nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
{
	struct vnode *vp;
	struct nfs_statfs *sfp;
	caddr_t bpos, dpos;
	struct nfsmount *nmp = VFSTONFS(mp);
	int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct nfsnode *np;
	u_quad_t tquad;

#ifndef nolint
	sfp = NULL;
#endif
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error)
		return (error);
	/* Get the vnode for the mount's root file handle, locked exclusively. */
	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np,
	    LK_EXCLUSIVE);
	if (error) {
		vfs_unbusy(mp);
		return (error);
	}
	vp = NFSTOV(np);
	/*
	 * On a v3 mount that has not yet recorded FSINFO results, fetch
	 * them first.  The mount mutex is dropped before the RPC is issued
	 * and the result of nfs_fsinfo() is deliberately ignored.
	 */
	mtx_lock(&nmp->nm_mtx);
	if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		(void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
	} else
		mtx_unlock(&nmp->nm_mtx);
	/* Build and send the FSSTAT request. */
	nfsstats.rpccnt[NFSPROC_FSSTAT]++;
	mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
	if (v3)
		nfsm_postop_attr(vp, retattr);
	if (error) {
		if (mrep != NULL)
			m_freem(mrep);
		goto nfsmout;
	}
	sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
	/* nfs_iosize() reads nm_rsize/nm_wsize, so call it under the mutex. */
	mtx_lock(&nmp->nm_mtx);
	sbp->f_iosize = nfs_iosize(nmp);
	mtx_unlock(&nmp->nm_mtx);
	if (v3) {
		/*
		 * The v3 reply carries 64-bit byte counts; report them in
		 * units of NFS_FABLKSIZE.
		 */
		sbp->f_bsize = NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_tbytes);
		sbp->f_blocks = tquad / NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_fbytes);
		sbp->f_bfree = tquad / NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_abytes);
		sbp->f_bavail = tquad / NFS_FABLKSIZE;
		/*
		 * Only nfsuquad[1] of each file count is used, with the top
		 * bit masked off.
		 */
		sbp->f_files = (fxdr_unsigned(int32_t,
		    sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
		sbp->f_ffree = (fxdr_unsigned(int32_t,
		    sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
	} else {
		/* The v2 branch takes block counts as-is and reports no file counts. */
		sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
		sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
		sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
		sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
		sbp->f_files = 0;
		sbp->f_ffree = 0;
	}
	m_freem(mrep);
nfsmout:
	/* Common exit: release the root vnode and the busy reference. */
	vput(vp);
	vfs_unbusy(mp);
	return (error);
}
331
/*
 * nfs version 3 fsinfo rpc call
 *
 * Sends an FSINFO request for vp and, when the reply decodes without
 * error, adjusts the mount's write, read and readdir transfer sizes and
 * its maximum file size wherever the server reports a smaller value than
 * the one currently configured.  It then refreshes the cached f_iosize and
 * sets NFSSTA_GOTFSINFO in nm_state.  All of those updates are made with
 * nm_mtx held.
 *
 * nmp:	 mount whose parameters are updated.
 * vp:	 vnode the request is issued against.
 * cred: credentials used for the RPC.
 * td:	 calling thread.
 *
 * Returns the value of error at exit, which is 0 on success.
 *
 * NOTE(review): the nfsm_* helpers are defined outside this part of the
 * file.  They appear to use the locals mreq, mrep, md, mb, bpos, dpos and
 * error by name and to jump to nfsmout on failure (the label has no
 * explicit goto in this function); confirm before restructuring.
 */
int
nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
    struct thread *td)
{
	struct nfsv3_fsinfo *fsp;
	u_int32_t pref, max;
	caddr_t bpos, dpos;
	int error = 0, retattr;
	struct mbuf *mreq, *mrep, *md, *mb;
	u_int64_t maxfsize;

	nfsstats.rpccnt[NFSPROC_FSINFO]++;
	mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, 1);
	nfsm_request(vp, NFSPROC_FSINFO, td, cred);
	nfsm_postop_attr(vp, retattr);
	if (!error) {
		fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
		pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
		mtx_lock(&nmp->nm_mtx);
		/*
		 * Write size: adopt a smaller preferred size, rounded up to
		 * a multiple of NFS_FABLKSIZE, then cap at the maximum.
		 */
		if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
			nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
			    ~(NFS_FABLKSIZE - 1);
		max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
		if (max < nmp->nm_wsize && max > 0) {
			nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
			/* A maximum below one block would round down to 0. */
			if (nmp->nm_wsize == 0)
				nmp->nm_wsize = max;
		}
		/* Read size: same treatment using the read values. */
		pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
		if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
			nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
			    ~(NFS_FABLKSIZE - 1);
		max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
		if (max < nmp->nm_rsize && max > 0) {
			nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
			if (nmp->nm_rsize == 0)
				nmp->nm_rsize = max;
		}
		/*
		 * Readdir size, in units of NFS_DIRBLKSIZ.  No new maximum
		 * is decoded here: max still holds the fs_rtmax value read
		 * above and is reused as the cap.
		 */
		pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
		if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
			nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
			    ~(NFS_DIRBLKSIZ - 1);
		if (max < nmp->nm_readdirsize && max > 0) {
			nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
			if (nmp->nm_readdirsize == 0)
				nmp->nm_readdirsize = max;
		}
		/* A reported maximum file size of 0 is ignored. */
		maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
		if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
			nmp->nm_maxfilesize = maxfsize;
		/* The transfer sizes may have changed; refresh f_iosize. */
		nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
		nmp->nm_state |= NFSSTA_GOTFSINFO;
		mtx_unlock(&nmp->nm_mtx);
	}
	m_freem(mrep);
nfsmout:
	return (error);
}
396
397 /*
398 * Mount a remote root fs via. nfs. This depends on the info in the
399 * nfs_diskless structure that has been filled in properly by some primary
400 * bootstrap.
401 * It goes something like this:
402 * - do enough of "ifconfig" by calling ifioctl() so that the system
403 * can talk to the server
404 * - If nfs_diskless.mygateway is filled in, use that address as
405 * a default gateway.
406 * - build the rootfs mount point and call mountnfs() to do the rest.
407 *
408 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
409 * structure, as well as other global NFS client variables here, as
410 * nfs_mountroot() will be called once in the boot before any other NFS
411 * client activity occurs.
412 */
413 int
414 nfs_mountroot(struct mount *mp, struct thread *td)
415 {
416 INIT_VPROCG(TD_TO_VPROCG(td));
417 struct nfsv3_diskless *nd = &nfsv3_diskless;
418 struct socket *so;
419 struct vnode *vp;
420 struct ifreq ir;
421 int error, i;
422 u_long l;
423 char buf[128];
424 char *cp;
425
426 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
427 bootpc_init(); /* use bootp to get nfs_diskless filled in */
428 #elif defined(NFS_ROOT)
429 nfs_setup_diskless();
430 #endif
431
432 if (nfs_diskless_valid == 0)
433 return (-1);
434 if (nfs_diskless_valid == 1)
435 nfs_convert_diskless();
436
437 /*
438 * XXX splnet, so networks will receive...
439 */
440 splnet();
441
442 /*
443 * Do enough of ifconfig(8) so that the critical net interface can
444 * talk to the server.
445 */
446 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
447 td->td_ucred, td);
448 if (error)
449 panic("nfs_mountroot: socreate(%04x): %d",
450 nd->myif.ifra_addr.sa_family, error);
451
452 #if 0 /* XXX Bad idea */
453 /*
454 * We might not have been told the right interface, so we pass
455 * over the first ten interfaces of the same kind, until we get
456 * one of them configured.
457 */
458
459 for (i = strlen(nd->myif.ifra_name) - 1;
460 nd->myif.ifra_name[i] >= '' &&
461 nd->myif.ifra_name[i] <= '9';
462 nd->myif.ifra_name[i] ++) {
463 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
464 if(!error)
465 break;
466 }
467 #endif
468 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
469 if (error)
470 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
471 if ((cp = getenv("boot.netif.mtu")) != NULL) {
472 ir.ifr_mtu = strtol(cp, NULL, 10);
473 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
474 freeenv(cp);
475 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
476 if (error)
477 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
478 }
479 soclose(so);
480
481 /*
482 * If the gateway field is filled in, set it as the default route.
483 * Note that pxeboot will set a default route of 0 if the route
484 * is not set by the DHCP server. Check also for a value of 0
485 * to avoid panicking inappropriately in that situation.
486 */
487 if (nd->mygateway.sin_len != 0 &&
488 nd->mygateway.sin_addr.s_addr != 0) {
489 struct sockaddr_in mask, sin;
490
491 bzero((caddr_t)&mask, sizeof(mask));
492 sin = mask;
493 sin.sin_family = AF_INET;
494 sin.sin_len = sizeof(sin);
495 /* XXX MRT use table 0 for this sort of thing */
496 error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
497 (struct sockaddr *)&nd->mygateway,
498 (struct sockaddr *)&mask,
499 RTF_UP | RTF_GATEWAY, NULL);
500 if (error)
501 panic("nfs_mountroot: RTM_ADD: %d", error);
502 }
503
504 /*
505 * Create the rootfs mount point.
506 */
507 nd->root_args.fh = nd->root_fh;
508 nd->root_args.fhsize = nd->root_fhsize;
509 l = ntohl(nd->root_saddr.sin_addr.s_addr);
510 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
511 (l >> 24) & 0xff, (l >> 16) & 0xff,
512 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
513 printf("NFS ROOT: %s\n", buf);
514 nd->root_args.hostname = buf;
515 if ((error = nfs_mountdiskless(buf,
516 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
517 return (error);
518 }
519
520 /*
521 * This is not really an nfs issue, but it is much easier to
522 * set hostname here and then let the "/etc/rc.xxx" files
523 * mount the right /var based upon its preset value.
524 */
525 mtx_lock(&hostname_mtx);
526 bcopy(nd->my_hostnam, V_hostname, MAXHOSTNAMELEN);
527 V_hostname[MAXHOSTNAMELEN - 1] = '\0';
528 for (i = 0; i < MAXHOSTNAMELEN; i++)
529 if (V_hostname[i] == '\0')
530 break;
531 mtx_unlock(&hostname_mtx);
532 inittodr(ntohl(nd->root_time));
533 return (0);
534 }
535
536 /*
537 * Internal version of mount system call for diskless setup.
538 */
539 static int
540 nfs_mountdiskless(char *path,
541 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
542 struct vnode **vpp, struct mount *mp)
543 {
544 struct sockaddr *nam;
545 int error;
546
547 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
548 if ((error = mountnfs(args, mp, nam, path, vpp,
549 td->td_ucred)) != 0) {
550 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
551 return (error);
552 }
553 return (0);
554 }
555
556 #ifndef NFS_LEGACYRPC
557 static int
558 nfs_sec_name_to_num(char *sec)
559 {
560 if (!strcmp(sec, "krb5"))
561 return (RPCSEC_GSS_KRB5);
562 if (!strcmp(sec, "krb5i"))
563 return (RPCSEC_GSS_KRB5I);
564 if (!strcmp(sec, "krb5p"))
565 return (RPCSEC_GSS_KRB5P);
566 if (!strcmp(sec, "sys"))
567 return (AUTH_SYS);
568 /*
569 * Userland should validate the string but we will try and
570 * cope with unexpected values.
571 */
572 return (AUTH_SYS);
573 }
574 #endif
575
/*
 * Apply the settings in *argp, plus the "ro", "sec" and "principal" string
 * options in mp->mnt_optnew, to the mount's nfsmount structure.
 *
 * mp:	     mount whose MNT_RDONLY flag and option list are consulted.
 * nmp:	     NFS mount state to update.
 * argp:     requested settings.  Most numeric fields are honoured only when
 *	     the matching NFSMNT_* bit is set in argp->flags and the value
 *	     passes the range test shown below.
 * hostname: optional "host:path" string; when non-NULL, the part before
 *	     the first ':' is stored in nm_hostname.
 *
 * If a transport already exists and the reserved-port, connection mode,
 * socket type or protocol settings changed, the mount is disconnected and,
 * for datagram sockets, reconnected, retrying until that succeeds.
 *
 * The order of the statements matters: the transfer sizes are settled
 * before nm_readdirsize and nm_wcommitsize are bounded against them, and
 * nm_hostname is set before the default principal is built from it.
 */
static void
nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
    const char *hostname)
{
	int s;
	int adjsock;
	int maxio;
	char *p;
#ifndef NFS_LEGACYRPC
	char *secname;
	char *principal;
#endif

	s = splnet();

	/*
	 * Set read-only flag if requested; otherwise, clear it if this is
	 * an update.  If this is not an update, then either the read-only
	 * flag is already clear, or this is a root mount and it was set
	 * intentionally at some previous point.
	 */
	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_RDONLY;
		MNT_IUNLOCK(mp);
	} else if (mp->mnt_flag & MNT_UPDATE) {
		MNT_ILOCK(mp);
		mp->mnt_flag &= ~MNT_RDONLY;
		MNT_IUNLOCK(mp);
	}

	/*
	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
	 * no sense in that context.  Also, set up appropriate retransmit
	 * and soft timeout behavior.
	 */
	if (argp->sotype == SOCK_STREAM) {
		nmp->nm_flag &= ~NFSMNT_NOCONN;
		nmp->nm_flag |= NFSMNT_DUMBTIMR;
		nmp->nm_timeo = NFS_MAXTIMEO;
		nmp->nm_retry = NFS_RETRANS_TCP;
	}

	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
	if ((argp->flags & NFSMNT_NFSV3) == 0)
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;

	/* Re-bind if rsrvd port requested and wasn't on one */
	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
		  && (argp->flags & NFSMNT_RESVPORT);
	/* Also re-bind if we're switching to/from a connected UDP socket */
	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
		    (argp->flags & NFSMNT_NOCONN));

	/* Update flags atomically.  Don't change the lock bits. */
	nmp->nm_flag = argp->flags | nmp->nm_flag;
	splx(s);

	/*
	 * Timeout: argp->timeo is scaled by NFS_HZ / 10 with rounding and
	 * then clamped to [NFS_MINTIMEO, NFS_MAXTIMEO].
	 */
	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
		if (nmp->nm_timeo < NFS_MINTIMEO)
			nmp->nm_timeo = NFS_MINTIMEO;
		else if (nmp->nm_timeo > NFS_MAXTIMEO)
			nmp->nm_timeo = NFS_MAXTIMEO;
	}

	/* Retransmit count: values above 1 are accepted, capped at NFS_MAXREXMIT. */
	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
		nmp->nm_retry = argp->retrans;
		if (nmp->nm_retry > NFS_MAXREXMIT)
			nmp->nm_retry = NFS_MAXREXMIT;
	}

	/* Upper bound for transfer sizes, by protocol version and transport. */
	if (argp->flags & NFSMNT_NFSV3) {
		if (argp->sotype == SOCK_DGRAM)
			maxio = NFS_MAXDGRAMDATA;
		else
			maxio = NFS_MAXDATA;
	} else
		maxio = NFS_V2MAXDATA;

	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
		nmp->nm_wsize = argp->wsize;
		/* Round down to multiple of blocksize */
		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
		if (nmp->nm_wsize <= 0)
			nmp->nm_wsize = NFS_FABLKSIZE;
	}
	/* These caps apply even when no new write size was requested. */
	if (nmp->nm_wsize > maxio)
		nmp->nm_wsize = maxio;
	if (nmp->nm_wsize > MAXBSIZE)
		nmp->nm_wsize = MAXBSIZE;

	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
		nmp->nm_rsize = argp->rsize;
		/* Round down to multiple of blocksize */
		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
		if (nmp->nm_rsize <= 0)
			nmp->nm_rsize = NFS_FABLKSIZE;
	}
	/* These caps apply even when no new read size was requested. */
	if (nmp->nm_rsize > maxio)
		nmp->nm_rsize = maxio;
	if (nmp->nm_rsize > MAXBSIZE)
		nmp->nm_rsize = MAXBSIZE;

	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
		nmp->nm_readdirsize = argp->readdirsize;
	}
	/* The readdir size is bounded by maxio and by the read size settled above. */
	if (nmp->nm_readdirsize > maxio)
		nmp->nm_readdirsize = maxio;
	if (nmp->nm_readdirsize > nmp->nm_rsize)
		nmp->nm_readdirsize = nmp->nm_rsize;

	/*
	 * Attribute cache timeouts.  Unlike the fields above, each of these
	 * is reset to its default when it is not supplied in argp.
	 */
	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
		nmp->nm_acregmin = argp->acregmin;
	else
		nmp->nm_acregmin = NFS_MINATTRTIMO;
	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
		nmp->nm_acregmax = argp->acregmax;
	else
		nmp->nm_acregmax = NFS_MAXATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
		nmp->nm_acdirmin = argp->acdirmin;
	else
		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
		nmp->nm_acdirmax = argp->acdirmax;
	else
		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
	/* Keep each minimum no larger than its maximum. */
	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
		nmp->nm_acdirmin = nmp->nm_acdirmax;
	if (nmp->nm_acregmin > nmp->nm_acregmax)
		nmp->nm_acregmin = nmp->nm_acregmax;

	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
		if (argp->maxgrouplist <= NFS_MAXGRPS)
			nmp->nm_numgrps = argp->maxgrouplist;
		else
			nmp->nm_numgrps = NFS_MAXGRPS;
	}
	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
		if (argp->readahead <= NFS_MAXRAHEAD)
			nmp->nm_readahead = argp->readahead;
		else
			nmp->nm_readahead = NFS_MAXRAHEAD;
	}
	/* The write commit size is never set below the write size settled above. */
	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
		if (argp->wcommitsize < nmp->nm_wsize)
			nmp->nm_wcommitsize = nmp->nm_wsize;
		else
			nmp->nm_wcommitsize = argp->wcommitsize;
	}
	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
		if (argp->deadthresh <= NFS_MAXDEADTHRESH)
			nmp->nm_deadthresh = argp->deadthresh;
		else
			nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
	}

	/* A change of socket type or protocol also requires reconnecting. */
	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
		    (nmp->nm_soproto != argp->proto));
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;

	/*
	 * Reconnect only when a transport already exists; which field holds
	 * it depends on whether the legacy RPC code is compiled in.
	 */
	if (
#ifdef NFS_LEGACYRPC
	    nmp->nm_so
#else
	    nmp->nm_client
#endif
	    && adjsock) {
		nfs_safedisconnect(nmp);
		/* Datagram sockets are reconnected here, retrying after each failure. */
		if (nmp->nm_sotype == SOCK_DGRAM)
			while (nfs_connect(nmp, NULL)) {
				printf("nfs_args: retrying connect\n");
				(void) tsleep(&fake_wchan, PSOCK, "nfscon", hz);
			}
	}

	/* Keep only the host part of "host:path". */
	if (hostname) {
		strlcpy(nmp->nm_hostname, hostname,
		    sizeof(nmp->nm_hostname));
		p = strchr(nmp->nm_hostname, ':');
		if (p)
			*p = '\0';
	}

#ifndef NFS_LEGACYRPC
	/* Security flavor: taken from the "sec" option, else AUTH_SYS. */
	if (vfs_getopt(mp->mnt_optnew, "sec",
		(void **) &secname, NULL) == 0) {
		nmp->nm_secflavor = nfs_sec_name_to_num(secname);
	} else {
		nmp->nm_secflavor = AUTH_SYS;
	}

	/* Principal: taken from the "principal" option, else "nfs@<nm_hostname>". */
	if (vfs_getopt(mp->mnt_optnew, "principal",
		(void **) &principal, NULL) == 0) {
		strlcpy(nmp->nm_principal, principal,
		    sizeof(nmp->nm_principal));
	} else {
		snprintf(nmp->nm_principal, sizeof(nmp->nm_principal),
		    "nfs@%s", nmp->nm_hostname);
	}
#endif
}
780
/*
 * NULL-terminated list of mount option names; nfs_mount() passes it to
 * vfs_filteropt() together with the options supplied for the mount.
 */
static const char *nfs_opts[] = {
	"from",
	"nfs_args",
	"noatime",
	"noexec",
	"suiddir",
	"nosuid",
	"nosymfollow",
	"union",
	"noclusterr",
	"noclusterw",
	"multilabel",
	"acls",
	"force",
	"update",
	"async",
	"dumbtimer",
	"noconn",
	"nolockd",
	"intr",
	"rdirplus",
	"resvport",
	"readdirsize",
	"soft",
	"hard",
	"mntudp",
	"tcp",
	"udp",
	"wsize",
	"rsize",
	"retrans",
	"acregmin",
	"acregmax",
	"acdirmin",
	"acdirmax",
	"deadthresh",
	"hostname",
	"timeout",
	"addr",
	"fh",
	"nfsv3",
	"sec",
	"maxgroups",
	"principal",
	NULL
};
790
791 /*
792 * VFS Operations.
793 *
794 * mount system call
795 * It seems a bit dumb to copyinstr() the host and path here and then
796 * bcopy() them in mountnfs(), but I wanted to detect errors before
797 * doing the sockargs() call because sockargs() allocates an mbuf and
798 * an error after that means that I have to release the mbuf.
799 */
800 /* ARGSUSED */
801 static int
802 nfs_mount(struct mount *mp, struct thread *td)
803 {
804 struct nfs_args args = {
805 .version = NFS_ARGSVERSION,
806 .addr = NULL,
807 .addrlen = sizeof (struct sockaddr_in),
808 .sotype = SOCK_STREAM,
809 .proto = 0,
810 .fh = NULL,
811 .fhsize = 0,
812 .flags = NFSMNT_RESVPORT,
813 .wsize = NFS_WSIZE,
814 .rsize = NFS_RSIZE,
815 .readdirsize = NFS_READDIRSIZE,
816 .timeo = 10,
817 .retrans = NFS_RETRANS,
818 .maxgrouplist = NFS_MAXGRPS,
819 .readahead = NFS_DEFRAHEAD,
820 .wcommitsize = 0, /* was: NQ_DEFLEASE */
821 .deadthresh = NFS_MAXDEADTHRESH, /* was: NQ_DEADTHRESH */
822 .hostname = NULL,
823 /* args version 4 */
824 .acregmin = NFS_MINATTRTIMO,
825 .acregmax = NFS_MAXATTRTIMO,
826 .acdirmin = NFS_MINDIRATTRTIMO,
827 .acdirmax = NFS_MAXDIRATTRTIMO,
828 };
829 int error, ret, has_nfs_args_opt;
830 int has_addr_opt, has_fh_opt, has_hostname_opt;
831 struct sockaddr *nam;
832 struct vnode *vp;
833 char hst[MNAMELEN];
834 size_t len;
835 u_char nfh[NFSX_V3FHMAX];
836 char *opt;
837
838 has_nfs_args_opt = 0;
839 has_addr_opt = 0;
840 has_fh_opt = 0;
841 has_hostname_opt = 0;
842
843 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
844 error = EINVAL;
845 goto out;
846 }
847
848 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
849 error = nfs_mountroot(mp, td);
850 goto out;
851 }
852
853 /*
854 * The old mount_nfs program passed the struct nfs_args
855 * from userspace to kernel. The new mount_nfs program
856 * passes string options via nmount() from userspace to kernel
857 * and we populate the struct nfs_args in the kernel.
858 */
859 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
860 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
861 sizeof args);
862 if (error)
863 goto out;
864
865 if (args.version != NFS_ARGSVERSION) {
866 error = EPROGMISMATCH;
867 goto out;
868 }
869 has_nfs_args_opt = 1;
870 }
871
872 if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
873 args.flags |= NFSMNT_DUMBTIMR;
874 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
875 args.flags |= NFSMNT_NOCONN;
876 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
877 args.flags |= NFSMNT_NOCONN;
878 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
879 args.flags |= NFSMNT_NOLOCKD;
880 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
881 args.flags &= ~NFSMNT_NOLOCKD;
882 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
883 args.flags |= NFSMNT_INT;
884 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
885 args.flags |= NFSMNT_RDIRPLUS;
886 if (vfs_getopt( |