1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/10.0/sys/nfsclient/nfs_vfsops.c 249630 2013-04-18 23:20:16Z rmacklem $");
37
38
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/jail.h>
48 #include <sys/limits.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/syslog.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <net/vnet.h>
70
71 #include <netinet/in.h>
72
73 #include <rpc/rpc.h>
74
75 #include <nfs/nfsproto.h>
76 #include <nfsclient/nfs.h>
77 #include <nfsclient/nfsnode.h>
78 #include <nfsclient/nfsmount.h>
79 #include <nfs/xdr_subs.h>
80 #include <nfsclient/nfsm_subs.h>
81 #include <nfs/nfsdiskless.h>
82
83 FEATURE(nfsclient, "NFS client");
84
85 MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
86 MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
87 MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
88 MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
89 MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");
90
91 uma_zone_t nfsmount_zone;
92
93 struct nfsstats nfsstats;
94
95 SYSCTL_NODE(_vfs, OID_AUTO, oldnfs, CTLFLAG_RW, 0, "Old NFS filesystem");
96 SYSCTL_STRUCT(_vfs_oldnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
97 &nfsstats, nfsstats, "S,nfsstats");
98 static int nfs_ip_paranoia = 1;
99 SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
100 &nfs_ip_paranoia, 0,
101 "Disallow accepting replies from IPs which differ from those sent");
102 #ifdef NFS_DEBUG
103 int nfs_debug;
104 SYSCTL_INT(_vfs_oldnfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
105 "Toggle debug flag");
106 #endif
107 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
108 SYSCTL_INT(_vfs_oldnfs, NFS_TPRINTF_INITIAL_DELAY,
109 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0,
110 "Delay before printing \"nfs server not responding\" messages");
111 /* how long between console messages "nfs server foo not responding" */
112 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
113 SYSCTL_INT(_vfs_oldnfs, NFS_TPRINTF_DELAY,
114 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0,
115 "Delay between printing \"nfs server not responding\" messages");
116
117 static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
118 struct nfs_args *argp, const char *hostname);
119 static int mountnfs(struct nfs_args *, struct mount *,
120 struct sockaddr *, char *, struct vnode **,
121 struct ucred *cred, int, int);
122 static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
123 struct sockaddr_storage *, int *, off_t *,
124 struct timeval *);
125 static vfs_mount_t nfs_mount;
126 static vfs_cmount_t nfs_cmount;
127 static vfs_unmount_t nfs_unmount;
128 static vfs_root_t nfs_root;
129 static vfs_statfs_t nfs_statfs;
130 static vfs_sync_t nfs_sync;
131 static vfs_sysctl_t nfs_sysctl;
132
133 static int fake_wchan;
134
135 /*
136 * nfs vfs operations.
137 */
138 static struct vfsops nfs_vfsops = {
139 .vfs_init = nfs_init,
140 .vfs_mount = nfs_mount,
141 .vfs_cmount = nfs_cmount,
142 .vfs_root = nfs_root,
143 .vfs_statfs = nfs_statfs,
144 .vfs_sync = nfs_sync,
145 .vfs_uninit = nfs_uninit,
146 .vfs_unmount = nfs_unmount,
147 .vfs_sysctl = nfs_sysctl,
148 };
149 VFS_SET(nfs_vfsops, oldnfs, VFCF_NETWORK | VFCF_SBDRY);
150
151 /* So that loader and kldload(2) can find us, wherever we are.. */
152 MODULE_VERSION(oldnfs, 1);
153 MODULE_DEPEND(oldnfs, krpc, 1, 1, 1);
154 #ifdef KGSSAPI
155 MODULE_DEPEND(oldnfs, kgssapi, 1, 1, 1);
156 #endif
157 MODULE_DEPEND(oldnfs, nfs_common, 1, 1, 1);
158 MODULE_DEPEND(oldnfs, nfslock, 1, 1, 1);
159
160 static struct nfs_rpcops nfs_rpcops = {
161 nfs_readrpc,
162 nfs_writerpc,
163 nfs_writebp,
164 nfs_readlinkrpc,
165 nfs_invaldir,
166 nfs_commit,
167 };
168
169 /*
170 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
171 * can be shared by both NFS clients. It is declared here so that it
172 * will be defined for kernels built without NFS_ROOT, although it
173 * isn't used in that case.
174 */
175 #ifndef NFS_ROOT
176 struct nfs_diskless nfs_diskless = { { { 0 } } };
177 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
178 int nfs_diskless_valid = 0;
179 #endif
180
181 SYSCTL_INT(_vfs_oldnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
182 &nfs_diskless_valid, 0,
183 "Has the diskless struct been filled correctly");
184
185 SYSCTL_STRING(_vfs_oldnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
186 nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
187
188 SYSCTL_OPAQUE(_vfs_oldnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
189 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
190 "%Ssockaddr_in", "Diskless root nfs address");
191
192
/* Byte-order helper; its definition is not in this section of the file. */
void		nfsargs_ntoh(struct nfs_args *args);

/* Diskless (NFS root) helpers private to this file. */
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
200
201 int
202 nfs_iosize(struct nfsmount *nmp)
203 {
204 int iosize;
205
206 /*
207 * Calculate the size used for io buffers. Use the larger
208 * of the two sizes to minimise nfs requests but make sure
209 * that it is at least one VM page to avoid wasting buffer
210 * space.
211 */
212 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
213 iosize = imax(iosize, PAGE_SIZE);
214 return (iosize);
215 }
216
217 static void
218 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
219 {
220
221 args->version = NFS_ARGSVERSION;
222 args->addr = oargs->addr;
223 args->addrlen = oargs->addrlen;
224 args->sotype = oargs->sotype;
225 args->proto = oargs->proto;
226 args->fh = oargs->fh;
227 args->fhsize = oargs->fhsize;
228 args->flags = oargs->flags;
229 args->wsize = oargs->wsize;
230 args->rsize = oargs->rsize;
231 args->readdirsize = oargs->readdirsize;
232 args->timeo = oargs->timeo;
233 args->retrans = oargs->retrans;
234 args->maxgrouplist = oargs->maxgrouplist;
235 args->readahead = oargs->readahead;
236 args->deadthresh = oargs->deadthresh;
237 args->hostname = oargs->hostname;
238 }
239
240 static void
241 nfs_convert_diskless(void)
242 {
243
244 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
245 sizeof(struct ifaliasreq));
246 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
247 sizeof(struct sockaddr_in));
248 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
249 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
250 nfsv3_diskless.root_fhsize = NFSX_V3FH;
251 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
252 } else {
253 nfsv3_diskless.root_fhsize = NFSX_V2FH;
254 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
255 }
256 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
257 sizeof(struct sockaddr_in));
258 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
259 nfsv3_diskless.root_time = nfs_diskless.root_time;
260 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
261 MAXHOSTNAMELEN);
262 nfs_diskless_valid = 3;
263 }
264
265 /*
266 * nfs statfs call
267 */
268 static int
269 nfs_statfs(struct mount *mp, struct statfs *sbp)
270 {
271 struct vnode *vp;
272 struct thread *td;
273 struct nfs_statfs *sfp;
274 caddr_t bpos, dpos;
275 struct nfsmount *nmp = VFSTONFS(mp);
276 int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
277 struct mbuf *mreq, *mrep, *md, *mb;
278 struct nfsnode *np;
279 u_quad_t tquad;
280
281 td = curthread;
282 #ifndef nolint
283 sfp = NULL;
284 #endif
285 error = vfs_busy(mp, MBF_NOWAIT);
286 if (error)
287 return (error);
288 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
289 if (error) {
290 vfs_unbusy(mp);
291 return (error);
292 }
293 vp = NFSTOV(np);
294 mtx_lock(&nmp->nm_mtx);
295 if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
296 mtx_unlock(&nmp->nm_mtx);
297 (void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
298 } else
299 mtx_unlock(&nmp->nm_mtx);
300 nfsstats.rpccnt[NFSPROC_FSSTAT]++;
301 mreq = m_get2(NFSX_FH(v3), M_WAITOK, MT_DATA, 0);
302 mb = mreq;
303 bpos = mtod(mb, caddr_t);
304 nfsm_fhtom(vp, v3);
305 nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
306 if (v3)
307 nfsm_postop_attr(vp, retattr);
308 if (error) {
309 if (mrep != NULL)
310 m_freem(mrep);
311 goto nfsmout;
312 }
313 sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
314 mtx_lock(&nmp->nm_mtx);
315 sbp->f_iosize = nfs_iosize(nmp);
316 mtx_unlock(&nmp->nm_mtx);
317 if (v3) {
318 sbp->f_bsize = NFS_FABLKSIZE;
319 tquad = fxdr_hyper(&sfp->sf_tbytes);
320 sbp->f_blocks = tquad / NFS_FABLKSIZE;
321 tquad = fxdr_hyper(&sfp->sf_fbytes);
322 sbp->f_bfree = tquad / NFS_FABLKSIZE;
323 tquad = fxdr_hyper(&sfp->sf_abytes);
324 sbp->f_bavail = tquad / NFS_FABLKSIZE;
325 sbp->f_files = (fxdr_unsigned(int32_t,
326 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
327 sbp->f_ffree = (fxdr_unsigned(int32_t,
328 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
329 } else {
330 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
331 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
332 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
333 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
334 sbp->f_files = 0;
335 sbp->f_ffree = 0;
336 }
337 m_freem(mrep);
338 nfsmout:
339 vput(vp);
340 vfs_unbusy(mp);
341 return (error);
342 }
343
344 /*
345 * nfs version 3 fsinfo rpc call
346 */
347 int
348 nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
349 struct thread *td)
350 {
351 struct nfsv3_fsinfo *fsp;
352 u_int32_t pref, max;
353 caddr_t bpos, dpos;
354 int error = 0, retattr;
355 struct mbuf *mreq, *mrep, *md, *mb;
356 u_int64_t maxfsize;
357
358 nfsstats.rpccnt[NFSPROC_FSINFO]++;
359 mreq = m_get2(NFSX_FH(1), M_WAITOK, MT_DATA, 0);
360 mb = mreq;
361 bpos = mtod(mb, caddr_t);
362 nfsm_fhtom(vp, 1);
363 nfsm_request(vp, NFSPROC_FSINFO, td, cred);
364 nfsm_postop_attr(vp, retattr);
365 if (!error) {
366 fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
367 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
368 mtx_lock(&nmp->nm_mtx);
369 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
370 nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
371 ~(NFS_FABLKSIZE - 1);
372 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
373 if (max < nmp->nm_wsize && max > 0) {
374 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
375 if (nmp->nm_wsize == 0)
376 nmp->nm_wsize = max;
377 }
378 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
379 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
380 nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
381 ~(NFS_FABLKSIZE - 1);
382 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
383 if (max < nmp->nm_rsize && max > 0) {
384 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
385 if (nmp->nm_rsize == 0)
386 nmp->nm_rsize = max;
387 }
388 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
389 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
390 nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
391 ~(NFS_DIRBLKSIZ - 1);
392 if (max < nmp->nm_readdirsize && max > 0) {
393 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
394 if (nmp->nm_readdirsize == 0)
395 nmp->nm_readdirsize = max;
396 }
397 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
398 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
399 nmp->nm_maxfilesize = maxfsize;
400 nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
401 nmp->nm_state |= NFSSTA_GOTFSINFO;
402 mtx_unlock(&nmp->nm_mtx);
403 }
404 m_freem(mrep);
405 nfsmout:
406 return (error);
407 }
408
409 /*
410 * Mount a remote root fs via. nfs. This depends on the info in the
411 * nfs_diskless structure that has been filled in properly by some primary
412 * bootstrap.
413 * It goes something like this:
414 * - do enough of "ifconfig" by calling ifioctl() so that the system
415 * can talk to the server
416 * - If nfs_diskless.mygateway is filled in, use that address as
417 * a default gateway.
418 * - build the rootfs mount point and call mountnfs() to do the rest.
419 *
420 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
421 * structure, as well as other global NFS client variables here, as
422 * nfs_mountroot() will be called once in the boot before any other NFS
423 * client activity occurs.
424 */
425 int
426 nfs_mountroot(struct mount *mp)
427 {
428 struct thread *td = curthread;
429 struct nfsv3_diskless *nd = &nfsv3_diskless;
430 struct socket *so;
431 struct vnode *vp;
432 struct ifreq ir;
433 int error;
434 u_long l;
435 char buf[128];
436 char *cp;
437
438
439 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
440 bootpc_init(); /* use bootp to get nfs_diskless filled in */
441 #elif defined(NFS_ROOT)
442 nfs_setup_diskless();
443 #endif
444
445 if (nfs_diskless_valid == 0) {
446 return (-1);
447 }
448 if (nfs_diskless_valid == 1)
449 nfs_convert_diskless();
450
451 /*
452 * XXX splnet, so networks will receive...
453 */
454 splnet();
455
456 /*
457 * Do enough of ifconfig(8) so that the critical net interface can
458 * talk to the server.
459 */
460 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
461 td->td_ucred, td);
462 if (error)
463 panic("nfs_mountroot: socreate(%04x): %d",
464 nd->myif.ifra_addr.sa_family, error);
465
466 #if 0 /* XXX Bad idea */
467 /*
468 * We might not have been told the right interface, so we pass
469 * over the first ten interfaces of the same kind, until we get
470 * one of them configured.
471 */
472
473 for (i = strlen(nd->myif.ifra_name) - 1;
474 nd->myif.ifra_name[i] >= '' &&
475 nd->myif.ifra_name[i] <= '9';
476 nd->myif.ifra_name[i] ++) {
477 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
478 if(!error)
479 break;
480 }
481 #endif
482
483 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
484 if (error)
485 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
486
487 if ((cp = getenv("boot.netif.mtu")) != NULL) {
488 ir.ifr_mtu = strtol(cp, NULL, 10);
489 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
490 freeenv(cp);
491 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
492 if (error)
493 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
494 }
495 soclose(so);
496
497 /*
498 * If the gateway field is filled in, set it as the default route.
499 * Note that pxeboot will set a default route of 0 if the route
500 * is not set by the DHCP server. Check also for a value of 0
501 * to avoid panicking inappropriately in that situation.
502 */
503 if (nd->mygateway.sin_len != 0 &&
504 nd->mygateway.sin_addr.s_addr != 0) {
505 struct sockaddr_in mask, sin;
506
507 bzero((caddr_t)&mask, sizeof(mask));
508 sin = mask;
509 sin.sin_family = AF_INET;
510 sin.sin_len = sizeof(sin);
511 /* XXX MRT use table 0 for this sort of thing */
512 CURVNET_SET(TD_TO_VNET(td));
513 error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
514 (struct sockaddr *)&nd->mygateway,
515 (struct sockaddr *)&mask,
516 RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
517 CURVNET_RESTORE();
518 if (error)
519 panic("nfs_mountroot: RTM_ADD: %d", error);
520 }
521
522 /*
523 * Create the rootfs mount point.
524 */
525 nd->root_args.fh = nd->root_fh;
526 nd->root_args.fhsize = nd->root_fhsize;
527 l = ntohl(nd->root_saddr.sin_addr.s_addr);
528 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
529 (l >> 24) & 0xff, (l >> 16) & 0xff,
530 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
531 printf("NFS ROOT: %s\n", buf);
532 nd->root_args.hostname = buf;
533 if ((error = nfs_mountdiskless(buf,
534 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
535 return (error);
536 }
537
538 /*
539 * This is not really an nfs issue, but it is much easier to
540 * set hostname here and then let the "/etc/rc.xxx" files
541 * mount the right /var based upon its preset value.
542 */
543 mtx_lock(&prison0.pr_mtx);
544 strlcpy(prison0.pr_hostname, nd->my_hostnam,
545 sizeof (prison0.pr_hostname));
546 mtx_unlock(&prison0.pr_mtx);
547 inittodr(ntohl(nd->root_time));
548 return (0);
549 }
550
551 /*
552 * Internal version of mount system call for diskless setup.
553 */
554 static int
555 nfs_mountdiskless(char *path,
556 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
557 struct vnode **vpp, struct mount *mp)
558 {
559 struct sockaddr *nam;
560 int error;
561
562 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
563 if ((error = mountnfs(args, mp, nam, path, vpp, td->td_ucred,
564 NFS_DEFAULT_NAMETIMEO, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
565 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
566 return (error);
567 }
568 return (0);
569 }
570
571 static int
572 nfs_sec_name_to_num(char *sec)
573 {
574 if (!strcmp(sec, "krb5"))
575 return (RPCSEC_GSS_KRB5);
576 if (!strcmp(sec, "krb5i"))
577 return (RPCSEC_GSS_KRB5I);
578 if (!strcmp(sec, "krb5p"))
579 return (RPCSEC_GSS_KRB5P);
580 if (!strcmp(sec, "sys"))
581 return (AUTH_SYS);
582 /*
583 * Userland should validate the string but we will try and
584 * cope with unexpected values.
585 */
586 return (AUTH_SYS);
587 }
588
589 static void
590 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
591 const char *hostname)
592 {
593 int s;
594 int adjsock;
595 int maxio;
596 char *p;
597 char *secname;
598 char *principal;
599
600 s = splnet();
601
602 /*
603 * Set read-only flag if requested; otherwise, clear it if this is
604 * an update. If this is not an update, then either the read-only
605 * flag is already clear, or this is a root mount and it was set
606 * intentionally at some previous point.
607 */
608 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
609 MNT_ILOCK(mp);
610 mp->mnt_flag |= MNT_RDONLY;
611 MNT_IUNLOCK(mp);
612 } else if (mp->mnt_flag & MNT_UPDATE) {
613 MNT_ILOCK(mp);
614 mp->mnt_flag &= ~MNT_RDONLY;
615 MNT_IUNLOCK(mp);
616 }
617
618 /*
619 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
620 * no sense in that context. Also, set up appropriate retransmit
621 * and soft timeout behavior.
622 */
623 if (argp->sotype == SOCK_STREAM) {
624 nmp->nm_flag &= ~NFSMNT_NOCONN;
625 nmp->nm_flag |= NFSMNT_DUMBTIMR;
626 nmp->nm_timeo = NFS_MAXTIMEO;
627 nmp->nm_retry = NFS_RETRANS_TCP;
628 }
629
630 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
631 if ((argp->flags & NFSMNT_NFSV3) == 0)
632 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
633
634 /* Re-bind if rsrvd port requested and wasn't on one */
635 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
636 && (argp->flags & NFSMNT_RESVPORT);
637 /* Also re-bind if we're switching to/from a connected UDP socket */
638 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
639 (argp->flags & NFSMNT_NOCONN));
640
641 /* Update flags atomically. Don't change the lock bits. */
642 nmp->nm_flag = argp->flags | nmp->nm_flag;
643 splx(s);
644
645 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
646 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
647 if (nmp->nm_timeo < NFS_MINTIMEO)
648 nmp->nm_timeo = NFS_MINTIMEO;
649 else if (nmp->nm_timeo > NFS_MAXTIMEO)
650 nmp->nm_timeo = NFS_MAXTIMEO;
651 }
652
653 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
654 nmp->nm_retry = argp->retrans;
655 if (nmp->nm_retry > NFS_MAXREXMIT)
656 nmp->nm_retry = NFS_MAXREXMIT;
657 }
658
659 if (argp->flags & NFSMNT_NFSV3) {
660 if (argp->sotype == SOCK_DGRAM)
661 maxio = NFS_MAXDGRAMDATA;
662 else
663 maxio = NFS_MAXDATA;
664 } else
665 maxio = NFS_V2MAXDATA;
666
667 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
668 nmp->nm_wsize = argp->wsize;
669 /* Round down to multiple of blocksize */
670 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
671 if (nmp->nm_wsize <= 0)
672 nmp->nm_wsize = NFS_FABLKSIZE;
673 }
674 if (nmp->nm_wsize > maxio)
675 nmp->nm_wsize = maxio;
676 if (nmp->nm_wsize > MAXBSIZE)
677 nmp->nm_wsize = MAXBSIZE;
678
679 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
680 nmp->nm_rsize = argp->rsize;
681 /* Round down to multiple of blocksize */
682 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
683 if (nmp->nm_rsize <= 0)
684 nmp->nm_rsize = NFS_FABLKSIZE;
685 }
686 if (nmp->nm_rsize > maxio)
687 nmp->nm_rsize = maxio;
688 if (nmp->nm_rsize > MAXBSIZE)
689 nmp->nm_rsize = MAXBSIZE;
690
691 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
692 nmp->nm_readdirsize = argp->readdirsize;
693 }
694 if (nmp->nm_readdirsize > maxio)
695 nmp->nm_readdirsize = maxio;
696 if (nmp->nm_readdirsize > nmp->nm_rsize)
697 nmp->nm_readdirsize = nmp->nm_rsize;
698
699 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
700 nmp->nm_acregmin = argp->acregmin;
701 else
702 nmp->nm_acregmin = NFS_MINATTRTIMO;
703 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
704 nmp->nm_acregmax = argp->acregmax;
705 else
706 nmp->nm_acregmax = NFS_MAXATTRTIMO;
707 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
708 nmp->nm_acdirmin = argp->acdirmin;
709 else
710 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
711 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
712 nmp->nm_acdirmax = argp->acdirmax;
713 else
714 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
715 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
716 nmp->nm_acdirmin = nmp->nm_acdirmax;
717 if (nmp->nm_acregmin > nmp->nm_acregmax)
718 nmp->nm_acregmin = nmp->nm_acregmax;
719
720 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
721 if (argp->maxgrouplist <= NFS_MAXGRPS)
722 nmp->nm_numgrps = argp->maxgrouplist;
723 else
724 nmp->nm_numgrps = NFS_MAXGRPS;
725 }
726 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
727 if (argp->readahead <= NFS_MAXRAHEAD)
728 nmp->nm_readahead = argp->readahead;
729 else
730 nmp->nm_readahead = NFS_MAXRAHEAD;
731 }
732 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
733 if (argp->wcommitsize < nmp->nm_wsize)
734 nmp->nm_wcommitsize = nmp->nm_wsize;
735 else
736 nmp->nm_wcommitsize = argp->wcommitsize;
737 }
738 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
739 if (argp->deadthresh <= NFS_MAXDEADTHRESH)
740 nmp->nm_deadthresh = argp->deadthresh;
741 else
742 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
743 }
744
745 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
746 (nmp->nm_soproto != argp->proto));
747 nmp->nm_sotype = argp->sotype;
748 nmp->nm_soproto = argp->proto;
749
750 if (nmp->nm_client && adjsock) {
751 nfs_safedisconnect(nmp);
752 if (nmp->nm_sotype == SOCK_DGRAM)
753 while (nfs_connect(nmp)) {
754 printf("nfs_args: retrying connect\n");
755 (void) tsleep(&fake_wchan, PSOCK, "nfscon", hz);
756 }
757 }
758
759 if (hostname) {
760 strlcpy(nmp->nm_hostname, hostname,
761 sizeof(nmp->nm_hostname));
762 p = strchr(nmp->nm_hostname, ':');
763 if (p)
764 *p = '\0';
765 }
766
767 if (vfs_getopt(mp->mnt_optnew, "sec",
768 (void **) &secname, NULL) == 0) {
769 nmp->nm_secflavor = nfs_sec_name_to_num(secname);
770 } else {
771 nmp->nm_secflavor = AUTH_SYS;
772 }
773
774 if (vfs_getopt(mp->mnt_optnew, "principal",
775 (void **) &principal, NULL) == 0) {
776 strlcpy(nmp->nm_principal, principal,
777 sizeof(nmp->nm_principal));
778 } else {
779 snprintf(nmp->nm_principal, sizeof(nmp->nm_principal),
780 "nfs@%s", nmp->nm_hostname);
781 }
782 }
783
/*
 * NULL-terminated list of the mount option names this filesystem
 * accepts; nfs_mount() passes it to vfs_filteropt().
 */
static const char *nfs_opts[] = {
	"from",
	"nfs_args",
	"noatime",
	"noexec",
	"suiddir",
	"nosuid",
	"nosymfollow",
	"union",
	"noclusterr",
	"noclusterw",
	"multilabel",
	"acls",
	"force",
	"update",
	"async",
	"dumbtimer",
	"noconn",
	"nolockd",
	"intr",
	"rdirplus",
	"resvport",
	"readahead",
	"readdirsize",
	"soft",
	"hard",
	"mntudp",
	"tcp",
	"udp",
	"wsize",
	"rsize",
	"retrans",
	"acregmin",
	"acregmax",
	"acdirmin",
	"acdirmax",
	"deadthresh",
	"hostname",
	"timeout",
	"addr",
	"fh",
	"nfsv3",
	"sec",
	"maxgroups",
	"principal",
	"negnametimeo",
	"nocto",
	"wcommitsize",
	"nametimeo",
	NULL
};
794
795 /*
796 * VFS Operations.
797 *
798 * mount system call
799 * It seems a bit dumb to copyinstr() the host and path here and then
800 * bcopy() them in mountnfs(), but I wanted to detect errors before
801 * doing the sockargs() call because sockargs() allocates an mbuf and
802 * an error after that means that I have to release the mbuf.
803 */
804 /* ARGSUSED */
805 static int
806 nfs_mount(struct mount *mp)
807 {
808 struct nfs_args args = {
809 .version = NFS_ARGSVERSION,
810 .addr = NULL,
811 .addrlen = sizeof (struct sockaddr_in),
812 .sotype = SOCK_STREAM,
813 .proto = 0,
814 .fh = NULL,
815 .fhsize = 0,
816 .flags = NFSMNT_RESVPORT,
817 .wsize = NFS_WSIZE,
818 .rsize = NFS_RSIZE,
819 .readdirsize = NFS_READDIRSIZE,
820 .timeo = 10,
821 .retrans = NFS_RETRANS,
822 .maxgrouplist = NFS_MAXGRPS,
823 .readahead = NFS_DEFRAHEAD,
824 .wcommitsize = 0, /* was: NQ_DEFLEASE */
825 .deadthresh = NFS_MAXDEADTHRESH, /* was: NQ_DEADTHRESH */
826 .hostname = NULL,
827 /* args version 4 */
828 .acregmin = NFS_MINATTRTIMO,
829 .acregmax = NFS_MAXATTRTIMO,
830 .acdirmin = NFS_MINDIRATTRTIMO,
831 .acdirmax = NFS_MAXDIRATTRTIMO,
832 };
833 int error, ret, has_nfs_args_opt;
834 int has_addr_opt, has_fh_opt, has_hostname_opt;
835 struct sockaddr *nam;
836 struct vnode *vp;
837 char hst[MNAMELEN];
838 size_t len;
839 u_char nfh[NFSX_V3FHMAX];
840 char *opt;
841 int nametimeo = NFS_DEFAULT_NAMETIMEO;
842 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
843
844 has_nfs_args_opt = 0;
845 has_addr_opt = 0;
846 has_fh_opt = 0;
847 has_hostname_opt = 0;
848
849 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
850 error = EINVAL;
851 goto out;
852 }
853
854 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
855 error = nfs_mountroot(mp);
856 goto out;
857 }
858
859 /*
860 * The old mount_nfs program passed the struct nfs_args
861 * from userspace to kernel. The new mount_nfs program
862 * passes string options via nmount() from userspace to kernel
863 * and we populate the struct nfs_args in the kernel.
864 */
865 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
866 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
867 sizeof args);
868 if (error)
869 goto out;
870
871 if (args.version != NFS_ARGSVERSION) {
872 error = EPROGMISMATCH;
873 goto out;
874 }
875 has_nfs_args_opt = 1;
876 }
877
878 if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
879 args.flags |= NFSMNT_DUMBTIMR;
880 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
881 args.flags |= NFSMNT_NOCONN;
882 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
883 args.flags |= NFSMNT_NOCONN;
884 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
885 args.flags |= NFSMNT_NOLOCKD;
886 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
887 args.flags &= ~NFSMNT_NOLOCKD;
888 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
889 args.flags |= NFSMNT_INT;
890 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
891 args.flags |= NFSMNT_RDIRPLUS;
892 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
893 args.flags |= NFSMNT_RESVPORT;
894 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
895 args.flags &= ~NFSMNT_RESVPORT;
896 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
897 args.flags |= NFSMNT_SOFT;
898 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
899 args.flags &= ~NFSMNT_SOFT;
900 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
901 args.sotype = SOCK_DGRAM;
902 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
903 args.sotype = SOCK_DGRAM;
904 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
905 args.sotype = SOCK_STREAM;
906 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
907 args.flags |= NFSMNT_NFSV3;
908 if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
909 args.flags |= NFSMNT_NOCTO;
910 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
911 if (opt == NULL) {
912 vfs_mount_error(mp, "illegal readdirsize");
913 error = EINVAL;
914 goto out;
915 }
916 ret = sscanf(opt, "%d", &args.readdirsize);
917 if (ret != 1 || args.readdirsize <= 0) {
918 vfs_mount_error(mp, "illegal readdirsize: %s",
919 opt);
920 error = EINVAL;
921 goto out;
922 }
923 args.flags |= NFSMNT_READDIRSIZE;
924 }
925 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
926 if (opt == NULL) {
927 vfs_mount_error(mp, "illegal readahead");
928 error = EINVAL;
929 goto out;
930 }
931 ret = sscanf(opt, "%d", &args.readahead);
932 if (ret != 1 || args.readahead <= 0) {
933 vfs_mount_error(mp, "illegal readahead: %s",
934 opt);
935 error = EINVAL;
936 goto out;
937 }
938 args.flags |= NFSMNT_READAHEAD;
939 }
940 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
941 if (opt == NULL) {
942 vfs_mount_error(mp, "illegal wsize");
943 error = EINVAL;
944 goto out;
945 }
946 ret = sscanf(opt, "%d", &args.wsize);
947 if (ret != 1 || args.wsize <= 0) {
948 vfs_mount_error(mp, "illegal wsize: %s",
949 opt);
950 error = EINVAL;
951 goto out;
952 }
953 args.flags |= NFSMNT_WSIZE;
954 }
955 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
956 if (opt == NULL) {
957 vfs_mount_error(mp, "illegal rsize");
958 error = EINVAL;
959 goto out;
960 }
961 ret = sscanf(opt, "%d", &args.rsize);
962 if (ret != 1 || args.rsize <= 0) {
963 vfs_mount_error(mp, "illegal wsize: %s",
964 opt);
965 error = EINVAL;
966 goto out;
967 }
968 args.flags |= NFSMNT_RSIZE;
969 }
970 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
971 if (opt == NULL) {
972 vfs_mount_error(mp, "illegal retrans");
973 error = EINVAL;
974 goto out;
975 }
976 ret = sscanf(opt, "%d", &args.retrans);
977 if (ret != 1 || args.retrans <= 0) {
978 vfs_mount_error(mp, "illegal retrans: %s",
979 opt);
980 error = EINVAL;
981 goto out;
982 }
983 args.flags |= NFSMNT_RETRANS;
984 }
985 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
986 ret = sscanf(opt, "%d", &args.acregmin);
987 if (ret != 1 || args.acregmin < 0) {
988 vfs_mount_error(mp, "illegal acregmin: %s",
989 opt);
990 error = EINVAL;
991 goto out;
992 }
993 args.flags |= NFSMNT_ACREGMIN;
994 }
995 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
996 ret = sscanf(opt, "%d", &args.acregmax);
997 if (ret != 1 || args.acregmax < 0) {
998 vfs_mount_error(mp, "illegal acregmax: %s",
999 opt);
1000 error = EINVAL;
1001 goto out;
1002 }
1003 args.flags |= NFSMNT_ACREGMAX;
1004 }
1005 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1006 ret = sscanf(opt, "%d", &args.acdirmin);
1007 if (ret != 1 || args.acdirmin < 0) {
1008 vfs_mount_error(mp, "illegal acdirmin: %s",
1009 opt);
1010 error = EINVAL;
1011 goto out;
1012 }
1013 args.flags |= NFSMNT_ACDIRMIN;
1014 }
1015 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1016 ret = sscanf(opt, "%d", &args.acdirmax);
1017 if (ret != 1 || args.acdirmax < 0) {
1018 vfs_mount_error(mp, "illegal acdirmax: %s",
1019 opt);
1020 error = EINVAL;
1021 goto out;
1022 }
1023 args.flags |= NFSMNT_ACDIRMAX;
1024 }
1025 if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1026 ret = sscanf(opt, "%d", &args.wcommitsize);
1027 if (ret != 1 || args.wcommitsize < 0) {
1028 vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1029 error = EINVAL;
1030 goto out;
1031 }
1032 args.flags |= NFSMNT_WCOMMITSIZE;
1033 }
1034 if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) {
1035 ret = sscanf(opt, "%d", &args.deadthresh);
1036 if (ret != 1 || args.deadthresh <= 0) {
1037 vfs_mount_error(mp, "illegal deadthresh: %s",
1038 opt);
1039 error = EINVAL;
1040 goto out;
1041 }
1042 args.flags |= NFSMNT_DEADTHRESH;
1043 }
1044 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1045 ret = sscanf(opt, "%d", &args.timeo);
1046 if (ret != 1 || args.timeo <= 0) {
1047 vfs_mount_error(mp, "illegal timeout: %s",
1048 opt);
1049 error = EINVAL;
1050 goto out;
1051 }
1052 args.flags |= NFSMNT_TIMEO;
1053 }
1054 if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) {
1055 ret = sscanf(opt, "%d", &args.maxgrouplist);
1056 if (ret != 1 || args.maxgrouplist <= 0) {
1057 vfs_mount_error(mp, "illegal maxgroups: %s",
1058 opt);
1059 error = EINVAL;
1060 goto out;
1061 }
1062 args.flags |= NFSMNT_MAXGRPS;
1063 }
1064 if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1065 ret = sscanf(opt, "%d", &nametimeo);
1066 if (ret != 1 || nametimeo < 0) {
1067 vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1068 error = EINVAL;
1069 goto out;
1070 }
1071 }
1072 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1073 == 0) {
1074 ret = sscanf(opt, "%d", &negnametimeo);
1075 if (ret != 1 || negnametimeo < 0) {
1076 vfs_mount_error(mp, "illegal negnametimeo: %s",
1077 opt);
1078 error = EINVAL;
1079 goto out;
1080 }
1081 }
1082 if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1083 &args.addrlen) == 0) {
1084 has_addr_opt = 1;
1085 if (args.addrlen > SOCK_MAXADDRLEN) {
1086 error = ENAMETOOLONG;
1087 goto out;
1088 }
1089 nam = malloc(args.addrlen, M_SONAME,
1090 M_WAITOK);
1091 bcopy(args.addr, nam, args.addrlen);
1092 nam->sa_len = args.addrlen;
1093 }
1094 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1095 &args.fhsize) == 0) {
1096 has_fh_opt = 1;
1097 }
1098 if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
1099 NULL) == 0) {
1100 has_hostname_opt = 1;
1101 }
1102 if (args.hostname == NULL) {
1103 vfs_mount_error(mp, "Invalid hostname");
1104 error = EINVAL;
1105 goto out;
1106 }
1107 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1108 vfs_mount_error(mp, "Bad file handle");
1109 error = EINVAL;
1110 goto out;
1111 }
1112
1113 if (mp->mnt_flag & MNT_UPDATE) {
1114 struct nfsmount *nmp = VFSTONFS(mp);
1115
1116 if (nmp == NULL) {
1117 error = EIO;
1118 goto out;
1119 }
1120
1121 /*
1122 * If a change from TCP->UDP is done and there are thread(s)
1123 * that have I/O RPC(s) in progress with a transfer size
1124 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1125 * hung, retrying the RPC(s) forever. Usually these threads
1126 * will be seen doing an uninterruptible sleep on wait channel
1127 * "newnfsreq" (truncated to "newnfsre" by procstat).
1128 */
1129 if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1130 tprintf(curthread->td_proc, LOG_WARNING,
1131 "Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1132
1133 /*
1134 * When doing an update, we can't change from or to
1135 * v3, switch lockd strategies or change cookie translation
1136 */
1137 args.flags = (args.flags &
1138 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1139 (nmp->nm_flag &
1140 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1141 nfs_decode_args(mp, nmp, &args, NULL);
1142 goto out;
1143 }
1144
1145 /*
1146 * Make the nfs_ip_paranoia sysctl serve as the default connection
1147 * or no-connection mode for those protocols that support
1148 * no-connection mode (the flag will be cleared later for protocols
1149 * that do not support no-connection mode). This will allow a client
1150 * to receive replies from a different IP than the request was
1151 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1152 * not 0.
1153 */
1154 if (nfs_ip_paranoia == 0)
1155 args.flags |= NFSMNT_NOCONN;
1156
1157 if (has_nfs_args_opt) {
1158 /*
1159 * In the 'nfs_args' case, the pointers in the args
1160 * structure are in userland - we copy them in here.
1161 */
1162 if (!has_fh_opt) {
1163 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1164 args.fhsize);
1165 if (error) {
1166 goto out;
1167 }
1168 args.fh = nfh;
1169 }
1170 if (!has_hostname_opt) {
1171 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
1172 if (error) {
1173 goto out;
1174 }
1175 bzero(&hst[len], MNAMELEN - len);
1176 args.hostname = hst;
1177 }
1178 if (!has_addr_opt) {
1179 /* sockargs() call must be after above copyin() calls */
1180 error = getsockaddr(&nam, (caddr_t)args.addr,
1181 args.addrlen);
1182 if (error) {
1183 goto out;
1184 }
1185 }
1186 } else if (has_addr_opt == 0) {
1187 vfs_mount_error(mp, "No server address");
1188 error = EINVAL;
1189 goto out;
1190 }
1191 error = mountnfs(&args, mp, nam, args.hostname, &vp,
1192 curthread->td_ucred, nametimeo, negnametimeo);
1193 out:
1194 if (!error) {
1195 MNT_ILOCK(mp);
1196 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
1197 MNT_IUNLOCK(mp);
1198 }
1199 return (error);
1200 }
1201
1202
1203 /*
1204 * VFS Operations.
1205 *
1206 * mount system call
1207 * It seems a bit dumb to copyinstr() the host and path here and then
1208 * bcopy() them in mountnfs(), but I wanted to detect errors before
1209 * doing the sockargs() call because sockargs() allocates an mbuf and
1210 * an error after that means that I have to release the mbuf.
1211 */
1212 /* ARGSUSED */
1213 static int
1214 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1215 {
1216 int error;
1217 struct nfs_args args;
1218
1219 error = copyin(data, &args, sizeof (struct nfs_args));
1220 if (error)
1221 return error;
1222
1223 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1224
1225 error = kernel_mount(ma, flags);
1226 return (error);
1227 }
1228
1229 /*
1230 * Common code for mount and mountroot
1231 */
1232 static int
1233 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1234 char *hst, struct vnode **vpp, struct ucred *cred, int nametimeo,
1235 int negnametimeo)
1236 {
1237 struct nfsmount *nmp;
1238 struct nfsnode *np;
1239 int error;
1240 struct vattr attrs;
1241
1242 if (mp->mnt_flag & MNT_UPDATE) {
1243 nmp = VFSTONFS(mp);
1244 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1245 free(nam, M_SONAME);
1246 return (0);
1247 } else {
1248 nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
1249 bzero((caddr_t)nmp, sizeof (struct nfsmount));
1250 TAILQ_INIT(&nmp->nm_bufq);
1251 mp->mnt_data = nmp;
1252 nmp->nm_getinfo = nfs_getnlminfo;
1253 nmp->nm_vinvalbuf = nfs_vinvalbuf;
1254 }
1255 vfs_getnewfsid(mp);
1256 nmp->nm_mountp = mp;
1257 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
1258
1259 /*
1260 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1261 * high, depending on whether we end up with negative offsets in
1262 * the client or server somewhere. 2GB-1 may be safer.
1263 *
1264 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
1265 * that we can handle until we find out otherwise.
1266 */
1267 if ((argp->flags & NFSMNT_NFSV3) == 0)
1268 nmp->nm_maxfilesize = 0xffffffffLL;
1269 else
1270 nmp->nm_maxfilesize = OFF_MAX;
1271
1272 nmp->nm_timeo = NFS_TIMEO;
1273 nmp->nm_retry = NFS_RETRANS;
1274 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
1275 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
1276 } else {
1277 nmp->nm_wsize = NFS_WSIZE;
1278 nmp->nm_rsize = NFS_RSIZE;
1279 }
1280 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1281 nmp->nm_readdirsize = NFS_READDIRSIZE;
1282 nmp->nm_numgrps = NFS_MAXGRPS;
1283 nmp->nm_readahead = NFS_DEFRAHEAD;
1284 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
1285 nmp->nm_nametimeo = nametimeo;
1286 nmp->nm_negnametimeo = negnametimeo;
1287 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1288 if (nmp->nm_tprintf_delay < 0)
1289 nmp->nm_tprintf_delay = 0;
1290 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1291 if (nmp->nm_tprintf_initial_delay < 0)
1292 nmp->nm_tprintf_initial_delay = 0;
1293 nmp->nm_fhsize = argp->fhsize;
1294 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1295 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1296 nmp->nm_nam = nam;
1297 /* Set up the sockets and per-host congestion */
1298 nmp->nm_sotype = argp->sotype;
1299 nmp->nm_soproto = argp->proto;
1300 nmp->nm_rpcops = &nfs_rpcops;
1301
1302 nfs_decode_args(mp, nmp, argp, hst);
1303
1304 /*
1305 * For Connection based sockets (TCP,...) defer the connect until
1306 * the first request, in case the server is not responding.
1307 */
1308 if (nmp->nm_sotype == SOCK_DGRAM &&
1309 (error = nfs_connect(nmp)))
1310 goto bad;
1311
1312 /*
1313 * This is silly, but it has to be set so that vinifod() works.
1314 * We do not want to do an nfs_statfs() here since we can get
1315 * stuck on a dead server and we are holding a lock on the mount
1316 * point.
1317 */
1318 mtx_lock(&nmp->nm_mtx);
1319 mp->mnt_stat.f_iosize = nfs_iosize(nmp);
1320 mtx_unlock(&nmp->nm_mtx);
1321 /*
1322 * A reference count is needed on the nfsnode representing the
1323 * remote root. If this object is not persistent, then backward
1324 * traversals of the mount point (i.e. "..") will not work if
1325 * the nfsnode gets flushed out of the cache. Ufs does not have
1326 * this problem, because one can identify root inodes by their
1327 * number == ROOTINO (2).
1328 */
1329 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
1330 if (error)
1331 goto bad;
1332 *vpp = NFSTOV(np);
1333
1334 /*
1335 * Get file attributes and transfer parameters for the
1336 * mountpoint. This has the side effect of filling in
1337 * (*vpp)->v_type with the correct value.
1338 */
1339 if (argp->flags & NFSMNT_NFSV3)
1340 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
1341 else
1342 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred);
1343
1344 /*
1345 * Lose the lock but keep the ref.
1346 */
1347 VOP_UNLOCK(*vpp, 0);
1348
1349 return (0);
1350 bad:
1351 nfs_disconnect(nmp);
1352 mtx_destroy(&nmp->nm_mtx);
1353 uma_zfree(nfsmount_zone, nmp);
1354 free(nam, M_SONAME);
1355 return (error);
1356 }
1357
1358 /*
1359 * unmount system call
1360 */
1361 static int
1362 nfs_unmount(struct mount *mp, int mntflags)
1363 {
1364 struct nfsmount *nmp;
1365 int error, flags = 0, i;
1366
1367 if (mntflags & MNT_FORCE)
1368 flags |= FORCECLOSE;
1369 nmp = VFSTONFS(mp);
1370 /*
1371 * Goes something like this..
1372 * - Call vflush() to clear out vnodes for this filesystem
1373 * - Close the socket
1374 * - Free up the data structures
1375 */
1376 /* In the forced case, cancel any outstanding requests. */
1377 if (flags & FORCECLOSE) {
1378 error = nfs_nmcancelreqs(nmp);
1379 if (error)
1380 goto out;
1381 }
1382 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1383 error = vflush(mp, 1, flags, curthread);
1384 if (error)
1385 goto out;
1386
1387 /*
1388 * We are now committed to the unmount.
1389 */
1390 /* Make sure no nfsiods are assigned to this mount. */
1391 mtx_lock(&nfs_iod_mtx);
1392 for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1393 if (nfs_iodmount[i] == nmp) {
1394 nfs_iodwant[i] = NFSIOD_AVAILABLE;
1395 nfs_iodmount[i] = NULL;
1396 }
1397 mtx_unlock(&nfs_iod_mtx);
1398 nfs_disconnect(nmp);
1399 free(nmp->nm_nam, M_SONAME);
1400
1401 mtx_destroy(&nmp->nm_mtx);
1402 uma_zfree(nfsmount_zone, nmp);
1403 out:
1404 return (error);
1405 }
1406
1407 /*
1408 * Return root of a filesystem
1409 */
1410 static int
1411 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1412 {
1413 struct vnode *vp;
1414 struct nfsmount *nmp;
1415 struct nfsnode *np;
1416 int error;
1417
1418 nmp = VFSTONFS(mp);
1419 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1420 if (error)
1421 return error;
1422 vp = NFSTOV(np);
1423 /*
1424 * Get transfer parameters and attributes for root vnode once.
1425 */
1426 mtx_lock(&nmp->nm_mtx);
1427 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
1428 (nmp->nm_flag & NFSMNT_NFSV3)) {
1429 mtx_unlock(&nmp->nm_mtx);
1430 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1431 } else
1432 mtx_unlock(&nmp->nm_mtx);
1433 if (vp->v_type == VNON)
1434 vp->v_type = VDIR;
1435 vp->v_vflag |= VV_ROOT;
1436 *vpp = vp;
1437 return (0);
1438 }
1439
1440 /*
1441 * Flush out the buffer cache
1442 */
1443 /* ARGSUSED */
1444 static int
1445 nfs_sync(struct mount *mp, int waitfor)
1446 {
1447 struct vnode *vp, *mvp;
1448 struct thread *td;
1449 int error, allerror = 0;
1450
1451 td = curthread;
1452
1453 MNT_ILOCK(mp);
1454 /*
1455 * If a forced dismount is in progress, return from here so that
1456 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1457 * calling VFS_UNMOUNT().
1458 */
1459 if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1460 MNT_IUNLOCK(mp);
1461 return (EBADF);
1462 }
1463 MNT_IUNLOCK(mp);
1464
1465 /*
1466 * Force stale buffer cache information to be flushed.
1467 */
1468 loop:
1469 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1470 /* XXX Racy bv_cnt check. */
1471 if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1472 waitfor == MNT_LAZY) {
1473 VI_UNLOCK(vp);
1474 continue;
1475 }
1476 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1477 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1478 goto loop;
1479 }
1480 error = VOP_FSYNC(vp, waitfor, td);
1481 if (error)
1482 allerror = error;
1483 VOP_UNLOCK(vp, 0);
1484 vrele(vp);
1485 }
1486 return (allerror);
1487 }
1488
1489 static int
1490 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1491 {
1492 struct nfsmount *nmp = VFSTONFS(mp);
1493 struct vfsquery vq;
1494 int error;
1495
1496 bzero(&vq, sizeof(vq));
1497 switch (op) {
1498 #if 0
1499 case VFS_CTL_NOLOCKS:
1500 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1501 if (req->oldptr != NULL) {
1502 error = SYSCTL_OUT(req, &val, sizeof(val));
1503 if (error)
1504 return (error);
1505 }
1506 if (req->newptr != NULL) {
1507 error = SYSCTL_IN(req, &val, sizeof(val));
1508 if (error)
1509 return (error);
1510 if (val)
1511 nmp->nm_flag |= NFSMNT_NOLOCKS;
1512 else
1513 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1514 }
1515 break;
1516 #endif
1517 case VFS_CTL_QUERY:
1518 mtx_lock(&nmp->nm_mtx);
1519 if (nmp->nm_state & NFSSTA_TIMEO)
1520 vq.vq_flags |= VQ_NOTRESP;
1521 mtx_unlock(&nmp->nm_mtx);
1522 #if 0
1523 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1524 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1525 vq.vq_flags |= VQ_NOTRESPLOCK;
1526 #endif
1527 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1528 break;
1529 case VFS_CTL_TIMEO:
1530 if (req->oldptr != NULL) {
1531 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1532 sizeof(nmp->nm_tprintf_initial_delay));
1533 if (error)
1534 return (error);
1535 }
1536 if (req->newptr != NULL) {
1537 error = vfs_suser(mp, req->td);
1538 if (error)
1539 return (error);
1540 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1541 sizeof(nmp->nm_tprintf_initial_delay));
1542 if (error)
1543 return (error);
1544 if (nmp->nm_tprintf_initial_delay < 0)
1545 nmp->nm_tprintf_initial_delay = 0;
1546 }
1547 break;
1548 default:
1549 return (ENOTSUP);
1550 }
1551 return (0);
1552 }
1553
1554 /*
1555 * Extract the information needed by the nlm from the nfs vnode.
1556 */
1557 static void
1558 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1559 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1560 struct timeval *timeop)
1561 {
1562 struct nfsmount *nmp;
1563 struct nfsnode *np = VTONFS(vp);
1564
1565 nmp = VFSTONFS(vp->v_mount);
1566 if (fhlenp != NULL)
1567 *fhlenp = (size_t)np->n_fhsize;
1568 if (fhp != NULL)
1569 bcopy(np->n_fhp, fhp, np->n_fhsize);
1570 if (sp != NULL)
1571 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1572 if (is_v3p != NULL)
1573 *is_v3p = NFS_ISV3(vp);
1574 if (sizep != NULL)
1575 *sizep = np->n_size;
1576 if (timeop != NULL) {
1577 timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1578 timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1579 }
1580 }
1581
Cache object: 4142fff69c67e7aabac3f43ec1c06c59
|