1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/jail.h>
48 #include <sys/limits.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/syslog.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <net/vnet.h>
70
71 #include <netinet/in.h>
72
73 #include <rpc/rpc.h>
74
75 #include <nfs/nfsproto.h>
76 #include <nfsclient/nfs.h>
77 #include <nfsclient/nfsnode.h>
78 #include <nfsclient/nfsmount.h>
79 #include <nfs/xdr_subs.h>
80 #include <nfsclient/nfsm_subs.h>
81 #include <nfs/nfsdiskless.h>
82
83 MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
84 MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
85 MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
86 MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
87 MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");
88
89 uma_zone_t nfsmount_zone;
90
91 struct nfsstats nfsstats;
92
93 SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
94 SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
95 &nfsstats, nfsstats, "S,nfsstats");
96 static int nfs_ip_paranoia = 1;
97 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
98 &nfs_ip_paranoia, 0,
99 "Disallow accepting replies from IPs which differ from those sent");
100 #ifdef NFS_DEBUG
101 int nfs_debug;
102 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
103 "Toggle debug flag");
104 #endif
105 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
106 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
107 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0,
108 "Delay before printing \"nfs server not responding\" messages");
109 /* how long between console messages "nfs server foo not responding" */
110 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
111 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
112 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0,
113 "Delay between printing \"nfs server not responding\" messages");
114
115 static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
116 struct nfs_args *argp, const char *hostname);
117 static int mountnfs(struct nfs_args *, struct mount *,
118 struct sockaddr *, char *, struct vnode **,
119 struct ucred *cred, int, int);
120 static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
121 struct sockaddr_storage *, int *, off_t *,
122 struct timeval *);
123 static vfs_mount_t nfs_mount;
124 static vfs_cmount_t nfs_cmount;
125 static vfs_unmount_t nfs_unmount;
126 static vfs_root_t nfs_root;
127 static vfs_statfs_t nfs_statfs;
128 static vfs_sync_t nfs_sync;
129 static vfs_sysctl_t nfs_sysctl;
130
131 static int fake_wchan;
132
133 /*
134 * nfs vfs operations.
135 */
136 static struct vfsops nfs_vfsops = {
137 .vfs_init = nfs_init,
138 .vfs_mount = nfs_mount,
139 .vfs_cmount = nfs_cmount,
140 .vfs_root = nfs_root,
141 .vfs_statfs = nfs_statfs,
142 .vfs_sync = nfs_sync,
143 .vfs_uninit = nfs_uninit,
144 .vfs_unmount = nfs_unmount,
145 .vfs_sysctl = nfs_sysctl,
146 };
147 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
148
149 /* So that loader and kldload(2) can find us, wherever we are.. */
150 MODULE_VERSION(nfs, 1);
151 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
152 #ifdef KGSSAPI
153 MODULE_DEPEND(nfs, kgssapi, 1, 1, 1);
154 #endif
155 MODULE_DEPEND(nfs, nfs_common, 1, 1, 1);
156 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
157
158 static struct nfs_rpcops nfs_rpcops = {
159 nfs_readrpc,
160 nfs_writerpc,
161 nfs_writebp,
162 nfs_readlinkrpc,
163 nfs_invaldir,
164 nfs_commit,
165 };
166
167 /*
168 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
169 * can be shared by both NFS clients. It is declared here so that it
170 * will be defined for kernels built without NFS_ROOT, although it
171 * isn't used in that case.
172 */
173 #ifndef NFS_ROOT
174 struct nfs_diskless nfs_diskless = { { { 0 } } };
175 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
176 int nfs_diskless_valid = 0;
177 #endif
178
179 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
180 &nfs_diskless_valid, 0,
181 "Has the diskless struct been filled correctly");
182
183 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
184 nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
185
186 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
187 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
188 "%Ssockaddr_in", "Diskless root nfs address");
189
190
191 void nfsargs_ntoh(struct nfs_args *);
192 static int nfs_mountdiskless(char *,
193 struct sockaddr_in *, struct nfs_args *,
194 struct thread *, struct vnode **, struct mount *);
195 static void nfs_convert_diskless(void);
196 static void nfs_convert_oargs(struct nfs_args *args,
197 struct onfs_args *oargs);
198
199 int
200 nfs_iosize(struct nfsmount *nmp)
201 {
202 int iosize;
203
204 /*
205 * Calculate the size used for io buffers. Use the larger
206 * of the two sizes to minimise nfs requests but make sure
207 * that it is at least one VM page to avoid wasting buffer
208 * space.
209 */
210 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
211 iosize = imax(iosize, PAGE_SIZE);
212 return (iosize);
213 }
214
215 static void
216 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
217 {
218
219 args->version = NFS_ARGSVERSION;
220 args->addr = oargs->addr;
221 args->addrlen = oargs->addrlen;
222 args->sotype = oargs->sotype;
223 args->proto = oargs->proto;
224 args->fh = oargs->fh;
225 args->fhsize = oargs->fhsize;
226 args->flags = oargs->flags;
227 args->wsize = oargs->wsize;
228 args->rsize = oargs->rsize;
229 args->readdirsize = oargs->readdirsize;
230 args->timeo = oargs->timeo;
231 args->retrans = oargs->retrans;
232 args->maxgrouplist = oargs->maxgrouplist;
233 args->readahead = oargs->readahead;
234 args->deadthresh = oargs->deadthresh;
235 args->hostname = oargs->hostname;
236 }
237
238 static void
239 nfs_convert_diskless(void)
240 {
241
242 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
243 sizeof(struct ifaliasreq));
244 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
245 sizeof(struct sockaddr_in));
246 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
247 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
248 nfsv3_diskless.root_fhsize = NFSX_V3FH;
249 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
250 } else {
251 nfsv3_diskless.root_fhsize = NFSX_V2FH;
252 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
253 }
254 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
255 sizeof(struct sockaddr_in));
256 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
257 nfsv3_diskless.root_time = nfs_diskless.root_time;
258 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
259 MAXHOSTNAMELEN);
260 nfs_diskless_valid = 3;
261 }
262
263 /*
264 * nfs statfs call
265 */
266 static int
267 nfs_statfs(struct mount *mp, struct statfs *sbp)
268 {
269 struct vnode *vp;
270 struct thread *td;
271 struct nfs_statfs *sfp;
272 caddr_t bpos, dpos;
273 struct nfsmount *nmp = VFSTONFS(mp);
274 int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
275 struct mbuf *mreq, *mrep, *md, *mb;
276 struct nfsnode *np;
277 u_quad_t tquad;
278
279 td = curthread;
280 #ifndef nolint
281 sfp = NULL;
282 #endif
283 error = vfs_busy(mp, MBF_NOWAIT);
284 if (error)
285 return (error);
286 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
287 if (error) {
288 vfs_unbusy(mp);
289 return (error);
290 }
291 vp = NFSTOV(np);
292 mtx_lock(&nmp->nm_mtx);
293 if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
294 mtx_unlock(&nmp->nm_mtx);
295 (void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
296 } else
297 mtx_unlock(&nmp->nm_mtx);
298 nfsstats.rpccnt[NFSPROC_FSSTAT]++;
299 mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
300 mb = mreq;
301 bpos = mtod(mb, caddr_t);
302 nfsm_fhtom(vp, v3);
303 nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
304 if (v3)
305 nfsm_postop_attr(vp, retattr);
306 if (error) {
307 if (mrep != NULL)
308 m_freem(mrep);
309 goto nfsmout;
310 }
311 sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
312 mtx_lock(&nmp->nm_mtx);
313 sbp->f_iosize = nfs_iosize(nmp);
314 mtx_unlock(&nmp->nm_mtx);
315 if (v3) {
316 sbp->f_bsize = NFS_FABLKSIZE;
317 tquad = fxdr_hyper(&sfp->sf_tbytes);
318 sbp->f_blocks = tquad / NFS_FABLKSIZE;
319 tquad = fxdr_hyper(&sfp->sf_fbytes);
320 sbp->f_bfree = tquad / NFS_FABLKSIZE;
321 tquad = fxdr_hyper(&sfp->sf_abytes);
322 sbp->f_bavail = tquad / NFS_FABLKSIZE;
323 sbp->f_files = (fxdr_unsigned(int32_t,
324 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
325 sbp->f_ffree = (fxdr_unsigned(int32_t,
326 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
327 } else {
328 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
329 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
330 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
331 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
332 sbp->f_files = 0;
333 sbp->f_ffree = 0;
334 }
335 m_freem(mrep);
336 nfsmout:
337 vput(vp);
338 vfs_unbusy(mp);
339 return (error);
340 }
341
342 /*
343 * nfs version 3 fsinfo rpc call
344 */
345 int
346 nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
347 struct thread *td)
348 {
349 struct nfsv3_fsinfo *fsp;
350 u_int32_t pref, max;
351 caddr_t bpos, dpos;
352 int error = 0, retattr;
353 struct mbuf *mreq, *mrep, *md, *mb;
354 u_int64_t maxfsize;
355
356 nfsstats.rpccnt[NFSPROC_FSINFO]++;
357 mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
358 mb = mreq;
359 bpos = mtod(mb, caddr_t);
360 nfsm_fhtom(vp, 1);
361 nfsm_request(vp, NFSPROC_FSINFO, td, cred);
362 nfsm_postop_attr(vp, retattr);
363 if (!error) {
364 fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
365 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
366 mtx_lock(&nmp->nm_mtx);
367 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
368 nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
369 ~(NFS_FABLKSIZE - 1);
370 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
371 if (max < nmp->nm_wsize && max > 0) {
372 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
373 if (nmp->nm_wsize == 0)
374 nmp->nm_wsize = max;
375 }
376 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
377 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
378 nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
379 ~(NFS_FABLKSIZE - 1);
380 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
381 if (max < nmp->nm_rsize && max > 0) {
382 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
383 if (nmp->nm_rsize == 0)
384 nmp->nm_rsize = max;
385 }
386 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
387 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
388 nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
389 ~(NFS_DIRBLKSIZ - 1);
390 if (max < nmp->nm_readdirsize && max > 0) {
391 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
392 if (nmp->nm_readdirsize == 0)
393 nmp->nm_readdirsize = max;
394 }
395 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
396 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
397 nmp->nm_maxfilesize = maxfsize;
398 nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
399 nmp->nm_state |= NFSSTA_GOTFSINFO;
400 mtx_unlock(&nmp->nm_mtx);
401 }
402 m_freem(mrep);
403 nfsmout:
404 return (error);
405 }
406
407 /*
408 * Mount a remote root fs via. nfs. This depends on the info in the
409 * nfs_diskless structure that has been filled in properly by some primary
410 * bootstrap.
411 * It goes something like this:
412 * - do enough of "ifconfig" by calling ifioctl() so that the system
413 * can talk to the server
414 * - If nfs_diskless.mygateway is filled in, use that address as
415 * a default gateway.
416 * - build the rootfs mount point and call mountnfs() to do the rest.
417 *
418 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
419 * structure, as well as other global NFS client variables here, as
420 * nfs_mountroot() will be called once in the boot before any other NFS
421 * client activity occurs.
422 */
423 int
424 nfs_mountroot(struct mount *mp)
425 {
426 struct thread *td = curthread;
427 struct nfsv3_diskless *nd = &nfsv3_diskless;
428 struct socket *so;
429 struct vnode *vp;
430 struct ifreq ir;
431 int error;
432 u_long l;
433 char buf[128];
434 char *cp;
435
436
437 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
438 bootpc_init(); /* use bootp to get nfs_diskless filled in */
439 #elif defined(NFS_ROOT)
440 nfs_setup_diskless();
441 #endif
442
443 if (nfs_diskless_valid == 0) {
444 return (-1);
445 }
446 if (nfs_diskless_valid == 1)
447 nfs_convert_diskless();
448
449 /*
450 * XXX splnet, so networks will receive...
451 */
452 splnet();
453
454 /*
455 * Do enough of ifconfig(8) so that the critical net interface can
456 * talk to the server.
457 */
458 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
459 td->td_ucred, td);
460 if (error)
461 panic("nfs_mountroot: socreate(%04x): %d",
462 nd->myif.ifra_addr.sa_family, error);
463
464 #if 0 /* XXX Bad idea */
465 /*
466 * We might not have been told the right interface, so we pass
467 * over the first ten interfaces of the same kind, until we get
468 * one of them configured.
469 */
470
471 for (i = strlen(nd->myif.ifra_name) - 1;
472 nd->myif.ifra_name[i] >= '' &&
473 nd->myif.ifra_name[i] <= '9';
474 nd->myif.ifra_name[i] ++) {
475 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
476 if(!error)
477 break;
478 }
479 #endif
480
481 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
482 if (error)
483 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
484
485 if ((cp = getenv("boot.netif.mtu")) != NULL) {
486 ir.ifr_mtu = strtol(cp, NULL, 10);
487 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
488 freeenv(cp);
489 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
490 if (error)
491 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
492 }
493 soclose(so);
494
495 /*
496 * If the gateway field is filled in, set it as the default route.
497 * Note that pxeboot will set a default route of 0 if the route
498 * is not set by the DHCP server. Check also for a value of 0
499 * to avoid panicking inappropriately in that situation.
500 */
501 if (nd->mygateway.sin_len != 0 &&
502 nd->mygateway.sin_addr.s_addr != 0) {
503 struct sockaddr_in mask, sin;
504
505 bzero((caddr_t)&mask, sizeof(mask));
506 sin = mask;
507 sin.sin_family = AF_INET;
508 sin.sin_len = sizeof(sin);
509 /* XXX MRT use table 0 for this sort of thing */
510 CURVNET_SET(TD_TO_VNET(td));
511 error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
512 (struct sockaddr *)&nd->mygateway,
513 (struct sockaddr *)&mask,
514 RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
515 CURVNET_RESTORE();
516 if (error)
517 panic("nfs_mountroot: RTM_ADD: %d", error);
518 }
519
520 /*
521 * Create the rootfs mount point.
522 */
523 nd->root_args.fh = nd->root_fh;
524 nd->root_args.fhsize = nd->root_fhsize;
525 l = ntohl(nd->root_saddr.sin_addr.s_addr);
526 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
527 (l >> 24) & 0xff, (l >> 16) & 0xff,
528 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
529 printf("NFS ROOT: %s\n", buf);
530 nd->root_args.hostname = buf;
531 if ((error = nfs_mountdiskless(buf,
532 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
533 return (error);
534 }
535
536 /*
537 * This is not really an nfs issue, but it is much easier to
538 * set hostname here and then let the "/etc/rc.xxx" files
539 * mount the right /var based upon its preset value.
540 */
541 mtx_lock(&prison0.pr_mtx);
542 strlcpy(prison0.pr_hostname, nd->my_hostnam,
543 sizeof (prison0.pr_hostname));
544 mtx_unlock(&prison0.pr_mtx);
545 inittodr(ntohl(nd->root_time));
546 return (0);
547 }
548
549 /*
550 * Internal version of mount system call for diskless setup.
551 */
552 static int
553 nfs_mountdiskless(char *path,
554 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
555 struct vnode **vpp, struct mount *mp)
556 {
557 struct sockaddr *nam;
558 int error;
559
560 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
561 if ((error = mountnfs(args, mp, nam, path, vpp, td->td_ucred,
562 NFS_DEFAULT_NAMETIMEO, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
563 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
564 return (error);
565 }
566 return (0);
567 }
568
569 static int
570 nfs_sec_name_to_num(char *sec)
571 {
572 if (!strcmp(sec, "krb5"))
573 return (RPCSEC_GSS_KRB5);
574 if (!strcmp(sec, "krb5i"))
575 return (RPCSEC_GSS_KRB5I);
576 if (!strcmp(sec, "krb5p"))
577 return (RPCSEC_GSS_KRB5P);
578 if (!strcmp(sec, "sys"))
579 return (AUTH_SYS);
580 /*
581 * Userland should validate the string but we will try and
582 * cope with unexpected values.
583 */
584 return (AUTH_SYS);
585 }
586
587 static void
588 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
589 const char *hostname)
590 {
591 int s;
592 int adjsock;
593 int maxio;
594 char *p;
595 char *secname;
596 char *principal;
597
598 s = splnet();
599
600 /*
601 * Set read-only flag if requested; otherwise, clear it if this is
602 * an update. If this is not an update, then either the read-only
603 * flag is already clear, or this is a root mount and it was set
604 * intentionally at some previous point.
605 */
606 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
607 MNT_ILOCK(mp);
608 mp->mnt_flag |= MNT_RDONLY;
609 MNT_IUNLOCK(mp);
610 } else if (mp->mnt_flag & MNT_UPDATE) {
611 MNT_ILOCK(mp);
612 mp->mnt_flag &= ~MNT_RDONLY;
613 MNT_IUNLOCK(mp);
614 }
615
616 /*
617 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
618 * no sense in that context. Also, set up appropriate retransmit
619 * and soft timeout behavior.
620 */
621 if (argp->sotype == SOCK_STREAM) {
622 nmp->nm_flag &= ~NFSMNT_NOCONN;
623 nmp->nm_flag |= NFSMNT_DUMBTIMR;
624 nmp->nm_timeo = NFS_MAXTIMEO;
625 nmp->nm_retry = NFS_RETRANS_TCP;
626 }
627
628 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
629 if ((argp->flags & NFSMNT_NFSV3) == 0)
630 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
631
632 /* Re-bind if rsrvd port requested and wasn't on one */
633 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
634 && (argp->flags & NFSMNT_RESVPORT);
635 /* Also re-bind if we're switching to/from a connected UDP socket */
636 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
637 (argp->flags & NFSMNT_NOCONN));
638
639 /* Update flags atomically. Don't change the lock bits. */
640 nmp->nm_flag = argp->flags | nmp->nm_flag;
641 splx(s);
642
643 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
644 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
645 if (nmp->nm_timeo < NFS_MINTIMEO)
646 nmp->nm_timeo = NFS_MINTIMEO;
647 else if (nmp->nm_timeo > NFS_MAXTIMEO)
648 nmp->nm_timeo = NFS_MAXTIMEO;
649 }
650
651 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
652 nmp->nm_retry = argp->retrans;
653 if (nmp->nm_retry > NFS_MAXREXMIT)
654 nmp->nm_retry = NFS_MAXREXMIT;
655 }
656
657 if (argp->flags & NFSMNT_NFSV3) {
658 if (argp->sotype == SOCK_DGRAM)
659 maxio = NFS_MAXDGRAMDATA;
660 else
661 maxio = NFS_MAXDATA;
662 } else
663 maxio = NFS_V2MAXDATA;
664
665 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
666 nmp->nm_wsize = argp->wsize;
667 /* Round down to multiple of blocksize */
668 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
669 if (nmp->nm_wsize <= 0)
670 nmp->nm_wsize = NFS_FABLKSIZE;
671 }
672 if (nmp->nm_wsize > maxio)
673 nmp->nm_wsize = maxio;
674 if (nmp->nm_wsize > MAXBSIZE)
675 nmp->nm_wsize = MAXBSIZE;
676
677 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
678 nmp->nm_rsize = argp->rsize;
679 /* Round down to multiple of blocksize */
680 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
681 if (nmp->nm_rsize <= 0)
682 nmp->nm_rsize = NFS_FABLKSIZE;
683 }
684 if (nmp->nm_rsize > maxio)
685 nmp->nm_rsize = maxio;
686 if (nmp->nm_rsize > MAXBSIZE)
687 nmp->nm_rsize = MAXBSIZE;
688
689 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
690 nmp->nm_readdirsize = argp->readdirsize;
691 }
692 if (nmp->nm_readdirsize > maxio)
693 nmp->nm_readdirsize = maxio;
694 if (nmp->nm_readdirsize > nmp->nm_rsize)
695 nmp->nm_readdirsize = nmp->nm_rsize;
696
697 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
698 nmp->nm_acregmin = argp->acregmin;
699 else
700 nmp->nm_acregmin = NFS_MINATTRTIMO;
701 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
702 nmp->nm_acregmax = argp->acregmax;
703 else
704 nmp->nm_acregmax = NFS_MAXATTRTIMO;
705 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
706 nmp->nm_acdirmin = argp->acdirmin;
707 else
708 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
709 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
710 nmp->nm_acdirmax = argp->acdirmax;
711 else
712 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
713 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
714 nmp->nm_acdirmin = nmp->nm_acdirmax;
715 if (nmp->nm_acregmin > nmp->nm_acregmax)
716 nmp->nm_acregmin = nmp->nm_acregmax;
717
718 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
719 if (argp->maxgrouplist <= NFS_MAXGRPS)
720 nmp->nm_numgrps = argp->maxgrouplist;
721 else
722 nmp->nm_numgrps = NFS_MAXGRPS;
723 }
724 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
725 if (argp->readahead <= NFS_MAXRAHEAD)
726 nmp->nm_readahead = argp->readahead;
727 else
728 nmp->nm_readahead = NFS_MAXRAHEAD;
729 }
730 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
731 if (argp->wcommitsize < nmp->nm_wsize)
732 nmp->nm_wcommitsize = nmp->nm_wsize;
733 else
734 nmp->nm_wcommitsize = argp->wcommitsize;
735 }
736 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
737 if (argp->deadthresh <= NFS_MAXDEADTHRESH)
738 nmp->nm_deadthresh = argp->deadthresh;
739 else
740 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
741 }
742
743 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
744 (nmp->nm_soproto != argp->proto));
745 nmp->nm_sotype = argp->sotype;
746 nmp->nm_soproto = argp->proto;
747
748 if (nmp->nm_client && adjsock) {
749 nfs_safedisconnect(nmp);
750 if (nmp->nm_sotype == SOCK_DGRAM)
751 while (nfs_connect(nmp)) {
752 printf("nfs_args: retrying connect\n");
753 (void) tsleep(&fake_wchan, PSOCK, "nfscon", hz);
754 }
755 }
756
757 if (hostname) {
758 strlcpy(nmp->nm_hostname, hostname,
759 sizeof(nmp->nm_hostname));
760 p = strchr(nmp->nm_hostname, ':');
761 if (p)
762 *p = '\0';
763 }
764
765 if (vfs_getopt(mp->mnt_optnew, "sec",
766 (void **) &secname, NULL) == 0) {
767 nmp->nm_secflavor = nfs_sec_name_to_num(secname);
768 } else {
769 nmp->nm_secflavor = AUTH_SYS;
770 }
771
772 if (vfs_getopt(mp->mnt_optnew, "principal",
773 (void **) &principal, NULL) == 0) {
774 strlcpy(nmp->nm_principal, principal,
775 sizeof(nmp->nm_principal));
776 } else {
777 snprintf(nmp->nm_principal, sizeof(nmp->nm_principal),
778 "nfs@%s", nmp->nm_hostname);
779 }
780 }
781
782 static const char *nfs_opts[] = { "from", "nfs_args",
783 "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
784 "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
785 "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
786 "readahead", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
787 "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin",
788 "acdirmax", "deadthresh", "hostname", "timeout", "addr", "fh", "nfsv3",
789 "sec", "maxgroups", "principal", "negnametimeo", "nocto", "wcommitsize",
790 "nametimeo",
791 NULL };
792
793 /*
794 * VFS Operations.
795 *
796 * mount system call
797 * It seems a bit dumb to copyinstr() the host and path here and then
798 * bcopy() them in mountnfs(), but I wanted to detect errors before
799 * doing the sockargs() call because sockargs() allocates an mbuf and
800 * an error after that means that I have to release the mbuf.
801 */
802 /* ARGSUSED */
803 static int
804 nfs_mount(struct mount *mp)
805 {
806 struct nfs_args args = {
807 .version = NFS_ARGSVERSION,
808 .addr = NULL,
809 .addrlen = sizeof (struct sockaddr_in),
810 .sotype = SOCK_STREAM,
811 .proto = 0,
812 .fh = NULL,
813 .fhsize = 0,
814 .flags = NFSMNT_RESVPORT,
815 .wsize = NFS_WSIZE,
816 .rsize = NFS_RSIZE,
817 .readdirsize = NFS_READDIRSIZE,
818 .timeo = 10,
819 .retrans = NFS_RETRANS,
820 .maxgrouplist = NFS_MAXGRPS,
821 .readahead = NFS_DEFRAHEAD,
822 .wcommitsize = 0, /* was: NQ_DEFLEASE */
823 .deadthresh = NFS_MAXDEADTHRESH, /* was: NQ_DEADTHRESH */
824 .hostname = NULL,
825 /* args version 4 */
826 .acregmin = NFS_MINATTRTIMO,
827 .acregmax = NFS_MAXATTRTIMO,
828 .acdirmin = NFS_MINDIRATTRTIMO,
829 .acdirmax = NFS_MAXDIRATTRTIMO,
830 };
831 int error, ret, has_nfs_args_opt;
832 int has_addr_opt, has_fh_opt, has_hostname_opt;
833 struct sockaddr *nam;
834 struct vnode *vp;
835 char hst[MNAMELEN];
836 size_t len;
837 u_char nfh[NFSX_V3FHMAX];
838 char *opt;
839 int nametimeo = NFS_DEFAULT_NAMETIMEO;
840 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
841
842 has_nfs_args_opt = 0;
843 has_addr_opt = 0;
844 has_fh_opt = 0;
845 has_hostname_opt = 0;
846
847 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
848 error = EINVAL;
849 goto out;
850 }
851
852 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
853 error = nfs_mountroot(mp);
854 goto out;
855 }
856
857 /*
858 * The old mount_nfs program passed the struct nfs_args
859 * from userspace to kernel. The new mount_nfs program
860 * passes string options via nmount() from userspace to kernel
861 * and we populate the struct nfs_args in the kernel.
862 */
863 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
864 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
865 sizeof args);
866 if (error)
867 goto out;
868
869 if (args.version != NFS_ARGSVERSION) {
870 error = EPROGMISMATCH;
871 goto out;
872 }
873 has_nfs_args_opt = 1;
874 }
875
876 if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
877 args.flags |= NFSMNT_DUMBTIMR;
878 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
879 args.flags |= NFSMNT_NOCONN;
880 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
881 args.flags |= NFSMNT_NOCONN;
882 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
883 args.flags |= NFSMNT_NOLOCKD;
884 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
885 args.flags &= ~NFSMNT_NOLOCKD;
886 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
887 args.flags |= NFSMNT_INT;
888 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
889 args.flags |= NFSMNT_RDIRPLUS;
890 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
891 args.flags |= NFSMNT_RESVPORT;
892 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
893 args.flags &= ~NFSMNT_RESVPORT;
894 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
895 args.flags |= NFSMNT_SOFT;
896 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
897 args.flags &= ~NFSMNT_SOFT;
898 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
899 args.sotype = SOCK_DGRAM;
900 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
901 args.sotype = SOCK_DGRAM;
902 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
903 args.sotype = SOCK_STREAM;
904 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
905 args.flags |= NFSMNT_NFSV3;
906 if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
907 args.flags |= NFSMNT_NOCTO;
908 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
909 if (opt == NULL) {
910 vfs_mount_error(mp, "illegal readdirsize");
911 error = EINVAL;
912 goto out;
913 }
914 ret = sscanf(opt, "%d", &args.readdirsize);
915 if (ret != 1 || args.readdirsize <= 0) {
916 vfs_mount_error(mp, "illegal readdirsize: %s",
917 opt);
918 error = EINVAL;
919 goto out;
920 }
921 args.flags |= NFSMNT_READDIRSIZE;
922 }
923 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
924 if (opt == NULL) {
925 vfs_mount_error(mp, "illegal readahead");
926 error = EINVAL;
927 goto out;
928 }
929 ret = sscanf(opt, "%d", &args.readahead);
930 if (ret != 1 || args.readahead <= 0) {
931 vfs_mount_error(mp, "illegal readahead: %s",
932 opt);
933 error = EINVAL;
934 goto out;
935 }
936 args.flags |= NFSMNT_READAHEAD;
937 }
938 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
939 if (opt == NULL) {
940 vfs_mount_error(mp, "illegal wsize");
941 error = EINVAL;
942 goto out;
943 }
944 ret = sscanf(opt, "%d", &args.wsize);
945 if (ret != 1 || args.wsize <= 0) {
946 vfs_mount_error(mp, "illegal wsize: %s",
947 opt);
948 error = EINVAL;
949 goto out;
950 }
951 args.flags |= NFSMNT_WSIZE;
952 }
953 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
954 if (opt == NULL) {
955 vfs_mount_error(mp, "illegal rsize");
956 error = EINVAL;
957 goto out;
958 }
959 ret = sscanf(opt, "%d", &args.rsize);
960 if (ret != 1 || args.rsize <= 0) {
961 vfs_mount_error(mp, "illegal wsize: %s",
962 opt);
963 error = EINVAL;
964 goto out;
965 }
966 args.flags |= NFSMNT_RSIZE;
967 }
968 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
969 if (opt == NULL) {
970 vfs_mount_error(mp, "illegal retrans");
971 error = EINVAL;
972 goto out;
973 }
974 ret = sscanf(opt, "%d", &args.retrans);
975 if (ret != 1 || args.retrans <= 0) {
976 vfs_mount_error(mp, "illegal retrans: %s",
977 opt);
978 error = EINVAL;
979 goto out;
980 }
981 args.flags |= NFSMNT_RETRANS;
982 }
983 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
984 ret = sscanf(opt, "%d", &args.acregmin);
985 if (ret != 1 || args.acregmin < 0) {
986 vfs_mount_error(mp, "illegal acregmin: %s",
987 opt);
988 error = EINVAL;
989 goto out;
990 }
991 args.flags |= NFSMNT_ACREGMIN;
992 }
993 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
994 ret = sscanf(opt, "%d", &args.acregmax);
995 if (ret != 1 || args.acregmax < 0) {
996 vfs_mount_error(mp, "illegal acregmax: %s",
997 opt);
998 error = EINVAL;
999 goto out;
1000 }
1001 args.flags |= NFSMNT_ACREGMAX;
1002 }
1003 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1004 ret = sscanf(opt, "%d", &args.acdirmin);
1005 if (ret != 1 || args.acdirmin < 0) {
1006 vfs_mount_error(mp, "illegal acdirmin: %s",
1007 opt);
1008 error = EINVAL;
1009 goto out;
1010 }
1011 args.flags |= NFSMNT_ACDIRMIN;
1012 }
1013 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1014 ret = sscanf(opt, "%d", &args.acdirmax);
1015 if (ret != 1 || args.acdirmax < 0) {
1016 vfs_mount_error(mp, "illegal acdirmax: %s",
1017 opt);
1018 error = EINVAL;
1019 goto out;
1020 }
1021 args.flags |= NFSMNT_ACDIRMAX;
1022 }
1023 if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1024 ret = sscanf(opt, "%d", &args.wcommitsize);
1025 if (ret != 1 || args.wcommitsize < 0) {
1026 vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1027 error = EINVAL;
1028 goto out;
1029 }
1030 args.flags |= NFSMNT_WCOMMITSIZE;
1031 }
1032 if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) {
1033 ret = sscanf(opt, "%d", &args.deadthresh);
1034 if (ret != 1 || args.deadthresh <= 0) {
1035 vfs_mount_error(mp, "illegal deadthresh: %s",
1036 opt);
1037 error = EINVAL;
1038 goto out;
1039 }
1040 args.flags |= NFSMNT_DEADTHRESH;
1041 }
1042 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1043 ret = sscanf(opt, "%d", &args.timeo);
1044 if (ret != 1 || args.timeo <= 0) {
1045 vfs_mount_error(mp, "illegal timeout: %s",
1046 opt);
1047 error = EINVAL;
1048 goto out;
1049 }
1050 args.flags |= NFSMNT_TIMEO;
1051 }
1052 if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) {
1053 ret = sscanf(opt, "%d", &args.maxgrouplist);
1054 if (ret != 1 || args.maxgrouplist <= 0) {
1055 vfs_mount_error(mp, "illegal maxgroups: %s",
1056 opt);
1057 error = EINVAL;
1058 goto out;
1059 }
1060 args.flags |= NFSMNT_MAXGRPS;
1061 }
1062 if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1063 ret = sscanf(opt, "%d", &nametimeo);
1064 if (ret != 1 || nametimeo < 0) {
1065 vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1066 error = EINVAL;
1067 goto out;
1068 }
1069 }
1070 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1071 == 0) {
1072 ret = sscanf(opt, "%d", &negnametimeo);
1073 if (ret != 1 || negnametimeo < 0) {
1074 vfs_mount_error(mp, "illegal negnametimeo: %s",
1075 opt);
1076 error = EINVAL;
1077 goto out;
1078 }
1079 }
1080 if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1081 &args.addrlen) == 0) {
1082 has_addr_opt = 1;
1083 if (args.addrlen > SOCK_MAXADDRLEN) {
1084 error = ENAMETOOLONG;
1085 goto out;
1086 }
1087 nam = malloc(args.addrlen, M_SONAME,
1088 M_WAITOK);
1089 bcopy(args.addr, nam, args.addrlen);
1090 nam->sa_len = args.addrlen;
1091 }
1092 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1093 &args.fhsize) == 0) {
1094 has_fh_opt = 1;
1095 }
1096 if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
1097 NULL) == 0) {
1098 has_hostname_opt = 1;
1099 }
1100 if (args.hostname == NULL) {
1101 vfs_mount_error(mp, "Invalid hostname");
1102 error = EINVAL;
1103 goto out;
1104 }
1105 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1106 vfs_mount_error(mp, "Bad file handle");
1107 error = EINVAL;
1108 goto out;
1109 }
1110
1111 if (mp->mnt_flag & MNT_UPDATE) {
1112 struct nfsmount *nmp = VFSTONFS(mp);
1113
1114 if (nmp == NULL) {
1115 error = EIO;
1116 goto out;
1117 }
1118
1119 /*
1120 * If a change from TCP->UDP is done and there are thread(s)
1121 * that have I/O RPC(s) in progress with a tranfer size
1122 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1123 * hung, retrying the RPC(s) forever. Usually these threads
1124 * will be seen doing an uninterruptible sleep on wait channel
1125 * "newnfsreq" (truncated to "newnfsre" by procstat).
1126 */
1127 if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1128 tprintf(curthread->td_proc, LOG_WARNING,
1129 "Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1130
1131 /*
1132 * When doing an update, we can't change from or to
1133 * v3, switch lockd strategies or change cookie translation
1134 */
1135 args.flags = (args.flags &
1136 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1137 (nmp->nm_flag &
1138 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1139 nfs_decode_args(mp, nmp, &args, NULL);
1140 goto out;
1141 }
1142
1143 /*
1144 * Make the nfs_ip_paranoia sysctl serve as the default connection
1145 * or no-connection mode for those protocols that support
1146 * no-connection mode (the flag will be cleared later for protocols
1147 * that do not support no-connection mode). This will allow a client
1148 * to receive replies from a different IP then the request was
1149 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1150 * not 0.
1151 */
1152 if (nfs_ip_paranoia == 0)
1153 args.flags |= NFSMNT_NOCONN;
1154
1155 if (has_nfs_args_opt) {
1156 /*
1157 * In the 'nfs_args' case, the pointers in the args
1158 * structure are in userland - we copy them in here.
1159 */
1160 if (!has_fh_opt) {
1161 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1162 args.fhsize);
1163 if (error) {
1164 goto out;
1165 }
1166 args.fh = nfh;
1167 }
1168 if (!has_hostname_opt) {
1169 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
1170 if (error) {
1171 goto out;
1172 }
1173 bzero(&hst[len], MNAMELEN - len);
1174 args.hostname = hst;
1175 }
1176 if (!has_addr_opt) {
1177 /* sockargs() call must be after above copyin() calls */
1178 error = getsockaddr(&nam, (caddr_t)args.addr,
1179 args.addrlen);
1180 if (error) {
1181 goto out;
1182 }
1183 }
1184 } else if (has_addr_opt == 0) {
1185 vfs_mount_error(mp, "No server address");
1186 error = EINVAL;
1187 goto out;
1188 }
1189 error = mountnfs(&args, mp, nam, args.hostname, &vp,
1190 curthread->td_ucred, nametimeo, negnametimeo);
1191 out:
1192 if (!error) {
1193 MNT_ILOCK(mp);
1194 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1195 MNT_IUNLOCK(mp);
1196 }
1197 return (error);
1198 }
1199
1200
1201 /*
1202 * VFS Operations.
1203 *
1204 * mount system call
1205 * It seems a bit dumb to copyinstr() the host and path here and then
1206 * bcopy() them in mountnfs(), but I wanted to detect errors before
1207 * doing the sockargs() call because sockargs() allocates an mbuf and
1208 * an error after that means that I have to release the mbuf.
1209 */
1210 /* ARGSUSED */
1211 static int
1212 nfs_cmount(struct mntarg *ma, void *data, int flags)
1213 {
1214 int error;
1215 struct nfs_args args;
1216
1217 error = copyin(data, &args, sizeof (struct nfs_args));
1218 if (error)
1219 return error;
1220
1221 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1222
1223 error = kernel_mount(ma, flags);
1224 return (error);
1225 }
1226
1227 /*
1228 * Common code for mount and mountroot
1229 */
1230 static int
1231 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1232 char *hst, struct vnode **vpp, struct ucred *cred, int nametimeo,
1233 int negnametimeo)
1234 {
1235 struct nfsmount *nmp;
1236 struct nfsnode *np;
1237 int error;
1238 struct vattr attrs;
1239
1240 if (mp->mnt_flag & MNT_UPDATE) {
1241 nmp = VFSTONFS(mp);
1242 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1243 free(nam, M_SONAME);
1244 return (0);
1245 } else {
1246 nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
1247 bzero((caddr_t)nmp, sizeof (struct nfsmount));
1248 TAILQ_INIT(&nmp->nm_bufq);
1249 mp->mnt_data = nmp;
1250 nmp->nm_getinfo = nfs_getnlminfo;
1251 nmp->nm_vinvalbuf = nfs_vinvalbuf;
1252 }
1253 vfs_getnewfsid(mp);
1254 nmp->nm_mountp = mp;
1255 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
1256
1257 /*
1258 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1259 * high, depending on whether we end up with negative offsets in
1260 * the client or server somewhere. 2GB-1 may be safer.
1261 *
1262 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
1263 * that we can handle until we find out otherwise.
1264 */
1265 if ((argp->flags & NFSMNT_NFSV3) == 0)
1266 nmp->nm_maxfilesize = 0xffffffffLL;
1267 else
1268 nmp->nm_maxfilesize = OFF_MAX;
1269
1270 nmp->nm_timeo = NFS_TIMEO;
1271 nmp->nm_retry = NFS_RETRANS;
1272 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
1273 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
1274 } else {
1275 nmp->nm_wsize = NFS_WSIZE;
1276 nmp->nm_rsize = NFS_RSIZE;
1277 }
1278 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1279 nmp->nm_readdirsize = NFS_READDIRSIZE;
1280 nmp->nm_numgrps = NFS_MAXGRPS;
1281 nmp->nm_readahead = NFS_DEFRAHEAD;
1282 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
1283 nmp->nm_nametimeo = nametimeo;
1284 nmp->nm_negnametimeo = negnametimeo;
1285 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1286 if (nmp->nm_tprintf_delay < 0)
1287 nmp->nm_tprintf_delay = 0;
1288 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1289 if (nmp->nm_tprintf_initial_delay < 0)
1290 nmp->nm_tprintf_initial_delay = 0;
1291 nmp->nm_fhsize = argp->fhsize;
1292 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1293 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1294 nmp->nm_nam = nam;
1295 /* Set up the sockets and per-host congestion */
1296 nmp->nm_sotype = argp->sotype;
1297 nmp->nm_soproto = argp->proto;
1298 nmp->nm_rpcops = &nfs_rpcops;
1299
1300 nfs_decode_args(mp, nmp, argp, hst);
1301
1302 /*
1303 * For Connection based sockets (TCP,...) defer the connect until
1304 * the first request, in case the server is not responding.
1305 */
1306 if (nmp->nm_sotype == SOCK_DGRAM &&
1307 (error = nfs_connect(nmp)))
1308 goto bad;
1309
1310 /*
1311 * This is silly, but it has to be set so that vinifod() works.
1312 * We do not want to do an nfs_statfs() here since we can get
1313 * stuck on a dead server and we are holding a lock on the mount
1314 * point.
1315 */
1316 mtx_lock(&nmp->nm_mtx);
1317 mp->mnt_stat.f_iosize = nfs_iosize(nmp);
1318 mtx_unlock(&nmp->nm_mtx);
1319 /*
1320 * A reference count is needed on the nfsnode representing the
1321 * remote root. If this object is not persistent, then backward
1322 * traversals of the mount point (i.e. "..") will not work if
1323 * the nfsnode gets flushed out of the cache. Ufs does not have
1324 * this problem, because one can identify root inodes by their
1325 * number == ROOTINO (2).
1326 */
1327 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
1328 if (error)
1329 goto bad;
1330 *vpp = NFSTOV(np);
1331
1332 /*
1333 * Get file attributes and transfer parameters for the
1334 * mountpoint. This has the side effect of filling in
1335 * (*vpp)->v_type with the correct value.
1336 */
1337 if (argp->flags & NFSMNT_NFSV3)
1338 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
1339 else
1340 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred);
1341
1342 /*
1343 * Lose the lock but keep the ref.
1344 */
1345 VOP_UNLOCK(*vpp, 0);
1346
1347 return (0);
1348 bad:
1349 nfs_disconnect(nmp);
1350 mtx_destroy(&nmp->nm_mtx);
1351 uma_zfree(nfsmount_zone, nmp);
1352 free(nam, M_SONAME);
1353 return (error);
1354 }
1355
1356 /*
1357 * unmount system call
1358 */
1359 static int
1360 nfs_unmount(struct mount *mp, int mntflags)
1361 {
1362 struct nfsmount *nmp;
1363 int error, flags = 0;
1364
1365 if (mntflags & MNT_FORCE)
1366 flags |= FORCECLOSE;
1367 nmp = VFSTONFS(mp);
1368 /*
1369 * Goes something like this..
1370 * - Call vflush() to clear out vnodes for this filesystem
1371 * - Close the socket
1372 * - Free up the data structures
1373 */
1374 /* In the forced case, cancel any outstanding requests. */
1375 if (flags & FORCECLOSE) {
1376 error = nfs_nmcancelreqs(nmp);
1377 if (error)
1378 goto out;
1379 }
1380 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1381 error = vflush(mp, 1, flags, curthread);
1382 if (error)
1383 goto out;
1384
1385 /*
1386 * We are now committed to the unmount.
1387 */
1388 nfs_disconnect(nmp);
1389 free(nmp->nm_nam, M_SONAME);
1390
1391 mtx_destroy(&nmp->nm_mtx);
1392 uma_zfree(nfsmount_zone, nmp);
1393 out:
1394 return (error);
1395 }
1396
1397 /*
1398 * Return root of a filesystem
1399 */
1400 static int
1401 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1402 {
1403 struct vnode *vp;
1404 struct nfsmount *nmp;
1405 struct nfsnode *np;
1406 int error;
1407
1408 nmp = VFSTONFS(mp);
1409 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1410 if (error)
1411 return error;
1412 vp = NFSTOV(np);
1413 /*
1414 * Get transfer parameters and attributes for root vnode once.
1415 */
1416 mtx_lock(&nmp->nm_mtx);
1417 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
1418 (nmp->nm_flag & NFSMNT_NFSV3)) {
1419 mtx_unlock(&nmp->nm_mtx);
1420 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1421 } else
1422 mtx_unlock(&nmp->nm_mtx);
1423 if (vp->v_type == VNON)
1424 vp->v_type = VDIR;
1425 vp->v_vflag |= VV_ROOT;
1426 *vpp = vp;
1427 return (0);
1428 }
1429
1430 /*
1431 * Flush out the buffer cache
1432 */
1433 /* ARGSUSED */
1434 static int
1435 nfs_sync(struct mount *mp, int waitfor)
1436 {
1437 struct vnode *vp, *mvp;
1438 struct thread *td;
1439 int error, allerror = 0;
1440
1441 td = curthread;
1442
1443 MNT_ILOCK(mp);
1444 /*
1445 * If a forced dismount is in progress, return from here so that
1446 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1447 * calling VFS_UNMOUNT().
1448 */
1449 if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1450 MNT_IUNLOCK(mp);
1451 return (EBADF);
1452 }
1453
1454 /*
1455 * Force stale buffer cache information to be flushed.
1456 */
1457 loop:
1458 MNT_VNODE_FOREACH(vp, mp, mvp) {
1459 VI_LOCK(vp);
1460 MNT_IUNLOCK(mp);
1461 /* XXX Racy bv_cnt check. */
1462 if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1463 waitfor == MNT_LAZY) {
1464 VI_UNLOCK(vp);
1465 MNT_ILOCK(mp);
1466 continue;
1467 }
1468 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1469 MNT_ILOCK(mp);
1470 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1471 goto loop;
1472 }
1473 error = VOP_FSYNC(vp, waitfor, td);
1474 if (error)
1475 allerror = error;
1476 VOP_UNLOCK(vp, 0);
1477 vrele(vp);
1478
1479 MNT_ILOCK(mp);
1480 }
1481 MNT_IUNLOCK(mp);
1482 return (allerror);
1483 }
1484
1485 static int
1486 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1487 {
1488 struct nfsmount *nmp = VFSTONFS(mp);
1489 struct vfsquery vq;
1490 int error;
1491
1492 bzero(&vq, sizeof(vq));
1493 switch (op) {
1494 #if 0
1495 case VFS_CTL_NOLOCKS:
1496 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1497 if (req->oldptr != NULL) {
1498 error = SYSCTL_OUT(req, &val, sizeof(val));
1499 if (error)
1500 return (error);
1501 }
1502 if (req->newptr != NULL) {
1503 error = SYSCTL_IN(req, &val, sizeof(val));
1504 if (error)
1505 return (error);
1506 if (val)
1507 nmp->nm_flag |= NFSMNT_NOLOCKS;
1508 else
1509 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1510 }
1511 break;
1512 #endif
1513 case VFS_CTL_QUERY:
1514 mtx_lock(&nmp->nm_mtx);
1515 if (nmp->nm_state & NFSSTA_TIMEO)
1516 vq.vq_flags |= VQ_NOTRESP;
1517 mtx_unlock(&nmp->nm_mtx);
1518 #if 0
1519 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1520 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1521 vq.vq_flags |= VQ_NOTRESPLOCK;
1522 #endif
1523 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1524 break;
1525 case VFS_CTL_TIMEO:
1526 if (req->oldptr != NULL) {
1527 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1528 sizeof(nmp->nm_tprintf_initial_delay));
1529 if (error)
1530 return (error);
1531 }
1532 if (req->newptr != NULL) {
1533 error = vfs_suser(mp, req->td);
1534 if (error)
1535 return (error);
1536 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1537 sizeof(nmp->nm_tprintf_initial_delay));
1538 if (error)
1539 return (error);
1540 if (nmp->nm_tprintf_initial_delay < 0)
1541 nmp->nm_tprintf_initial_delay = 0;
1542 }
1543 break;
1544 default:
1545 return (ENOTSUP);
1546 }
1547 return (0);
1548 }
1549
1550 /*
1551 * Extract the information needed by the nlm from the nfs vnode.
1552 */
1553 static void
1554 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1555 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1556 struct timeval *timeop)
1557 {
1558 struct nfsmount *nmp;
1559 struct nfsnode *np = VTONFS(vp);
1560
1561 nmp = VFSTONFS(vp->v_mount);
1562 if (fhlenp != NULL)
1563 *fhlenp = (size_t)np->n_fhsize;
1564 if (fhp != NULL)
1565 bcopy(np->n_fhp, fhp, np->n_fhsize);
1566 if (sp != NULL)
1567 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1568 if (is_v3p != NULL)
1569 *is_v3p = NFS_ISV3(vp);
1570 if (sizep != NULL)
1571 *sizep = np->n_size;
1572 if (timeop != NULL) {
1573 timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1574 timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1575 }
1576 }
1577
Cache object: 7ee9ddb9a6e409b9c1dbd44e7b9db0cc
|