1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/8.3/sys/nfsclient/nfs_vfsops.c 231637 2012-02-14 05:12:52Z rmacklem $");
37
38
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/jail.h>
48 #include <sys/limits.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/mount.h>
54 #include <sys/proc.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/syslog.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <net/vnet.h>
70
71 #include <netinet/in.h>
72
73 #include <rpc/rpc.h>
74
75 #include <nfs/nfsproto.h>
76 #include <nfsclient/nfs.h>
77 #include <nfsclient/nfsnode.h>
78 #include <nfsclient/nfsmount.h>
79 #include <nfs/xdr_subs.h>
80 #include <nfsclient/nfsm_subs.h>
81 #include <nfs/nfsdiskless.h>
82
/*
 * Malloc types used by the NFS client.  Each MALLOC_DEFINE supplies the
 * type's short name and its description string.
 */
MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");

/*
 * UMA zone handle.  NOTE(review): presumably backs struct nfsmount
 * allocations; it is created and used outside this part of the file --
 * confirm.
 */
uma_zone_t nfsmount_zone;

/*
 * Client statistics.  The rpccnt[] member is incremented once per RPC
 * issued (see nfs_statfs() and nfs_fsinfo()); the whole structure is
 * exported read/write through the SYSCTL_STRUCT below.
 */
struct nfsstats nfsstats;

/* Root of the vfs.nfs sysctl tree, followed by the knobs hung off it. */
SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
    &nfsstats, nfsstats, "S,nfsstats");
/* Enabled (1) by default; writable at run time. */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0,
    "Disallow accepting replies from IPs which differ from those sent");
#ifdef NFS_DEBUG
/* Debug flag; only present in kernels built with NFS_DEBUG. */
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
/*
 * Delay before the first "nfs server not responding" message.
 * NOTE(review): the OID-number argument of the SYSCTL_INT below is the
 * same macro that provides the default value (NFS_TPRINTF_INITIAL_DELAY)
 * rather than OID_AUTO -- confirm this is intended.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
    downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0,
    "Delay before printing \"nfs server not responding\" messages");
/*
 * How long to wait between console messages of the form
 * "nfs server foo not responding".  As above, the OID-number argument
 * reuses the default-value macro (NFS_TPRINTF_DELAY).
 */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
    downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0,
    "Delay between printing \"nfs server not responding\" messages");

/* Forward declarations of file-local helpers and of the VFS entry points. */
static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
    struct nfs_args *argp, const char *hostname);
static int mountnfs(struct nfs_args *, struct mount *,
    struct sockaddr *, char *, struct vnode **,
    struct ucred *cred, int);
static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
    struct sockaddr_storage *, int *, off_t *,
    struct timeval *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;

/*
 * Only the address of this variable is used: it is the sleep channel
 * passed to tsleep() while nfs_decode_args() retries a connect.
 */
static int fake_wchan;
132
/*
 * nfs vfs operations.
 *
 * Operation vector for the "nfs" file system type.  nfs_init and
 * nfs_uninit are declared outside this file; the remaining entries are
 * the static functions declared above.  VFS_SET registers the vector
 * under the name "nfs" with the VFCF_NETWORK flag.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		nfs_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		nfs_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
148
/*
 * So that loader and kldload(2) can find us, wherever we are..
 *
 * The module is version 1 and depends on version 1 of krpc, nfs_common
 * and nfslock; kgssapi is only required when the kernel is built with
 * KGSSAPI.
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
#ifdef KGSSAPI
MODULE_DEPEND(nfs, kgssapi, 1, 1, 1);
#endif
MODULE_DEPEND(nfs, nfs_common, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
157
/*
 * RPC operation table for this client.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct nfs_rpcops (declared outside this file).
 * NOTE(review): presumably attached to each mount by mountnfs() --
 * confirm against the part of the file not shown here.
 */
static struct nfs_rpcops nfs_rpcops = {
	nfs_readrpc,
	nfs_writerpc,
	nfs_writebp,
	nfs_readlinkrpc,
	nfs_invaldir,
	nfs_commit,
};
166
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients.  It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 *
 * The fallback definitions below are zero-initialized, and
 * nfs_diskless_valid starts at 0, which nfs_mountroot() treats as
 * "no diskless information available".
 */
#ifndef NFS_ROOT
struct nfs_diskless nfs_diskless = { { { 0 } } };
struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
int nfs_diskless_valid = 0;
#endif
178
/*
 * Read-only sysctls exposing the diskless boot state: the validity
 * indicator, the root_hostnam string and the root server socket address
 * from nfsv3_diskless.
 */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
    "%Ssockaddr_in", "Diskless root nfs address");


/*
 * Prototypes for the diskless-root helpers.  nfsargs_ntoh() has external
 * linkage and is not defined in this part of the file.
 */
void nfsargs_ntoh(struct nfs_args *);
static int nfs_mountdiskless(char *,
    struct sockaddr_in *, struct nfs_args *,
    struct thread *, struct vnode **, struct mount *);
static void nfs_convert_diskless(void);
static void nfs_convert_oargs(struct nfs_args *args,
    struct onfs_args *oargs);
198
199 int
200 nfs_iosize(struct nfsmount *nmp)
201 {
202 int iosize;
203
204 /*
205 * Calculate the size used for io buffers. Use the larger
206 * of the two sizes to minimise nfs requests but make sure
207 * that it is at least one VM page to avoid wasting buffer
208 * space.
209 */
210 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
211 iosize = imax(iosize, PAGE_SIZE);
212 return (iosize);
213 }
214
215 static void
216 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
217 {
218
219 args->version = NFS_ARGSVERSION;
220 args->addr = oargs->addr;
221 args->addrlen = oargs->addrlen;
222 args->sotype = oargs->sotype;
223 args->proto = oargs->proto;
224 args->fh = oargs->fh;
225 args->fhsize = oargs->fhsize;
226 args->flags = oargs->flags;
227 args->wsize = oargs->wsize;
228 args->rsize = oargs->rsize;
229 args->readdirsize = oargs->readdirsize;
230 args->timeo = oargs->timeo;
231 args->retrans = oargs->retrans;
232 args->maxgrouplist = oargs->maxgrouplist;
233 args->readahead = oargs->readahead;
234 args->deadthresh = oargs->deadthresh;
235 args->hostname = oargs->hostname;
236 }
237
238 static void
239 nfs_convert_diskless(void)
240 {
241
242 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
243 sizeof(struct ifaliasreq));
244 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
245 sizeof(struct sockaddr_in));
246 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
247 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
248 nfsv3_diskless.root_fhsize = NFSX_V3FH;
249 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
250 } else {
251 nfsv3_diskless.root_fhsize = NFSX_V2FH;
252 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
253 }
254 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
255 sizeof(struct sockaddr_in));
256 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
257 nfsv3_diskless.root_time = nfs_diskless.root_time;
258 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
259 MAXHOSTNAMELEN);
260 nfs_diskless_valid = 3;
261 }
262
/*
 * nfs statfs call
 *
 * Issue an NFSPROC_FSSTAT RPC for the mount's root file handle and
 * decode the reply into *sbp.
 *
 * The mount is busied (without waiting) and the root vnode is obtained
 * exclusively locked for the duration of the call; both are released
 * before returning.  On a version 3 mount that has not yet recorded
 * NFSSTA_GOTFSINFO, nfs_fsinfo() is called first and its result ignored.
 *
 * Returns 0 on success, or the error from vfs_busy(), nfs_nget() or the
 * RPC.
 *
 * NOTE(review): the nfsm_* macros use the locals mreq, mrep, md, mb,
 * bpos, dpos and error implicitly and presumably branch to the nfsmout
 * label on failure -- confirm in nfsm_subs.h before restructuring.
 */
static int
nfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct vnode *vp;
	struct thread *td;
	struct nfs_statfs *sfp;
	caddr_t bpos, dpos;
	struct nfsmount *nmp = VFSTONFS(mp);
	int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct nfsnode *np;
	u_quad_t tquad;

	td = curthread;
#ifndef nolint
	sfp = NULL;
#endif
	/* MBF_NOWAIT: fail instead of sleeping if the mount cannot be busied. */
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error)
		return (error);
	/* Look up the root node from the file handle stored in the mount. */
	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np,
	    LK_EXCLUSIVE);
	if (error) {
		vfs_unbusy(mp);
		return (error);
	}
	vp = NFSTOV(np);
	/* nm_state is read under nm_mtx; the mutex is dropped before the RPC. */
	mtx_lock(&nmp->nm_mtx);
	if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		(void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
	} else
		mtx_unlock(&nmp->nm_mtx);
	nfsstats.rpccnt[NFSPROC_FSSTAT]++;
	/* Build the request: header followed by the file handle. */
	mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
	if (v3)
		nfsm_postop_attr(vp, retattr);
	if (error) {
		if (mrep != NULL)
			m_freem(mrep);
		goto nfsmout;
	}
	sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
	mtx_lock(&nmp->nm_mtx);
	sbp->f_iosize = nfs_iosize(nmp);
	mtx_unlock(&nmp->nm_mtx);
	if (v3) {
		/* V3 reports byte counts; convert to NFS_FABLKSIZE blocks. */
		sbp->f_bsize = NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_tbytes);
		sbp->f_blocks = tquad / NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_fbytes);
		sbp->f_bfree = tquad / NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_abytes);
		sbp->f_bavail = tquad / NFS_FABLKSIZE;
		/* Only word [1] of each 64-bit file count is used, cut to 31 bits. */
		sbp->f_files = (fxdr_unsigned(int32_t,
		    sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
		sbp->f_ffree = (fxdr_unsigned(int32_t,
		    sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
	} else {
		/* V2 reports block counts directly; file counts are set to 0. */
		sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
		sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
		sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
		sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
		sbp->f_files = 0;
		sbp->f_ffree = 0;
	}
	m_freem(mrep);
nfsmout:
	/* Common exit: unlock and release the root vnode, unbusy the mount. */
	vput(vp);
	vfs_unbusy(mp);
	return (error);
}
341
/*
 * nfs version 3 fsinfo rpc call
 *
 * Issue an NFSPROC_FSINFO RPC for vp and, on success, tighten the
 * mount's transfer parameters to what the server reports:
 *   - nm_wsize, nm_rsize and nm_readdirsize are only ever lowered, never
 *     raised, by the preferred and maximum sizes in the reply;
 *   - nm_maxfilesize is lowered to the server's non-zero maximum;
 *   - the mount's f_iosize is recomputed and NFSSTA_GOTFSINFO is set.
 * All of the updates are made with nm_mtx held.
 *
 * Returns 0 on success or the RPC error.
 *
 * NOTE(review): the nfsm_* macros use the locals mreq, mrep, md, mb,
 * bpos, dpos and error implicitly, and nfsmout has no explicit goto in
 * this function, so the macros presumably branch to it on failure --
 * confirm in nfsm_subs.h.
 */
int
nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
    struct thread *td)
{
	struct nfsv3_fsinfo *fsp;
	u_int32_t pref, max;
	caddr_t bpos, dpos;
	int error = 0, retattr;
	struct mbuf *mreq, *mrep, *md, *mb;
	u_int64_t maxfsize;

	nfsstats.rpccnt[NFSPROC_FSINFO]++;
	/* The literal 1 selects the version 3 file handle encoding. */
	mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, 1);
	nfsm_request(vp, NFSPROC_FSINFO, td, cred);
	nfsm_postop_attr(vp, retattr);
	if (!error) {
		fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
		/* Write size: preferred value rounded up to NFS_FABLKSIZE. */
		pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
		mtx_lock(&nmp->nm_mtx);
		if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
			nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
			    ~(NFS_FABLKSIZE - 1);
		/*
		 * Write size: maximum rounded down to NFS_FABLKSIZE, or used
		 * as is when rounding down would give 0.
		 */
		max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
		if (max < nmp->nm_wsize && max > 0) {
			nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
			if (nmp->nm_wsize == 0)
				nmp->nm_wsize = max;
		}
		/* Read size: same treatment as the write size. */
		pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
		if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
			nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
			    ~(NFS_FABLKSIZE - 1);
		max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
		if (max < nmp->nm_rsize && max > 0) {
			nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
			if (nmp->nm_rsize == 0)
				nmp->nm_rsize = max;
		}
		/* Readdir size: rounded in units of NFS_DIRBLKSIZ. */
		pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
		if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
			nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
			    ~(NFS_DIRBLKSIZ - 1);
		/*
		 * "max" still holds the fs_rtmax value loaded above; no
		 * separate directory maximum is read from the reply.
		 */
		if (max < nmp->nm_readdirsize && max > 0) {
			nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
			if (nmp->nm_readdirsize == 0)
				nmp->nm_readdirsize = max;
		}
		maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
		if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
			nmp->nm_maxfilesize = maxfsize;
		/* rsize/wsize may have changed, so refresh the cached iosize. */
		nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
		nmp->nm_state |= NFSSTA_GOTFSINFO;
		mtx_unlock(&nmp->nm_mtx);
	}
	m_freem(mrep);
nfsmout:
	return (error);
}
406
407 /*
408 * Mount a remote root fs via. nfs. This depends on the info in the
409 * nfs_diskless structure that has been filled in properly by some primary
410 * bootstrap.
411 * It goes something like this:
412 * - do enough of "ifconfig" by calling ifioctl() so that the system
413 * can talk to the server
414 * - If nfs_diskless.mygateway is filled in, use that address as
415 * a default gateway.
416 * - build the rootfs mount point and call mountnfs() to do the rest.
417 *
418 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
419 * structure, as well as other global NFS client variables here, as
420 * nfs_mountroot() will be called once in the boot before any other NFS
421 * client activity occurs.
422 */
423 int
424 nfs_mountroot(struct mount *mp)
425 {
426 struct thread *td = curthread;
427 struct nfsv3_diskless *nd = &nfsv3_diskless;
428 struct socket *so;
429 struct vnode *vp;
430 struct ifreq ir;
431 int error;
432 u_long l;
433 char buf[128];
434 char *cp;
435
436
437 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
438 bootpc_init(); /* use bootp to get nfs_diskless filled in */
439 #elif defined(NFS_ROOT)
440 nfs_setup_diskless();
441 #endif
442
443 if (nfs_diskless_valid == 0) {
444 return (-1);
445 }
446 if (nfs_diskless_valid == 1)
447 nfs_convert_diskless();
448
449 /*
450 * XXX splnet, so networks will receive...
451 */
452 splnet();
453
454 /*
455 * Do enough of ifconfig(8) so that the critical net interface can
456 * talk to the server.
457 */
458 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
459 td->td_ucred, td);
460 if (error)
461 panic("nfs_mountroot: socreate(%04x): %d",
462 nd->myif.ifra_addr.sa_family, error);
463
464 #if 0 /* XXX Bad idea */
465 /*
466 * We might not have been told the right interface, so we pass
467 * over the first ten interfaces of the same kind, until we get
468 * one of them configured.
469 */
470
471 for (i = strlen(nd->myif.ifra_name) - 1;
472 nd->myif.ifra_name[i] >= '' &&
473 nd->myif.ifra_name[i] <= '9';
474 nd->myif.ifra_name[i] ++) {
475 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
476 if(!error)
477 break;
478 }
479 #endif
480
481 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
482 if (error)
483 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
484
485 if ((cp = getenv("boot.netif.mtu")) != NULL) {
486 ir.ifr_mtu = strtol(cp, NULL, 10);
487 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
488 freeenv(cp);
489 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
490 if (error)
491 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
492 }
493 soclose(so);
494
495 /*
496 * If the gateway field is filled in, set it as the default route.
497 * Note that pxeboot will set a default route of 0 if the route
498 * is not set by the DHCP server. Check also for a value of 0
499 * to avoid panicking inappropriately in that situation.
500 */
501 if (nd->mygateway.sin_len != 0 &&
502 nd->mygateway.sin_addr.s_addr != 0) {
503 struct sockaddr_in mask, sin;
504
505 bzero((caddr_t)&mask, sizeof(mask));
506 sin = mask;
507 sin.sin_family = AF_INET;
508 sin.sin_len = sizeof(sin);
509 /* XXX MRT use table 0 for this sort of thing */
510 CURVNET_SET(TD_TO_VNET(td));
511 error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
512 (struct sockaddr *)&nd->mygateway,
513 (struct sockaddr *)&mask,
514 RTF_UP | RTF_GATEWAY, NULL);
515 CURVNET_RESTORE();
516 if (error)
517 panic("nfs_mountroot: RTM_ADD: %d", error);
518 }
519
520 /*
521 * Create the rootfs mount point.
522 */
523 nd->root_args.fh = nd->root_fh;
524 nd->root_args.fhsize = nd->root_fhsize;
525 l = ntohl(nd->root_saddr.sin_addr.s_addr);
526 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
527 (l >> 24) & 0xff, (l >> 16) & 0xff,
528 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
529 printf("NFS ROOT: %s\n", buf);
530 nd->root_args.hostname = buf;
531 if ((error = nfs_mountdiskless(buf,
532 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
533 return (error);
534 }
535
536 /*
537 * This is not really an nfs issue, but it is much easier to
538 * set hostname here and then let the "/etc/rc.xxx" files
539 * mount the right /var based upon its preset value.
540 */
541 mtx_lock(&prison0.pr_mtx);
542 strlcpy(prison0.pr_hostname, nd->my_hostnam,
543 sizeof (prison0.pr_hostname));
544 mtx_unlock(&prison0.pr_mtx);
545 inittodr(ntohl(nd->root_time));
546 return (0);
547 }
548
549 /*
550 * Internal version of mount system call for diskless setup.
551 */
552 static int
553 nfs_mountdiskless(char *path,
554 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
555 struct vnode **vpp, struct mount *mp)
556 {
557 struct sockaddr *nam;
558 int error;
559
560 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
561 if ((error = mountnfs(args, mp, nam, path, vpp,
562 td->td_ucred, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
563 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
564 return (error);
565 }
566 return (0);
567 }
568
569 static int
570 nfs_sec_name_to_num(char *sec)
571 {
572 if (!strcmp(sec, "krb5"))
573 return (RPCSEC_GSS_KRB5);
574 if (!strcmp(sec, "krb5i"))
575 return (RPCSEC_GSS_KRB5I);
576 if (!strcmp(sec, "krb5p"))
577 return (RPCSEC_GSS_KRB5P);
578 if (!strcmp(sec, "sys"))
579 return (AUTH_SYS);
580 /*
581 * Userland should validate the string but we will try and
582 * cope with unexpected values.
583 */
584 return (AUTH_SYS);
585 }
586
587 static void
588 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
589 const char *hostname)
590 {
591 int s;
592 int adjsock;
593 int maxio;
594 char *p;
595 char *secname;
596 char *principal;
597
598 s = splnet();
599
600 /*
601 * Set read-only flag if requested; otherwise, clear it if this is
602 * an update. If this is not an update, then either the read-only
603 * flag is already clear, or this is a root mount and it was set
604 * intentionally at some previous point.
605 */
606 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
607 MNT_ILOCK(mp);
608 mp->mnt_flag |= MNT_RDONLY;
609 MNT_IUNLOCK(mp);
610 } else if (mp->mnt_flag & MNT_UPDATE) {
611 MNT_ILOCK(mp);
612 mp->mnt_flag &= ~MNT_RDONLY;
613 MNT_IUNLOCK(mp);
614 }
615
616 /*
617 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
618 * no sense in that context. Also, set up appropriate retransmit
619 * and soft timeout behavior.
620 */
621 if (argp->sotype == SOCK_STREAM) {
622 nmp->nm_flag &= ~NFSMNT_NOCONN;
623 nmp->nm_flag |= NFSMNT_DUMBTIMR;
624 nmp->nm_timeo = NFS_MAXTIMEO;
625 nmp->nm_retry = NFS_RETRANS_TCP;
626 }
627
628 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
629 if ((argp->flags & NFSMNT_NFSV3) == 0)
630 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
631
632 /* Re-bind if rsrvd port requested and wasn't on one */
633 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
634 && (argp->flags & NFSMNT_RESVPORT);
635 /* Also re-bind if we're switching to/from a connected UDP socket */
636 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
637 (argp->flags & NFSMNT_NOCONN));
638
639 /* Update flags atomically. Don't change the lock bits. */
640 nmp->nm_flag = argp->flags | nmp->nm_flag;
641 splx(s);
642
643 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
644 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
645 if (nmp->nm_timeo < NFS_MINTIMEO)
646 nmp->nm_timeo = NFS_MINTIMEO;
647 else if (nmp->nm_timeo > NFS_MAXTIMEO)
648 nmp->nm_timeo = NFS_MAXTIMEO;
649 }
650
651 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
652 nmp->nm_retry = argp->retrans;
653 if (nmp->nm_retry > NFS_MAXREXMIT)
654 nmp->nm_retry = NFS_MAXREXMIT;
655 }
656
657 if (argp->flags & NFSMNT_NFSV3) {
658 if (argp->sotype == SOCK_DGRAM)
659 maxio = NFS_MAXDGRAMDATA;
660 else
661 maxio = NFS_MAXDATA;
662 } else
663 maxio = NFS_V2MAXDATA;
664
665 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
666 nmp->nm_wsize = argp->wsize;
667 /* Round down to multiple of blocksize */
668 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
669 if (nmp->nm_wsize <= 0)
670 nmp->nm_wsize = NFS_FABLKSIZE;
671 }
672 if (nmp->nm_wsize > maxio)
673 nmp->nm_wsize = maxio;
674 if (nmp->nm_wsize > MAXBSIZE)
675 nmp->nm_wsize = MAXBSIZE;
676
677 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
678 nmp->nm_rsize = argp->rsize;
679 /* Round down to multiple of blocksize */
680 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
681 if (nmp->nm_rsize <= 0)
682 nmp->nm_rsize = NFS_FABLKSIZE;
683 }
684 if (nmp->nm_rsize > maxio)
685 nmp->nm_rsize = maxio;
686 if (nmp->nm_rsize > MAXBSIZE)
687 nmp->nm_rsize = MAXBSIZE;
688
689 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
690 nmp->nm_readdirsize = argp->readdirsize;
691 }
692 if (nmp->nm_readdirsize > maxio)
693 nmp->nm_readdirsize = maxio;
694 if (nmp->nm_readdirsize > nmp->nm_rsize)
695 nmp->nm_readdirsize = nmp->nm_rsize;
696
697 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
698 nmp->nm_acregmin = argp->acregmin;
699 else
700 nmp->nm_acregmin = NFS_MINATTRTIMO;
701 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
702 nmp->nm_acregmax = argp->acregmax;
703 else
704 nmp->nm_acregmax = NFS_MAXATTRTIMO;
705 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
706 nmp->nm_acdirmin = argp->acdirmin;
707 else
708 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
709 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
710 nmp->nm_acdirmax = argp->acdirmax;
711 else
712 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
713 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
714 nmp->nm_acdirmin = nmp->nm_acdirmax;
715 if (nmp->nm_acregmin > nmp->nm_acregmax)
716 nmp->nm_acregmin = nmp->nm_acregmax;
717
718 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
719 if (argp->maxgrouplist <= NFS_MAXGRPS)
720 nmp->nm_numgrps = argp->maxgrouplist;
721 else
722 nmp->nm_numgrps = NFS_MAXGRPS;
723 }
724 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
725 if (argp->readahead <= NFS_MAXRAHEAD)
726 nmp->nm_readahead = argp->readahead;
727 else
728 nmp->nm_readahead = NFS_MAXRAHEAD;
729 }
730 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
731 if (argp->wcommitsize < nmp->nm_wsize)
732 nmp->nm_wcommitsize = nmp->nm_wsize;
733 else
734 nmp->nm_wcommitsize = argp->wcommitsize;
735 }
736 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
737 if (argp->deadthresh <= NFS_MAXDEADTHRESH)
738 nmp->nm_deadthresh = argp->deadthresh;
739 else
740 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
741 }
742
743 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
744 (nmp->nm_soproto != argp->proto));
745 nmp->nm_sotype = argp->sotype;
746 nmp->nm_soproto = argp->proto;
747
748 if (nmp->nm_client && adjsock) {
749 nfs_safedisconnect(nmp);
750 if (nmp->nm_sotype == SOCK_DGRAM)
751 while (nfs_connect(nmp)) {
752 printf("nfs_args: retrying connect\n");
753 (void) tsleep(&fake_wchan, PSOCK, "nfscon", hz);
754 }
755 }
756
757 if (hostname) {
758 strlcpy(nmp->nm_hostname, hostname,
759 sizeof(nmp->nm_hostname));
760 p = strchr(nmp->nm_hostname, ':');
761 if (p)
762 *p = '\0';
763 }
764
765 if (vfs_getopt(mp->mnt_optnew, "sec",
766 (void **) &secname, NULL) == 0) {
767 nmp->nm_secflavor = nfs_sec_name_to_num(secname);
768 } else {
769 nmp->nm_secflavor = AUTH_SYS;
770 }
771
772 if (vfs_getopt(mp->mnt_optnew, "principal",
773 (void **) &principal, NULL) == 0) {
774 strlcpy(nmp->nm_principal, principal,
775 sizeof(nmp->nm_principal));
776 } else {
777 snprintf(nmp->nm_principal, sizeof(nmp->nm_principal),
778 "nfs@%s", nmp->nm_hostname);
779 }
780 }
781
/*
 * NULL-terminated list of option names that nfs_mount() hands to
 * vfs_filteropt() to validate the options supplied with a mount request.
 *
 * NOTE(review): nfs_mount() also queries "conn", "lockd" and
 * "noresvport", none of which is spelled out here -- confirm how
 * vfs_filteropt() treats a "no" prefix and whether "conn" and "lockd"
 * can ever get past the filter.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
    "readahead", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
    "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin",
    "acdirmax", "deadthresh", "hostname", "timeout", "addr", "fh", "nfsv3",
    "sec", "maxgroups", "principal", "negnametimeo", "nocto", "wcommitsize",
    NULL };
791
792 /*
793 * VFS Operations.
794 *
795 * mount system call
796 * It seems a bit dumb to copyinstr() the host and path here and then
797 * bcopy() them in mountnfs(), but I wanted to detect errors before
798 * doing the sockargs() call because sockargs() allocates an mbuf and
799 * an error after that means that I have to release the mbuf.
800 */
801 /* ARGSUSED */
802 static int
803 nfs_mount(struct mount *mp)
804 {
805 struct nfs_args args = {
806 .version = NFS_ARGSVERSION,
807 .addr = NULL,
808 .addrlen = sizeof (struct sockaddr_in),
809 .sotype = SOCK_STREAM,
810 .proto = 0,
811 .fh = NULL,
812 .fhsize = 0,
813 .flags = NFSMNT_RESVPORT,
814 .wsize = NFS_WSIZE,
815 .rsize = NFS_RSIZE,
816 .readdirsize = NFS_READDIRSIZE,
817 .timeo = 10,
818 .retrans = NFS_RETRANS,
819 .maxgrouplist = NFS_MAXGRPS,
820 .readahead = NFS_DEFRAHEAD,
821 .wcommitsize = 0, /* was: NQ_DEFLEASE */
822 .deadthresh = NFS_MAXDEADTHRESH, /* was: NQ_DEADTHRESH */
823 .hostname = NULL,
824 /* args version 4 */
825 .acregmin = NFS_MINATTRTIMO,
826 .acregmax = NFS_MAXATTRTIMO,
827 .acdirmin = NFS_MINDIRATTRTIMO,
828 .acdirmax = NFS_MAXDIRATTRTIMO,
829 };
830 int error, ret, has_nfs_args_opt;
831 int has_addr_opt, has_fh_opt, has_hostname_opt;
832 struct sockaddr *nam;
833 struct vnode *vp;
834 char hst[MNAMELEN];
835 size_t len;
836 u_char nfh[NFSX_V3FHMAX];
837 char *opt;
838 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
839
840 has_nfs_args_opt = 0;
841 has_addr_opt = 0;
842 has_fh_opt = 0;
843 has_hostname_opt = 0;
844
845 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
846 error = EINVAL;
847 goto out;
848 }
849
850 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
851 error = nfs_mountroot(mp);
852 goto out;
853 }
854
855 /*
856 * The old mount_nfs program passed the struct nfs_args
857 * from userspace to kernel. The new mount_nfs program
858 * passes string options via nmount() from userspace to kernel
859 * and we populate the struct nfs_args in the kernel.
860 */
861 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
862 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
863 sizeof args);
864 if (error)
865 goto out;
866
867 if (args.version != NFS_ARGSVERSION) {
868 error = EPROGMISMATCH;
869 goto out;
870 }
871 has_nfs_args_opt = 1;
872 }
873
874 if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
875 args.flags |= NFSMNT_DUMBTIMR;
876 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
877 args.flags |= NFSMNT_NOCONN;
878 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
879 args.flags |= NFSMNT_NOCONN;
880 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
881 args.flags |= NFSMNT_NOLOCKD;
882 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
883 args.flags &= ~NFSMNT_NOLOCKD;
884 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
885 args.flags |= NFSMNT_INT;
886 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
887 args.flags |= NFSMNT_RDIRPLUS;
888 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
889 args.flags |= NFSMNT_RESVPORT;
890 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
891 args.flags &= ~NFSMNT_RESVPORT;
892 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
893 args.flags |= NFSMNT_SOFT;
894 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
895 args.flags &= ~NFSMNT_SOFT;
896 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
897 args.sotype = SOCK_DGRAM;
898 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
899 args.sotype = SOCK_DGRAM;
900 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
901 args.sotype = SOCK_STREAM;
902 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
903 args.flags |= NFSMNT_NFSV3;
904 if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
905 args.flags |= NFSMNT_NOCTO;
906 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
907 if (opt == NULL) {
908 vfs_mount_error(mp, "illegal readdirsize");
909 error = EINVAL;
910 goto out;
911 }
912 ret = sscanf(opt, "%d", &args.readdirsize);
913 if (ret != 1 || args.readdirsize <= 0) {
914 vfs_mount_error(mp, "illegal readdirsize: %s",
915 opt);
916 error = EINVAL;
917 goto out;
918 }
919 args.flags |= NFSMNT_READDIRSIZE;
920 }
921 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
922 if (opt == NULL) {
923 vfs_mount_error(mp, "illegal readahead");
924 error = EINVAL;
925 goto out;
926 }
927 ret = sscanf(opt, "%d", &args.readahead);
928 if (ret != 1 || args.readahead <= 0) {
929 vfs_mount_error(mp, "illegal readahead: %s",
930 opt);
931 error = EINVAL;
932 goto out;
933 }
934 args.flags |= NFSMNT_READAHEAD;
935 }
936 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
937 if (opt == NULL) {
938 vfs_mount_error(mp, "illegal wsize");
939 error = EINVAL;
940 goto out;
941 }
942 ret = sscanf(opt, "%d", &args.wsize);
943 if (ret != 1 || args.wsize <= 0) {
944 vfs_mount_error(mp, "illegal wsize: %s",
945 opt);
946 error = EINVAL;
947 goto out;
948 }
949 args.flags |= NFSMNT_WSIZE;
950 }
951 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
952 if (opt == NULL) {
953 vfs_mount_error(mp, "illegal rsize");
954 error = EINVAL;
955 goto out;
956 }
957 ret = sscanf(opt, "%d", &args.rsize);
958 if (ret != 1 || args.rsize <= 0) {
959 vfs_mount_error(mp, "illegal wsize: %s",
960 opt);
961 error = EINVAL;
962 goto out;
963 }
964 args.flags |= NFSMNT_RSIZE;
965 }
966 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
967 if (opt == NULL) {
968 vfs_mount_error(mp, "illegal retrans");
969 error = EINVAL;
970 goto out;
971 }
972 ret = sscanf(opt, "%d", &args.retrans);
973 if (ret != 1 || args.retrans <= 0) {
974 vfs_mount_error(mp, "illegal retrans: %s",
975 opt);
976 error = EINVAL;
977 goto out;
978 }
979 args.flags |= NFSMNT_RETRANS;
980 }
981 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
982 ret = sscanf(opt, "%d", &args.acregmin);
983 if (ret != 1 || args.acregmin < 0) {
984 vfs_mount_error(mp, "illegal acregmin: %s",
985 opt);
986 error = EINVAL;
987 goto out;
988 }
989 args.flags |= NFSMNT_ACREGMIN;
990 }
991 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
992 ret = sscanf(opt, "%d", &args.acregmax);
993 if (ret != 1 || args.acregmax < 0) {
994 vfs_mount_error(mp, "illegal acregmax: %s",
995 opt);
996 error = EINVAL;
997 goto out;
998 }
999 args.flags |= NFSMNT_ACREGMAX;
1000 }
1001 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1002 ret = sscanf(opt, "%d", &args.acdirmin);
1003 if (ret != 1 || args.acdirmin < 0) {
1004 vfs_mount_error(mp, "illegal acdirmin: %s",
1005 opt);
1006 error = EINVAL;
1007 goto out;
1008 }
1009 args.flags |= NFSMNT_ACDIRMIN;
1010 }
1011 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1012 ret = sscanf(opt, "%d", &args.acdirmax);
1013 if (ret != 1 || args.acdirmax < 0) {
1014 vfs_mount_error(mp, "illegal acdirmax: %s",
1015 opt);
1016 error = EINVAL;
1017 goto out;
1018 }
1019 args.flags |= NFSMNT_ACDIRMAX;
1020 }
1021 if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1022 ret = sscanf(opt, "%d", &args.wcommitsize);
1023 if (ret != 1 || args.wcommitsize < 0) {
1024 vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1025 error = EINVAL;
1026 goto out;
1027 }
1028 args.flags |= NFSMNT_WCOMMITSIZE;
1029 }
1030 if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) {
1031 ret = sscanf(opt, "%d", &args.deadthresh);
1032 if (ret != 1 || args.deadthresh <= 0) {
1033 vfs_mount_error(mp, "illegal deadthresh: %s",
1034 opt);
1035 error = EINVAL;
1036 goto out;
1037 }
1038 args.flags |= NFSMNT_DEADTHRESH;
1039 }
1040 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1041 ret = sscanf(opt, "%d", &args.timeo);
1042 if (ret != 1 || args.timeo <= 0) {
1043 vfs_mount_error(mp, "illegal timeout: %s",
1044 opt);
1045 error = EINVAL;
1046 goto out;
1047 }
1048 args.flags |= NFSMNT_TIMEO;
1049 }
1050 if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) {
1051 ret = sscanf(opt, "%d", &args.maxgrouplist);
1052 if (ret != 1 || args.maxgrouplist <= 0) {
1053 vfs_mount_error(mp, "illegal maxgroups: %s",
1054 opt);
1055 error = EINVAL;
1056 goto out;
1057 }
1058 args.flags |= NFSMNT_MAXGRPS;
1059 }
1060 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1061 == 0) {
1062 ret = sscanf(opt, "%d", &negnametimeo);
1063 if (ret != 1 || negnametimeo < 0) {
1064 vfs_mount_error(mp, "illegal negnametimeo: %s",
1065 opt);
1066 error = EINVAL;
1067 goto out;
1068 }
1069 }
1070 if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1071 &args.addrlen) == 0) {
1072 has_addr_opt = 1;
1073 if (args.addrlen > SOCK_MAXADDRLEN) {
1074 error = ENAMETOOLONG;
1075 goto out;
1076 }
1077 nam = malloc(args.addrlen, M_SONAME,
1078 M_WAITOK);
1079 bcopy(args.addr, nam, args.addrlen);
1080 nam->sa_len = args.addrlen;
1081 }
1082 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1083 &args.fhsize) == 0) {
1084 has_fh_opt = 1;
1085 }
1086 if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
1087 NULL) == 0) {
1088 has_hostname_opt = 1;
1089 }
1090 if (args.hostname == NULL) {
1091 vfs_mount_error(mp, "Invalid hostname");
1092 error = EINVAL;
1093 goto out;
1094 }
1095 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1096 vfs_mount_error(mp, "Bad file handle");
1097 error = EINVAL;
1098 goto out;
1099 }
1100
1101 if (mp->mnt_flag & MNT_UPDATE) {
1102 struct nfsmount *nmp = VFSTONFS(mp);
1103
1104 if (nmp == NULL) {
1105 error = EIO;
1106 goto out;
1107 }
1108
1109 /*
1110 * If a change from TCP->UDP is done and there are thread(s)
1111 * that have I/O RPC(s) in progress with a tranfer size
1112 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1113 * hung, retrying the RPC(s) forever. Usually these threads
1114 * will be seen doing an uninterruptible sleep on wait channel
1115 * "newnfsreq" (truncated to "newnfsre" by procstat).
1116 */
1117 if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1118 tprintf(curthread->td_proc, LOG_WARNING,
1119 "Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1120
1121 /*
1122 * When doing an update, we can't change from or to
1123 * v3, switch lockd strategies or change cookie translation
1124 */
1125 args.flags = (args.flags &
1126 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1127 (nmp->nm_flag &
1128 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1129 nfs_decode_args(mp, nmp, &args, NULL);
1130 goto out;
1131 }
1132
1133 /*
1134 * Make the nfs_ip_paranoia sysctl serve as the default connection
1135 * or no-connection mode for those protocols that support
1136 * no-connection mode (the flag will be cleared later for protocols
1137 * that do not support no-connection mode). This will allow a client
1138 * to receive replies from a different IP then the request was
1139 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1140 * not 0.
1141 */
1142 if (nfs_ip_paranoia == 0)
1143 args.flags |= NFSMNT_NOCONN;
1144
1145 if (has_nfs_args_opt) {
1146 /*
1147 * In the 'nfs_args' case, the pointers in the args
1148 * structure are in userland - we copy them in here.
1149 */
1150 if (!has_fh_opt) {
1151 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1152 args.fhsize);
1153 if (error) {
1154 goto out;
1155 }
1156 args.fh = nfh;
1157 }
1158 if (!has_hostname_opt) {
1159 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
1160 if (error) {
1161 goto out;
1162 }
1163 bzero(&hst[len], MNAMELEN - len);
1164 args.hostname = hst;
1165 }
1166 if (!has_addr_opt) {
1167 /* sockargs() call must be after above copyin() calls */
1168 error = getsockaddr(&nam, (caddr_t)args.addr,
1169 args.addrlen);
1170 if (error) {
1171 goto out;
1172 }
1173 }
1174 } else if (has_addr_opt == 0) {
1175 vfs_mount_error(mp, "No server address");
1176 error = EINVAL;
1177 goto out;
1178 }
1179 error = mountnfs(&args, mp, nam, args.hostname, &vp,
1180 curthread->td_ucred, negnametimeo);
1181 out:
1182 if (!error) {
1183 MNT_ILOCK(mp);
1184 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1185 MNT_IUNLOCK(mp);
1186 }
1187 return (error);
1188 }
1189
1190
1191 /*
1192 * VFS Operations.
1193 *
1194 * mount system call
1195 * It seems a bit dumb to copyinstr() the host and path here and then
1196 * bcopy() them in mountnfs(), but I wanted to detect errors before
1197 * doing the sockargs() call because sockargs() allocates an mbuf and
1198 * an error after that means that I have to release the mbuf.
1199 */
1200 /* ARGSUSED */
1201 static int
1202 nfs_cmount(struct mntarg *ma, void *data, int flags)
1203 {
1204 int error;
1205 struct nfs_args args;
1206
1207 error = copyin(data, &args, sizeof (struct nfs_args));
1208 if (error)
1209 return error;
1210
1211 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1212
1213 error = kernel_mount(ma, flags);
1214 return (error);
1215 }
1216
1217 /*
1218 * Common code for mount and mountroot
1219 */
1220 static int
1221 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1222 char *hst, struct vnode **vpp, struct ucred *cred, int negnametimeo)
1223 {
1224 struct nfsmount *nmp;
1225 struct nfsnode *np;
1226 int error;
1227 struct vattr attrs;
1228
1229 if (mp->mnt_flag & MNT_UPDATE) {
1230 nmp = VFSTONFS(mp);
1231 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1232 free(nam, M_SONAME);
1233 return (0);
1234 } else {
1235 nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
1236 bzero((caddr_t)nmp, sizeof (struct nfsmount));
1237 TAILQ_INIT(&nmp->nm_bufq);
1238 mp->mnt_data = nmp;
1239 nmp->nm_getinfo = nfs_getnlminfo;
1240 nmp->nm_vinvalbuf = nfs_vinvalbuf;
1241 }
1242 vfs_getnewfsid(mp);
1243 nmp->nm_mountp = mp;
1244 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
1245
1246 /*
1247 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1248 * high, depending on whether we end up with negative offsets in
1249 * the client or server somewhere. 2GB-1 may be safer.
1250 *
1251 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
1252 * that we can handle until we find out otherwise.
1253 */
1254 if ((argp->flags & NFSMNT_NFSV3) == 0)
1255 nmp->nm_maxfilesize = 0xffffffffLL;
1256 else
1257 nmp->nm_maxfilesize = OFF_MAX;
1258
1259 nmp->nm_timeo = NFS_TIMEO;
1260 nmp->nm_retry = NFS_RETRANS;
1261 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
1262 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
1263 } else {
1264 nmp->nm_wsize = NFS_WSIZE;
1265 nmp->nm_rsize = NFS_RSIZE;
1266 }
1267 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1268 nmp->nm_readdirsize = NFS_READDIRSIZE;
1269 nmp->nm_numgrps = NFS_MAXGRPS;
1270 nmp->nm_readahead = NFS_DEFRAHEAD;
1271 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
1272 nmp->nm_negnametimeo = negnametimeo;
1273 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1274 if (nmp->nm_tprintf_delay < 0)
1275 nmp->nm_tprintf_delay = 0;
1276 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1277 if (nmp->nm_tprintf_initial_delay < 0)
1278 nmp->nm_tprintf_initial_delay = 0;
1279 nmp->nm_fhsize = argp->fhsize;
1280 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1281 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1282 nmp->nm_nam = nam;
1283 /* Set up the sockets and per-host congestion */
1284 nmp->nm_sotype = argp->sotype;
1285 nmp->nm_soproto = argp->proto;
1286 nmp->nm_rpcops = &nfs_rpcops;
1287
1288 nfs_decode_args(mp, nmp, argp, hst);
1289
1290 /*
1291 * For Connection based sockets (TCP,...) defer the connect until
1292 * the first request, in case the server is not responding.
1293 */
1294 if (nmp->nm_sotype == SOCK_DGRAM &&
1295 (error = nfs_connect(nmp)))
1296 goto bad;
1297
1298 /*
1299 * This is silly, but it has to be set so that vinifod() works.
1300 * We do not want to do an nfs_statfs() here since we can get
1301 * stuck on a dead server and we are holding a lock on the mount
1302 * point.
1303 */
1304 mtx_lock(&nmp->nm_mtx);
1305 mp->mnt_stat.f_iosize = nfs_iosize(nmp);
1306 mtx_unlock(&nmp->nm_mtx);
1307 /*
1308 * A reference count is needed on the nfsnode representing the
1309 * remote root. If this object is not persistent, then backward
1310 * traversals of the mount point (i.e. "..") will not work if
1311 * the nfsnode gets flushed out of the cache. Ufs does not have
1312 * this problem, because one can identify root inodes by their
1313 * number == ROOTINO (2).
1314 */
1315 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
1316 if (error)
1317 goto bad;
1318 *vpp = NFSTOV(np);
1319
1320 /*
1321 * Get file attributes and transfer parameters for the
1322 * mountpoint. This has the side effect of filling in
1323 * (*vpp)->v_type with the correct value.
1324 */
1325 if (argp->flags & NFSMNT_NFSV3)
1326 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
1327 else
1328 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred);
1329
1330 /*
1331 * Lose the lock but keep the ref.
1332 */
1333 VOP_UNLOCK(*vpp, 0);
1334
1335 return (0);
1336 bad:
1337 nfs_disconnect(nmp);
1338 mtx_destroy(&nmp->nm_mtx);
1339 uma_zfree(nfsmount_zone, nmp);
1340 free(nam, M_SONAME);
1341 return (error);
1342 }
1343
1344 /*
1345 * unmount system call
1346 */
1347 static int
1348 nfs_unmount(struct mount *mp, int mntflags)
1349 {
1350 struct nfsmount *nmp;
1351 int error, flags = 0;
1352
1353 if (mntflags & MNT_FORCE)
1354 flags |= FORCECLOSE;
1355 nmp = VFSTONFS(mp);
1356 /*
1357 * Goes something like this..
1358 * - Call vflush() to clear out vnodes for this filesystem
1359 * - Close the socket
1360 * - Free up the data structures
1361 */
1362 /* In the forced case, cancel any outstanding requests. */
1363 if (flags & FORCECLOSE) {
1364 error = nfs_nmcancelreqs(nmp);
1365 if (error)
1366 goto out;
1367 }
1368 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1369 error = vflush(mp, 1, flags, curthread);
1370 if (error)
1371 goto out;
1372
1373 /*
1374 * We are now committed to the unmount.
1375 */
1376 nfs_disconnect(nmp);
1377 free(nmp->nm_nam, M_SONAME);
1378
1379 mtx_destroy(&nmp->nm_mtx);
1380 uma_zfree(nfsmount_zone, nmp);
1381 out:
1382 return (error);
1383 }
1384
1385 /*
1386 * Return root of a filesystem
1387 */
1388 static int
1389 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1390 {
1391 struct vnode *vp;
1392 struct nfsmount *nmp;
1393 struct nfsnode *np;
1394 int error;
1395
1396 nmp = VFSTONFS(mp);
1397 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1398 if (error)
1399 return error;
1400 vp = NFSTOV(np);
1401 /*
1402 * Get transfer parameters and attributes for root vnode once.
1403 */
1404 mtx_lock(&nmp->nm_mtx);
1405 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
1406 (nmp->nm_flag & NFSMNT_NFSV3)) {
1407 mtx_unlock(&nmp->nm_mtx);
1408 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1409 } else
1410 mtx_unlock(&nmp->nm_mtx);
1411 if (vp->v_type == VNON)
1412 vp->v_type = VDIR;
1413 vp->v_vflag |= VV_ROOT;
1414 *vpp = vp;
1415 return (0);
1416 }
1417
1418 /*
1419 * Flush out the buffer cache
1420 */
1421 /* ARGSUSED */
1422 static int
1423 nfs_sync(struct mount *mp, int waitfor)
1424 {
1425 struct vnode *vp, *mvp;
1426 struct thread *td;
1427 int error, allerror = 0;
1428
1429 td = curthread;
1430
1431 MNT_ILOCK(mp);
1432 /*
1433 * If a forced dismount is in progress, return from here so that
1434 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1435 * calling VFS_UNMOUNT().
1436 */
1437 if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1438 MNT_IUNLOCK(mp);
1439 return (EBADF);
1440 }
1441
1442 /*
1443 * Force stale buffer cache information to be flushed.
1444 */
1445 loop:
1446 MNT_VNODE_FOREACH(vp, mp, mvp) {
1447 VI_LOCK(vp);
1448 MNT_IUNLOCK(mp);
1449 /* XXX Racy bv_cnt check. */
1450 if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1451 waitfor == MNT_LAZY) {
1452 VI_UNLOCK(vp);
1453 MNT_ILOCK(mp);
1454 continue;
1455 }
1456 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1457 MNT_ILOCK(mp);
1458 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1459 goto loop;
1460 }
1461 error = VOP_FSYNC(vp, waitfor, td);
1462 if (error)
1463 allerror = error;
1464 VOP_UNLOCK(vp, 0);
1465 vrele(vp);
1466
1467 MNT_ILOCK(mp);
1468 }
1469 MNT_IUNLOCK(mp);
1470 return (allerror);
1471 }
1472
1473 static int
1474 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1475 {
1476 struct nfsmount *nmp = VFSTONFS(mp);
1477 struct vfsquery vq;
1478 int error;
1479
1480 bzero(&vq, sizeof(vq));
1481 switch (op) {
1482 #if 0
1483 case VFS_CTL_NOLOCKS:
1484 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1485 if (req->oldptr != NULL) {
1486 error = SYSCTL_OUT(req, &val, sizeof(val));
1487 if (error)
1488 return (error);
1489 }
1490 if (req->newptr != NULL) {
1491 error = SYSCTL_IN(req, &val, sizeof(val));
1492 if (error)
1493 return (error);
1494 if (val)
1495 nmp->nm_flag |= NFSMNT_NOLOCKS;
1496 else
1497 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1498 }
1499 break;
1500 #endif
1501 case VFS_CTL_QUERY:
1502 mtx_lock(&nmp->nm_mtx);
1503 if (nmp->nm_state & NFSSTA_TIMEO)
1504 vq.vq_flags |= VQ_NOTRESP;
1505 mtx_unlock(&nmp->nm_mtx);
1506 #if 0
1507 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1508 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1509 vq.vq_flags |= VQ_NOTRESPLOCK;
1510 #endif
1511 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1512 break;
1513 case VFS_CTL_TIMEO:
1514 if (req->oldptr != NULL) {
1515 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1516 sizeof(nmp->nm_tprintf_initial_delay));
1517 if (error)
1518 return (error);
1519 }
1520 if (req->newptr != NULL) {
1521 error = vfs_suser(mp, req->td);
1522 if (error)
1523 return (error);
1524 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1525 sizeof(nmp->nm_tprintf_initial_delay));
1526 if (error)
1527 return (error);
1528 if (nmp->nm_tprintf_initial_delay < 0)
1529 nmp->nm_tprintf_initial_delay = 0;
1530 }
1531 break;
1532 default:
1533 return (ENOTSUP);
1534 }
1535 return (0);
1536 }
1537
1538 /*
1539 * Extract the information needed by the nlm from the nfs vnode.
1540 */
1541 static void
1542 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1543 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1544 struct timeval *timeop)
1545 {
1546 struct nfsmount *nmp;
1547 struct nfsnode *np = VTONFS(vp);
1548
1549 nmp = VFSTONFS(vp->v_mount);
1550 if (fhlenp != NULL)
1551 *fhlenp = (size_t)np->n_fhsize;
1552 if (fhp != NULL)
1553 bcopy(np->n_fhp, fhp, np->n_fhsize);
1554 if (sp != NULL)
1555 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1556 if (is_v3p != NULL)
1557 *is_v3p = NFS_ISV3(vp);
1558 if (sizep != NULL)
1559 *sizep = np->n_size;
1560 if (timeop != NULL) {
1561 timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1562 timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1563 }
1564 }
1565
Cache object: 15e014c7a643330ad0881c39569bdb10
|