1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/8.2/sys/nfsclient/nfs_vfsops.c 215091 2010-11-10 17:17:38Z jh $");
37
38
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/jail.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/module.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/vnode.h>
59 #include <sys/signalvar.h>
60
61 #include <vm/vm.h>
62 #include <vm/vm_extern.h>
63 #include <vm/uma.h>
64
65 #include <net/if.h>
66 #include <net/route.h>
67 #include <net/vnet.h>
68
69 #include <netinet/in.h>
70
71 #include <rpc/rpc.h>
72
73 #include <nfs/nfsproto.h>
74 #include <nfsclient/nfs.h>
75 #include <nfsclient/nfsnode.h>
76 #include <nfsclient/nfsmount.h>
77 #include <nfs/xdr_subs.h>
78 #include <nfsclient/nfsm_subs.h>
79 #include <nfsclient/nfsdiskless.h>
80
/*
 * Malloc types used by the NFS client.  Each MALLOC_DEFINE names the type
 * identifier, a short name and a longer description.
 */
MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");

/* UMA zone handle; presumably backs struct nfsmount allocations -- confirm. */
uma_zone_t nfsmount_zone;

/*
 * Client statistics.  The RPC routines in this file bump the per-procedure
 * counters in nfsstats.rpccnt[].
 */
struct nfsstats nfsstats;

/* Root of the vfs.nfs sysctl tree, and the exported statistics structure. */
SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
    &nfsstats, nfsstats, "S,nfsstats");
/* Tunable, on by default; not referenced elsewhere in this part of the file. */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0,
    "Disallow accepting replies from IPs which differ from those sent");
#ifdef NFS_DEBUG
/* Debug flag, only present in NFS_DEBUG kernels. */
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
/*
 * How long to wait before the first "nfs server not responding" message.
 *
 * NOTE(review): the OID-number argument of the two SYSCTL_INT()s below is
 * the delay constant itself, where the other knobs pass OID_AUTO --
 * confirm this is intended.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
    downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0,
    "Delay before printing \"nfs server not responding\" messages");
/* How long between console messages "nfs server foo not responding". */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
    downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0,
    "Delay between printing \"nfs server not responding\" messages");
112
/* Forward declarations for file-local helpers and the VFS entry points. */
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *hostname);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, struct vnode **,
		    struct ucred *cred, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *);
static vfs_mount_t	nfs_mount;
static vfs_cmount_t	nfs_cmount;
static vfs_unmount_t	nfs_unmount;
static vfs_root_t	nfs_root;
static vfs_statfs_t	nfs_statfs;
static vfs_sync_t	nfs_sync;
static vfs_sysctl_t	nfs_sysctl;

/*
 * Dummy wait channel: nfs_decode_args() sleeps on its address between
 * reconnect attempts.  The value itself is never used in this part of
 * the file.
 */
static int	fake_wchan;
129
/*
 * nfs vfs operations.
 *
 * Dispatch table registered with the VFS layer under the name "nfs";
 * VFCF_NETWORK is passed as the file system flag.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		nfs_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		nfs_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfs, 1);
/* Modules this one depends on; kgssapi only when built with KGSSAPI. */
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
#ifdef KGSSAPI
MODULE_DEPEND(nfs, kgssapi, 1, 1, 1);
#endif
MODULE_DEPEND(nfs, nfs_common, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);

/*
 * RPC operation table.  The initializer is positional, so the order must
 * match the member order of struct nfs_rpcops (declared elsewhere).
 */
static struct nfs_rpcops nfs_rpcops = {
	nfs_readrpc,
	nfs_writerpc,
	nfs_writebp,
	nfs_readlinkrpc,
	nfs_invaldir,
	nfs_commit,
};
163
/*
 * This structure must be filled in by a primary bootstrap or bootstrap
 * server for a diskless/dataless machine. It is initialized below just
 * to ensure that it is allocated to initialized data (.data not .bss).
 *
 * nfs_diskless_valid records which structure holds usable data, as used
 * by nfs_mountroot():
 *   0 - nothing has been filled in; the root mount is refused,
 *   1 - the old-format nfs_diskless is filled in and is converted by
 *       nfs_convert_diskless(), which then sets the value to 3,
 *   other non-zero values - nfsv3_diskless is used as is.
 */
struct nfs_diskless nfs_diskless = { { { 0 } } };
struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
int nfs_diskless_valid = 0;

/* Read-only sysctl views of the diskless boot configuration. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
    "%Ssockaddr_in", "Diskless root nfs address");


/* Not static: defined outside the part of the file visible here. */
void		nfsargs_ntoh(struct nfs_args *);
/* File-local helpers for the diskless root mount path. */
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
192
193 int
194 nfs_iosize(struct nfsmount *nmp)
195 {
196 int iosize;
197
198 /*
199 * Calculate the size used for io buffers. Use the larger
200 * of the two sizes to minimise nfs requests but make sure
201 * that it is at least one VM page to avoid wasting buffer
202 * space.
203 */
204 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
205 iosize = imax(iosize, PAGE_SIZE);
206 return (iosize);
207 }
208
209 static void
210 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
211 {
212
213 args->version = NFS_ARGSVERSION;
214 args->addr = oargs->addr;
215 args->addrlen = oargs->addrlen;
216 args->sotype = oargs->sotype;
217 args->proto = oargs->proto;
218 args->fh = oargs->fh;
219 args->fhsize = oargs->fhsize;
220 args->flags = oargs->flags;
221 args->wsize = oargs->wsize;
222 args->rsize = oargs->rsize;
223 args->readdirsize = oargs->readdirsize;
224 args->timeo = oargs->timeo;
225 args->retrans = oargs->retrans;
226 args->maxgrouplist = oargs->maxgrouplist;
227 args->readahead = oargs->readahead;
228 args->deadthresh = oargs->deadthresh;
229 args->hostname = oargs->hostname;
230 }
231
232 static void
233 nfs_convert_diskless(void)
234 {
235
236 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
237 sizeof(struct ifaliasreq));
238 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
239 sizeof(struct sockaddr_in));
240 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
241 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
242 nfsv3_diskless.root_fhsize = NFSX_V3FH;
243 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
244 } else {
245 nfsv3_diskless.root_fhsize = NFSX_V2FH;
246 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
247 }
248 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
249 sizeof(struct sockaddr_in));
250 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
251 nfsv3_diskless.root_time = nfs_diskless.root_time;
252 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
253 MAXHOSTNAMELEN);
254 nfs_diskless_valid = 3;
255 }
256
/*
 * nfs statfs call
 *
 * Fetch file system statistics from the server with an FSSTAT RPC on the
 * mount's root file handle and store them in *sbp.
 *
 * Returns 0 on success; otherwise the error from vfs_busy(), nfs_nget()
 * or the RPC.
 *
 * NOTE(review): the nfsm_* macros used below are defined outside this
 * file.  They appear to operate implicitly on the locals mreq, mrep, mb,
 * md, bpos, dpos and error, and presumably jump to nfsmout on failure --
 * confirm against their definitions before restructuring this function.
 */
static int
nfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct vnode *vp;
	struct thread *td;
	struct nfs_statfs *sfp;
	caddr_t bpos, dpos;
	struct nfsmount *nmp = VFSTONFS(mp);
	int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct nfsnode *np;
	u_quad_t tquad;

	td = curthread;
#ifndef nolint
	/* Presumably only here to quiet uninitialized-variable warnings. */
	sfp = NULL;
#endif
	/* Keep the mount busy for the duration of the call. */
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error)
		return (error);
	/* Look up the root node, locked exclusively; released by vput() below. */
	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
	if (error) {
		vfs_unbusy(mp);
		return (error);
	}
	vp = NFSTOV(np);
	/*
	 * For NFSv3, issue FSINFO first if it has not been obtained yet for
	 * this mount.  nm_mtx is dropped before the RPC and the result of
	 * nfs_fsinfo() is deliberately ignored.
	 */
	mtx_lock(&nmp->nm_mtx);
	if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		(void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
	} else
		mtx_unlock(&nmp->nm_mtx);
	/* Build and send the FSSTAT request. */
	nfsstats.rpccnt[NFSPROC_FSSTAT]++;
	mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
	if (v3)
		nfsm_postop_attr(vp, retattr);
	if (error) {
		if (mrep != NULL)
			m_freem(mrep);
		goto nfsmout;
	}
	sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
	/* nfs_iosize() reads nm_rsize/nm_wsize, so take the mount mutex. */
	mtx_lock(&nmp->nm_mtx);
	sbp->f_iosize = nfs_iosize(nmp);
	mtx_unlock(&nmp->nm_mtx);
	if (v3) {
		/*
		 * The v3 reply carries byte counts; report them in units of
		 * NFS_FABLKSIZE.  The file counts use only nfsuquad[1] of
		 * each 64-bit value, masked to 31 bits.
		 */
		sbp->f_bsize = NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_tbytes);
		sbp->f_blocks = tquad / NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_fbytes);
		sbp->f_bfree = tquad / NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_abytes);
		sbp->f_bavail = tquad / NFS_FABLKSIZE;
		sbp->f_files = (fxdr_unsigned(int32_t,
		    sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
		sbp->f_ffree = (fxdr_unsigned(int32_t,
		    sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
	} else {
		/*
		 * The v2 reply supplies the block size and block counts
		 * directly; file counts are reported as 0.
		 */
		sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
		sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
		sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
		sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
		sbp->f_files = 0;
		sbp->f_ffree = 0;
	}
	m_freem(mrep);
nfsmout:
	/* Common exit: unlock and release the root vnode, unbusy the mount. */
	vput(vp);
	vfs_unbusy(mp);
	return (error);
}
335
/*
 * nfs version 3 fsinfo rpc call
 *
 * Issue an FSINFO RPC for vp and use the reply to lower the mount's
 * write, read and readdir transfer sizes and its maximum file size.
 * Values are only ever reduced, never raised.  On success
 * NFSSTA_GOTFSINFO is set in nm_state and the mount's f_iosize is
 * recomputed.
 *
 * Returns 0 on success, or the RPC error.
 *
 * NOTE(review): the nfsm_* macros are defined outside this file; they
 * appear to use the locals mreq, mrep, mb, md, bpos, dpos and error
 * implicitly and presumably jump to nfsmout on failure -- confirm.
 */
int
nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
    struct thread *td)
{
	struct nfsv3_fsinfo *fsp;
	u_int32_t pref, max;
	caddr_t bpos, dpos;
	int error = 0, retattr;
	struct mbuf *mreq, *mrep, *md, *mb;
	u_int64_t maxfsize;

	nfsstats.rpccnt[NFSPROC_FSINFO]++;
	/* The literal 1 selects the v3 form of the file handle helpers. */
	mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, 1);
	nfsm_request(vp, NFSPROC_FSINFO, td, cred);
	nfsm_postop_attr(vp, retattr);
	if (!error) {
		fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
		pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
		mtx_lock(&nmp->nm_mtx);
		/*
		 * Write size: adopt the server's preferred size if it is
		 * smaller than ours and at least one block, rounded up to a
		 * multiple of NFS_FABLKSIZE.
		 */
		if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
			nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
			    ~(NFS_FABLKSIZE - 1);
		/*
		 * Then cap at the server's maximum, rounded down to a block
		 * multiple; if that rounds to zero, use the raw maximum.
		 */
		max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
		if (max < nmp->nm_wsize && max > 0) {
			nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
			if (nmp->nm_wsize == 0)
				nmp->nm_wsize = max;
		}
		/* Read size: same treatment with the read preferred/max values. */
		pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
		if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
			nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
			    ~(NFS_FABLKSIZE - 1);
		max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
		if (max < nmp->nm_rsize && max > 0) {
			nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
			if (nmp->nm_rsize == 0)
				nmp->nm_rsize = max;
		}
		/* Readdir size: rounded to NFS_DIRBLKSIZ multiples instead. */
		pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
		if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
			nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
			    ~(NFS_DIRBLKSIZ - 1);
		/*
		 * max still holds fs_rtmax here, so the readdir size is
		 * capped by the server's maximum read size.
		 */
		if (max < nmp->nm_readdirsize && max > 0) {
			nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
			if (nmp->nm_readdirsize == 0)
				nmp->nm_readdirsize = max;
		}
		/* Lower the maximum file size if the server reports a smaller one. */
		maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
		if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
			nmp->nm_maxfilesize = maxfsize;
		/* The transfer sizes may have changed; refresh the cached I/O size. */
		nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
		nmp->nm_state |= NFSSTA_GOTFSINFO;
		mtx_unlock(&nmp->nm_mtx);
	}
	/* The reply is freed whether or not error was set above. */
	m_freem(mrep);
nfsmout:
	return (error);
}
400
401 /*
402 * Mount a remote root fs via. nfs. This depends on the info in the
403 * nfs_diskless structure that has been filled in properly by some primary
404 * bootstrap.
405 * It goes something like this:
406 * - do enough of "ifconfig" by calling ifioctl() so that the system
407 * can talk to the server
408 * - If nfs_diskless.mygateway is filled in, use that address as
409 * a default gateway.
410 * - build the rootfs mount point and call mountnfs() to do the rest.
411 *
412 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
413 * structure, as well as other global NFS client variables here, as
414 * nfs_mountroot() will be called once in the boot before any other NFS
415 * client activity occurs.
416 */
417 int
418 nfs_mountroot(struct mount *mp)
419 {
420 struct thread *td = curthread;
421 struct nfsv3_diskless *nd = &nfsv3_diskless;
422 struct socket *so;
423 struct vnode *vp;
424 struct ifreq ir;
425 int error;
426 u_long l;
427 char buf[128];
428 char *cp;
429
430 CURVNET_SET(TD_TO_VNET(td));
431
432 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
433 bootpc_init(); /* use bootp to get nfs_diskless filled in */
434 #elif defined(NFS_ROOT)
435 nfs_setup_diskless();
436 #endif
437
438 if (nfs_diskless_valid == 0) {
439 CURVNET_RESTORE();
440 return (-1);
441 }
442 if (nfs_diskless_valid == 1)
443 nfs_convert_diskless();
444
445 /*
446 * XXX splnet, so networks will receive...
447 */
448 splnet();
449
450 /*
451 * Do enough of ifconfig(8) so that the critical net interface can
452 * talk to the server.
453 */
454 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
455 td->td_ucred, td);
456 if (error)
457 panic("nfs_mountroot: socreate(%04x): %d",
458 nd->myif.ifra_addr.sa_family, error);
459
460 #if 0 /* XXX Bad idea */
461 /*
462 * We might not have been told the right interface, so we pass
463 * over the first ten interfaces of the same kind, until we get
464 * one of them configured.
465 */
466
467 for (i = strlen(nd->myif.ifra_name) - 1;
468 nd->myif.ifra_name[i] >= '' &&
469 nd->myif.ifra_name[i] <= '9';
470 nd->myif.ifra_name[i] ++) {
471 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
472 if(!error)
473 break;
474 }
475 #endif
476
477 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
478 if (error)
479 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
480
481 if ((cp = getenv("boot.netif.mtu")) != NULL) {
482 ir.ifr_mtu = strtol(cp, NULL, 10);
483 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
484 freeenv(cp);
485 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
486 if (error)
487 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
488 }
489 soclose(so);
490
491 /*
492 * If the gateway field is filled in, set it as the default route.
493 * Note that pxeboot will set a default route of 0 if the route
494 * is not set by the DHCP server. Check also for a value of 0
495 * to avoid panicking inappropriately in that situation.
496 */
497 if (nd->mygateway.sin_len != 0 &&
498 nd->mygateway.sin_addr.s_addr != 0) {
499 struct sockaddr_in mask, sin;
500
501 bzero((caddr_t)&mask, sizeof(mask));
502 sin = mask;
503 sin.sin_family = AF_INET;
504 sin.sin_len = sizeof(sin);
505 /* XXX MRT use table 0 for this sort of thing */
506 error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
507 (struct sockaddr *)&nd->mygateway,
508 (struct sockaddr *)&mask,
509 RTF_UP | RTF_GATEWAY, NULL);
510 if (error)
511 panic("nfs_mountroot: RTM_ADD: %d", error);
512 }
513
514 /*
515 * Create the rootfs mount point.
516 */
517 nd->root_args.fh = nd->root_fh;
518 nd->root_args.fhsize = nd->root_fhsize;
519 l = ntohl(nd->root_saddr.sin_addr.s_addr);
520 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
521 (l >> 24) & 0xff, (l >> 16) & 0xff,
522 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
523 printf("NFS ROOT: %s\n", buf);
524 nd->root_args.hostname = buf;
525 if ((error = nfs_mountdiskless(buf,
526 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
527 CURVNET_RESTORE();
528 return (error);
529 }
530
531 /*
532 * This is not really an nfs issue, but it is much easier to
533 * set hostname here and then let the "/etc/rc.xxx" files
534 * mount the right /var based upon its preset value.
535 */
536 mtx_lock(&prison0.pr_mtx);
537 strlcpy(prison0.pr_hostname, nd->my_hostnam,
538 sizeof (prison0.pr_hostname));
539 mtx_unlock(&prison0.pr_mtx);
540 inittodr(ntohl(nd->root_time));
541 CURVNET_RESTORE();
542 return (0);
543 }
544
545 /*
546 * Internal version of mount system call for diskless setup.
547 */
548 static int
549 nfs_mountdiskless(char *path,
550 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
551 struct vnode **vpp, struct mount *mp)
552 {
553 struct sockaddr *nam;
554 int error;
555
556 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
557 if ((error = mountnfs(args, mp, nam, path, vpp,
558 td->td_ucred, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
559 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
560 return (error);
561 }
562 return (0);
563 }
564
565 static int
566 nfs_sec_name_to_num(char *sec)
567 {
568 if (!strcmp(sec, "krb5"))
569 return (RPCSEC_GSS_KRB5);
570 if (!strcmp(sec, "krb5i"))
571 return (RPCSEC_GSS_KRB5I);
572 if (!strcmp(sec, "krb5p"))
573 return (RPCSEC_GSS_KRB5P);
574 if (!strcmp(sec, "sys"))
575 return (AUTH_SYS);
576 /*
577 * Userland should validate the string but we will try and
578 * cope with unexpected values.
579 */
580 return (AUTH_SYS);
581 }
582
583 static void
584 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
585 const char *hostname)
586 {
587 int s;
588 int adjsock;
589 int maxio;
590 char *p;
591 char *secname;
592 char *principal;
593
594 s = splnet();
595
596 /*
597 * Set read-only flag if requested; otherwise, clear it if this is
598 * an update. If this is not an update, then either the read-only
599 * flag is already clear, or this is a root mount and it was set
600 * intentionally at some previous point.
601 */
602 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
603 MNT_ILOCK(mp);
604 mp->mnt_flag |= MNT_RDONLY;
605 MNT_IUNLOCK(mp);
606 } else if (mp->mnt_flag & MNT_UPDATE) {
607 MNT_ILOCK(mp);
608 mp->mnt_flag &= ~MNT_RDONLY;
609 MNT_IUNLOCK(mp);
610 }
611
612 /*
613 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
614 * no sense in that context. Also, set up appropriate retransmit
615 * and soft timeout behavior.
616 */
617 if (argp->sotype == SOCK_STREAM) {
618 nmp->nm_flag &= ~NFSMNT_NOCONN;
619 nmp->nm_flag |= NFSMNT_DUMBTIMR;
620 nmp->nm_timeo = NFS_MAXTIMEO;
621 nmp->nm_retry = NFS_RETRANS_TCP;
622 }
623
624 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
625 if ((argp->flags & NFSMNT_NFSV3) == 0)
626 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
627
628 /* Re-bind if rsrvd port requested and wasn't on one */
629 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
630 && (argp->flags & NFSMNT_RESVPORT);
631 /* Also re-bind if we're switching to/from a connected UDP socket */
632 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
633 (argp->flags & NFSMNT_NOCONN));
634
635 /* Update flags atomically. Don't change the lock bits. */
636 nmp->nm_flag = argp->flags | nmp->nm_flag;
637 splx(s);
638
639 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
640 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
641 if (nmp->nm_timeo < NFS_MINTIMEO)
642 nmp->nm_timeo = NFS_MINTIMEO;
643 else if (nmp->nm_timeo > NFS_MAXTIMEO)
644 nmp->nm_timeo = NFS_MAXTIMEO;
645 }
646
647 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
648 nmp->nm_retry = argp->retrans;
649 if (nmp->nm_retry > NFS_MAXREXMIT)
650 nmp->nm_retry = NFS_MAXREXMIT;
651 }
652
653 if (argp->flags & NFSMNT_NFSV3) {
654 if (argp->sotype == SOCK_DGRAM)
655 maxio = NFS_MAXDGRAMDATA;
656 else
657 maxio = NFS_MAXDATA;
658 } else
659 maxio = NFS_V2MAXDATA;
660
661 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
662 nmp->nm_wsize = argp->wsize;
663 /* Round down to multiple of blocksize */
664 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
665 if (nmp->nm_wsize <= 0)
666 nmp->nm_wsize = NFS_FABLKSIZE;
667 }
668 if (nmp->nm_wsize > maxio)
669 nmp->nm_wsize = maxio;
670 if (nmp->nm_wsize > MAXBSIZE)
671 nmp->nm_wsize = MAXBSIZE;
672
673 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
674 nmp->nm_rsize = argp->rsize;
675 /* Round down to multiple of blocksize */
676 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
677 if (nmp->nm_rsize <= 0)
678 nmp->nm_rsize = NFS_FABLKSIZE;
679 }
680 if (nmp->nm_rsize > maxio)
681 nmp->nm_rsize = maxio;
682 if (nmp->nm_rsize > MAXBSIZE)
683 nmp->nm_rsize = MAXBSIZE;
684
685 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
686 nmp->nm_readdirsize = argp->readdirsize;
687 }
688 if (nmp->nm_readdirsize > maxio)
689 nmp->nm_readdirsize = maxio;
690 if (nmp->nm_readdirsize > nmp->nm_rsize)
691 nmp->nm_readdirsize = nmp->nm_rsize;
692
693 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
694 nmp->nm_acregmin = argp->acregmin;
695 else
696 nmp->nm_acregmin = NFS_MINATTRTIMO;
697 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
698 nmp->nm_acregmax = argp->acregmax;
699 else
700 nmp->nm_acregmax = NFS_MAXATTRTIMO;
701 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
702 nmp->nm_acdirmin = argp->acdirmin;
703 else
704 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
705 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
706 nmp->nm_acdirmax = argp->acdirmax;
707 else
708 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
709 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
710 nmp->nm_acdirmin = nmp->nm_acdirmax;
711 if (nmp->nm_acregmin > nmp->nm_acregmax)
712 nmp->nm_acregmin = nmp->nm_acregmax;
713
714 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
715 if (argp->maxgrouplist <= NFS_MAXGRPS)
716 nmp->nm_numgrps = argp->maxgrouplist;
717 else
718 nmp->nm_numgrps = NFS_MAXGRPS;
719 }
720 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
721 if (argp->readahead <= NFS_MAXRAHEAD)
722 nmp->nm_readahead = argp->readahead;
723 else
724 nmp->nm_readahead = NFS_MAXRAHEAD;
725 }
726 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
727 if (argp->wcommitsize < nmp->nm_wsize)
728 nmp->nm_wcommitsize = nmp->nm_wsize;
729 else
730 nmp->nm_wcommitsize = argp->wcommitsize;
731 }
732 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
733 if (argp->deadthresh <= NFS_MAXDEADTHRESH)
734 nmp->nm_deadthresh = argp->deadthresh;
735 else
736 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
737 }
738
739 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
740 (nmp->nm_soproto != argp->proto));
741 nmp->nm_sotype = argp->sotype;
742 nmp->nm_soproto = argp->proto;
743
744 if (nmp->nm_client && adjsock) {
745 nfs_safedisconnect(nmp);
746 if (nmp->nm_sotype == SOCK_DGRAM)
747 while (nfs_connect(nmp)) {
748 printf("nfs_args: retrying connect\n");
749 (void) tsleep(&fake_wchan, PSOCK, "nfscon", hz);
750 }
751 }
752
753 if (hostname) {
754 strlcpy(nmp->nm_hostname, hostname,
755 sizeof(nmp->nm_hostname));
756 p = strchr(nmp->nm_hostname, ':');
757 if (p)
758 *p = '\0';
759 }
760
761 if (vfs_getopt(mp->mnt_optnew, "sec",
762 (void **) &secname, NULL) == 0) {
763 nmp->nm_secflavor = nfs_sec_name_to_num(secname);
764 } else {
765 nmp->nm_secflavor = AUTH_SYS;
766 }
767
768 if (vfs_getopt(mp->mnt_optnew, "principal",
769 (void **) &principal, NULL) == 0) {
770 strlcpy(nmp->nm_principal, principal,
771 sizeof(nmp->nm_principal));
772 } else {
773 snprintf(nmp->nm_principal, sizeof(nmp->nm_principal),
774 "nfs@%s", nmp->nm_hostname);
775 }
776 }
777
/*
 * Mount option names accepted by nfs_mount(), which passes this
 * NULL-terminated list to vfs_filteropt().
 *
 * NOTE(review): nfs_mount() also looks up "conn", "lockd" and
 * "noresvport", which are not listed here; whether they pass the filter
 * depends on how vfs_filteropt() treats the "no" prefix -- confirm.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
    "readahead", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
    "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin",
    "acdirmax", "deadthresh", "hostname", "timeout", "addr", "fh", "nfsv3",
    "sec", "maxgroups", "principal", "negnametimeo",
    NULL };
787
788 /*
789 * VFS Operations.
790 *
791 * mount system call
792 * It seems a bit dumb to copyinstr() the host and path here and then
793 * bcopy() them in mountnfs(), but I wanted to detect errors before
794 * doing the sockargs() call because sockargs() allocates an mbuf and
795 * an error after that means that I have to release the mbuf.
796 */
797 /* ARGSUSED */
798 static int
799 nfs_mount(struct mount *mp)
800 {
801 struct nfs_args args = {
802 .version = NFS_ARGSVERSION,
803 .addr = NULL,
804 .addrlen = sizeof (struct sockaddr_in),
805 .sotype = SOCK_STREAM,
806 .proto = 0,
807 .fh = NULL,
808 .fhsize = 0,
809 .flags = NFSMNT_RESVPORT,
810 .wsize = NFS_WSIZE,
811 .rsize = NFS_RSIZE,
812 .readdirsize = NFS_READDIRSIZE,
813 .timeo = 10,
814 .retrans = NFS_RETRANS,
815 .maxgrouplist = NFS_MAXGRPS,
816 .readahead = NFS_DEFRAHEAD,
817 .wcommitsize = 0, /* was: NQ_DEFLEASE */
818 .deadthresh = NFS_MAXDEADTHRESH, /* was: NQ_DEADTHRESH */
819 .hostname = NULL,
820 /* args version 4 */
821 .acregmin = NFS_MINATTRTIMO,
822 .acregmax = NFS_MAXATTRTIMO,
823 .acdirmin = NFS_MINDIRATTRTIMO,
824 .acdirmax = NFS_MAXDIRATTRTIMO,
825 };
826 int error, ret, has_nfs_args_opt;
827 int has_addr_opt, has_fh_opt, has_hostname_opt;
828 struct sockaddr *nam;
829 struct vnode *vp;
830 char hst[MNAMELEN];
831 size_t len;
832 u_char nfh[NFSX_V3FHMAX];
833 char *opt;
834 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
835
836 has_nfs_args_opt = 0;
837 has_addr_opt = 0;
838 has_fh_opt = 0;
839 has_hostname_opt = 0;
840
841 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
842 error = EINVAL;
843 goto out;
844 }
845
846 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
847 error = nfs_mountroot(mp);
848 goto out;
849 }
850
851 /*
852 * The old mount_nfs program passed the struct nfs_args
853 * from userspace to kernel. The new mount_nfs program
854 * passes string options via nmount() from userspace to kernel
855 * and we populate the struct nfs_args in the kernel.
856 */
857 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
858 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
859 sizeof args);
860 if (error)
861 goto out;
862
863 if (args.version != NFS_ARGSVERSION) {
864 error = EPROGMISMATCH;
865 goto out;
866 }
867 has_nfs_args_opt = 1;
868 }
869
870 if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
871 args.flags |= NFSMNT_DUMBTIMR;
872 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
873 args.flags |= NFSMNT_NOCONN;
874 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
875 args.flags |= NFSMNT_NOCONN;
876 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
877 args.flags |= NFSMNT_NOLOCKD;
878 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
879 args.flags &= ~NFSMNT_NOLOCKD;
880 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
881 args.flags |= NFSMNT_INT;
882 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
883 args.flags |= NFSMNT_RDIRPLUS;
884 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
885 args.flags |= NFSMNT_RESVPORT;
886 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
887 args.flags &= ~NFSMNT_RESVPORT;
888 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
889 args.flags |= NFSMNT_SOFT;
890 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
891 args.flags &= ~NFSMNT_SOFT;
892 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
893 args.sotype = SOCK_DGRAM;
894 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
895 args.sotype = SOCK_DGRAM;
896 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
897 args.sotype = SOCK_STREAM;
898 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
899 args.flags |= NFSMNT_NFSV3;
900 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
901 if (opt == NULL) {
902 vfs_mount_error(mp, "illegal readdirsize");
903 error = EINVAL;
904 goto out;
905 }
906 ret = sscanf(opt, "%d", &args.readdirsize);
907 if (ret != 1 || args.readdirsize <= 0) {
908 vfs_mount_error(mp, "illegal readdirsize: %s",
909 opt);
910 error = EINVAL;
911 goto out;
912 }
913 args.flags |= NFSMNT_READDIRSIZE;
914 }
915 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
916 if (opt == NULL) {
917 vfs_mount_error(mp, "illegal readahead");
918 error = EINVAL;
919 goto out;
920 }
921 ret = sscanf(opt, "%d", &args.readahead);
922 if (ret != 1 || args.readahead <= 0) {
923 vfs_mount_error(mp, "illegal readahead: %s",
924 opt);
925 error = EINVAL;
926 goto out;
927 }
928 args.flags |= NFSMNT_READAHEAD;
929 }
930 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
931 if (opt == NULL) {
932 vfs_mount_error(mp, "illegal wsize");
933 error = EINVAL;
934 goto out;
935 }
936 ret = sscanf(opt, "%d", &args.wsize);
937 if (ret != 1 || args.wsize <= 0) {
938 vfs_mount_error(mp, "illegal wsize: %s",
939 opt);
940 error = EINVAL;
941 goto out;
942 }
943 args.flags |= NFSMNT_WSIZE;
944 }
945 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
946 if (opt == NULL) {
947 vfs_mount_error(mp, "illegal rsize");
948 error = EINVAL;
949 goto out;
950 }
951 ret = sscanf(opt, "%d", &args.rsize);
952 if (ret != 1 || args.rsize <= 0) {
953 vfs_mount_error(mp, "illegal wsize: %s",
954 opt);
955 error = EINVAL;
956 goto out;
957 }
958 args.flags |= NFSMNT_RSIZE;
959 }
960 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
961 if (opt == NULL) {
962 vfs_mount_error(mp, "illegal retrans");
963 error = EINVAL;
964 goto out;
965 }
966 ret = sscanf(opt, "%d", &args.retrans);
967 if (ret != 1 || args.retrans <= 0) {
968 vfs_mount_error(mp, "illegal retrans: %s",
969 opt);
970 error = EINVAL;
971 goto out;
972 }
973 args.flags |= NFSMNT_RETRANS;
974 }
975 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
976 ret = sscanf(opt, "%d", &args.acregmin);
977 if (ret != 1 || args.acregmin < 0) {
978 vfs_mount_error(mp, "illegal acregmin: %s",
979 opt);
980 error = EINVAL;
981 goto out;
982 }
983 args.flags |= NFSMNT_ACREGMIN;
984 }
985 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
986 ret = sscanf(opt, "%d", &args.acregmax);
987 if (ret != 1 || args.acregmax < 0) {
988 vfs_mount_error(mp, "illegal acregmax: %s",
989 opt);
990 error = EINVAL;
991 goto out;
992 }
993 args.flags |= NFSMNT_ACREGMAX;
994 }
995 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
996 ret = sscanf(opt, "%d", &args.acdirmin);
997 if (ret != 1 || args.acdirmin < 0) {
998 vfs_mount_error(mp, "illegal acdirmin: %s",
999 opt);
1000 error = EINVAL;
1001 goto out;
1002 }
1003 args.flags |= NFSMNT_ACDIRMIN;
1004 }
1005 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1006 ret = sscanf(opt, "%d", &args.acdirmax);
1007 if (ret != 1 || args.acdirmax < 0) {
1008 vfs_mount_error(mp, "illegal acdirmax: %s",
1009 opt);
1010 error = EINVAL;
1011 goto out;
1012 }
1013 args.flags |= NFSMNT_ACDIRMAX;
1014 }
1015 if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) {
1016 ret = sscanf(opt, "%d", &args.deadthresh);
1017 if (ret != 1 || args.deadthresh <= 0) {
1018 vfs_mount_error(mp, "illegal deadthresh: %s",
1019 opt);
1020 error = EINVAL;
1021 goto out;
1022 }
1023 args.flags |= NFSMNT_DEADTHRESH;
1024 }
1025 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1026 ret = sscanf(opt, "%d", &args.timeo);
1027 if (ret != 1 || args.timeo <= 0) {
1028 vfs_mount_error(mp, "illegal timeout: %s",
1029 opt);
1030 error = EINVAL;
1031 goto out;
1032 }
1033 args.flags |= NFSMNT_TIMEO;
1034 }
1035 if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) {
1036 ret = sscanf(opt, "%d", &args.maxgrouplist);
1037 if (ret != 1 || args.maxgrouplist <= 0) {
1038 vfs_mount_error(mp, "illegal maxgroups: %s",
1039 opt);
1040 error = EINVAL;
1041 goto out;
1042 }
1043 args.flags |= NFSMNT_MAXGRPS;
1044 }
1045 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1046 == 0) {
1047 ret = sscanf(opt, "%d", &negnametimeo);
1048 if (ret != 1 || negnametimeo < 0) {
1049 vfs_mount_error(mp, "illegal negnametimeo: %s",
1050 opt);
1051 error = EINVAL;
1052 goto out;
1053 }
1054 }
1055 if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1056 &args.addrlen) == 0) {
1057 has_addr_opt = 1;
1058 if (args.addrlen > SOCK_MAXADDRLEN) {
1059 error = ENAMETOOLONG;
1060 goto out;
1061 }
1062 nam = malloc(args.addrlen, M_SONAME,
1063 M_WAITOK);
1064 bcopy(args.addr, nam, args.addrlen);
1065 nam->sa_len = args.addrlen;
1066 }
1067 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1068 &args.fhsize) == 0) {
1069 has_fh_opt = 1;
1070 }
1071 if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
1072 NULL) == 0) {
1073 has_hostname_opt = 1;
1074 }
1075 if (args.hostname == NULL) {
1076 vfs_mount_error(mp, "Invalid hostname");
1077 error = EINVAL;
1078 goto out;
1079 }
1080 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1081 vfs_mount_error(mp, "Bad file handle");
1082 error = EINVAL;
1083 goto out;
1084 }
1085
1086 if (mp->mnt_flag & MNT_UPDATE) {
1087 struct nfsmount *nmp = VFSTONFS(mp);
1088
1089 if (nmp == NULL) {
1090 error = EIO;
1091 goto out;
1092 }
1093 /*
1094 * When doing an update, we can't change from or to
1095 * v3, switch lockd strategies or change cookie translation
1096 */
1097 args.flags = (args.flags &
1098 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1099 (nmp->nm_flag &
1100 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1101 nfs_decode_args(mp, nmp, &args, NULL);
1102 goto out;
1103 }
1104
1105 /*
1106 * Make the nfs_ip_paranoia sysctl serve as the default connection
1107 * or no-connection mode for those protocols that support
1108 * no-connection mode (the flag will be cleared later for protocols
1109 * that do not support no-connection mode). This will allow a client
1110 * to receive replies from a different IP then the request was
1111 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1112 * not 0.
1113 */
1114 if (nfs_ip_paranoia == 0)
1115 args.flags |= NFSMNT_NOCONN;
1116
1117 if (has_nfs_args_opt) {
1118 /*
1119 * In the 'nfs_args' case, the pointers in the args
1120 * structure are in userland - we copy them in here.
1121 */
1122 if (!has_fh_opt) {
1123 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1124 args.fhsize);
1125 if (error) {
1126 goto out;
1127 }
1128 args.fh = nfh;
1129 }
1130 if (!has_hostname_opt) {
1131 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
1132 if (error) {
1133 goto out;
1134 }
1135 bzero(&hst[len], MNAMELEN - len);
1136 args.hostname = hst;
1137 }
1138 if (!has_addr_opt) {
1139 /* sockargs() call must be after above copyin() calls */
1140 error = getsockaddr(&nam, (caddr_t)args.addr,
1141 args.addrlen);
1142 if (error) {
1143 goto out;
1144 }
1145 }
1146 }
1147 error = mountnfs(&args, mp, nam, args.hostname, &vp,
1148 curthread->td_ucred, negnametimeo);
1149 out:
1150 if (!error) {
1151 MNT_ILOCK(mp);
1152 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1153 MNT_IUNLOCK(mp);
1154 }
1155 return (error);
1156 }
1157
1158
1159 /*
1160 * VFS Operations.
1161 *
1162 * mount system call
1163 * It seems a bit dumb to copyinstr() the host and path here and then
1164 * bcopy() them in mountnfs(), but I wanted to detect errors before
1165 * doing the sockargs() call because sockargs() allocates an mbuf and
1166 * an error after that means that I have to release the mbuf.
1167 */
1168 /* ARGSUSED */
1169 static int
1170 nfs_cmount(struct mntarg *ma, void *data, int flags)
1171 {
1172 int error;
1173 struct nfs_args args;
1174
1175 error = copyin(data, &args, sizeof (struct nfs_args));
1176 if (error)
1177 return error;
1178
1179 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1180
1181 error = kernel_mount(ma, flags);
1182 return (error);
1183 }
1184
1185 /*
1186 * Common code for mount and mountroot
1187 */
1188 static int
1189 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1190 char *hst, struct vnode **vpp, struct ucred *cred, int negnametimeo)
1191 {
1192 struct nfsmount *nmp;
1193 struct nfsnode *np;
1194 int error;
1195 struct vattr attrs;
1196
1197 if (mp->mnt_flag & MNT_UPDATE) {
1198 nmp = VFSTONFS(mp);
1199 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1200 free(nam, M_SONAME);
1201 return (0);
1202 } else {
1203 nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
1204 bzero((caddr_t)nmp, sizeof (struct nfsmount));
1205 TAILQ_INIT(&nmp->nm_bufq);
1206 mp->mnt_data = nmp;
1207 nmp->nm_getinfo = nfs_getnlminfo;
1208 }
1209 vfs_getnewfsid(mp);
1210 nmp->nm_mountp = mp;
1211 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
1212
1213 /*
1214 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1215 * high, depending on whether we end up with negative offsets in
1216 * the client or server somewhere. 2GB-1 may be safer.
1217 *
1218 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
1219 * that we can handle until we find out otherwise.
1220 * XXX Our "safe" limit on the client is what we can store in our
1221 * buffer cache using signed(!) block numbers.
1222 */
1223 if ((argp->flags & NFSMNT_NFSV3) == 0)
1224 nmp->nm_maxfilesize = 0xffffffffLL;
1225 else
1226 nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1227
1228 nmp->nm_timeo = NFS_TIMEO;
1229 nmp->nm_retry = NFS_RETRANS;
1230 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
1231 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
1232 } else {
1233 nmp->nm_wsize = NFS_WSIZE;
1234 nmp->nm_rsize = NFS_RSIZE;
1235 }
1236 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1237 nmp->nm_readdirsize = NFS_READDIRSIZE;
1238 nmp->nm_numgrps = NFS_MAXGRPS;
1239 nmp->nm_readahead = NFS_DEFRAHEAD;
1240 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
1241 nmp->nm_negnametimeo = negnametimeo;
1242 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1243 if (nmp->nm_tprintf_delay < 0)
1244 nmp->nm_tprintf_delay = 0;
1245 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1246 if (nmp->nm_tprintf_initial_delay < 0)
1247 nmp->nm_tprintf_initial_delay = 0;
1248 nmp->nm_fhsize = argp->fhsize;
1249 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1250 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1251 nmp->nm_nam = nam;
1252 /* Set up the sockets and per-host congestion */
1253 nmp->nm_sotype = argp->sotype;
1254 nmp->nm_soproto = argp->proto;
1255 nmp->nm_rpcops = &nfs_rpcops;
1256
1257 nfs_decode_args(mp, nmp, argp, hst);
1258
1259 /*
1260 * For Connection based sockets (TCP,...) defer the connect until
1261 * the first request, in case the server is not responding.
1262 */
1263 if (nmp->nm_sotype == SOCK_DGRAM &&
1264 (error = nfs_connect(nmp)))
1265 goto bad;
1266
1267 /*
1268 * This is silly, but it has to be set so that vinifod() works.
1269 * We do not want to do an nfs_statfs() here since we can get
1270 * stuck on a dead server and we are holding a lock on the mount
1271 * point.
1272 */
1273 mtx_lock(&nmp->nm_mtx);
1274 mp->mnt_stat.f_iosize = nfs_iosize(nmp);
1275 mtx_unlock(&nmp->nm_mtx);
1276 /*
1277 * A reference count is needed on the nfsnode representing the
1278 * remote root. If this object is not persistent, then backward
1279 * traversals of the mount point (i.e. "..") will not work if
1280 * the nfsnode gets flushed out of the cache. Ufs does not have
1281 * this problem, because one can identify root inodes by their
1282 * number == ROOTINO (2).
1283 */
1284 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
1285 if (error)
1286 goto bad;
1287 *vpp = NFSTOV(np);
1288
1289 /*
1290 * Get file attributes and transfer parameters for the
1291 * mountpoint. This has the side effect of filling in
1292 * (*vpp)->v_type with the correct value.
1293 */
1294 if (argp->flags & NFSMNT_NFSV3)
1295 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
1296 else
1297 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred);
1298
1299 /*
1300 * Lose the lock but keep the ref.
1301 */
1302 VOP_UNLOCK(*vpp, 0);
1303
1304 return (0);
1305 bad:
1306 nfs_disconnect(nmp);
1307 mtx_destroy(&nmp->nm_mtx);
1308 uma_zfree(nfsmount_zone, nmp);
1309 free(nam, M_SONAME);
1310 return (error);
1311 }
1312
1313 /*
1314 * unmount system call
1315 */
1316 static int
1317 nfs_unmount(struct mount *mp, int mntflags)
1318 {
1319 struct nfsmount *nmp;
1320 int error, flags = 0;
1321
1322 if (mntflags & MNT_FORCE)
1323 flags |= FORCECLOSE;
1324 nmp = VFSTONFS(mp);
1325 /*
1326 * Goes something like this..
1327 * - Call vflush() to clear out vnodes for this filesystem
1328 * - Close the socket
1329 * - Free up the data structures
1330 */
1331 /* In the forced case, cancel any outstanding requests. */
1332 if (flags & FORCECLOSE) {
1333 error = nfs_nmcancelreqs(nmp);
1334 if (error)
1335 goto out;
1336 }
1337 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1338 error = vflush(mp, 1, flags, curthread);
1339 if (error)
1340 goto out;
1341
1342 /*
1343 * We are now committed to the unmount.
1344 */
1345 nfs_disconnect(nmp);
1346 free(nmp->nm_nam, M_SONAME);
1347
1348 mtx_destroy(&nmp->nm_mtx);
1349 uma_zfree(nfsmount_zone, nmp);
1350 out:
1351 return (error);
1352 }
1353
1354 /*
1355 * Return root of a filesystem
1356 */
1357 static int
1358 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1359 {
1360 struct vnode *vp;
1361 struct nfsmount *nmp;
1362 struct nfsnode *np;
1363 int error;
1364
1365 nmp = VFSTONFS(mp);
1366 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1367 if (error)
1368 return error;
1369 vp = NFSTOV(np);
1370 /*
1371 * Get transfer parameters and attributes for root vnode once.
1372 */
1373 mtx_lock(&nmp->nm_mtx);
1374 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
1375 (nmp->nm_flag & NFSMNT_NFSV3)) {
1376 mtx_unlock(&nmp->nm_mtx);
1377 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1378 } else
1379 mtx_unlock(&nmp->nm_mtx);
1380 if (vp->v_type == VNON)
1381 vp->v_type = VDIR;
1382 vp->v_vflag |= VV_ROOT;
1383 *vpp = vp;
1384 return (0);
1385 }
1386
1387 /*
1388 * Flush out the buffer cache
1389 */
1390 /* ARGSUSED */
1391 static int
1392 nfs_sync(struct mount *mp, int waitfor)
1393 {
1394 struct vnode *vp, *mvp;
1395 struct thread *td;
1396 int error, allerror = 0;
1397
1398 td = curthread;
1399
1400 /*
1401 * Force stale buffer cache information to be flushed.
1402 */
1403 MNT_ILOCK(mp);
1404 loop:
1405 MNT_VNODE_FOREACH(vp, mp, mvp) {
1406 VI_LOCK(vp);
1407 MNT_IUNLOCK(mp);
1408 /* XXX Racy bv_cnt check. */
1409 if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1410 waitfor == MNT_LAZY) {
1411 VI_UNLOCK(vp);
1412 MNT_ILOCK(mp);
1413 continue;
1414 }
1415 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1416 MNT_ILOCK(mp);
1417 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1418 goto loop;
1419 }
1420 error = VOP_FSYNC(vp, waitfor, td);
1421 if (error)
1422 allerror = error;
1423 VOP_UNLOCK(vp, 0);
1424 vrele(vp);
1425
1426 MNT_ILOCK(mp);
1427 }
1428 MNT_IUNLOCK(mp);
1429 return (allerror);
1430 }
1431
1432 static int
1433 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1434 {
1435 struct nfsmount *nmp = VFSTONFS(mp);
1436 struct vfsquery vq;
1437 int error;
1438
1439 bzero(&vq, sizeof(vq));
1440 switch (op) {
1441 #if 0
1442 case VFS_CTL_NOLOCKS:
1443 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1444 if (req->oldptr != NULL) {
1445 error = SYSCTL_OUT(req, &val, sizeof(val));
1446 if (error)
1447 return (error);
1448 }
1449 if (req->newptr != NULL) {
1450 error = SYSCTL_IN(req, &val, sizeof(val));
1451 if (error)
1452 return (error);
1453 if (val)
1454 nmp->nm_flag |= NFSMNT_NOLOCKS;
1455 else
1456 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1457 }
1458 break;
1459 #endif
1460 case VFS_CTL_QUERY:
1461 mtx_lock(&nmp->nm_mtx);
1462 if (nmp->nm_state & NFSSTA_TIMEO)
1463 vq.vq_flags |= VQ_NOTRESP;
1464 mtx_unlock(&nmp->nm_mtx);
1465 #if 0
1466 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1467 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1468 vq.vq_flags |= VQ_NOTRESPLOCK;
1469 #endif
1470 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1471 break;
1472 case VFS_CTL_TIMEO:
1473 if (req->oldptr != NULL) {
1474 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1475 sizeof(nmp->nm_tprintf_initial_delay));
1476 if (error)
1477 return (error);
1478 }
1479 if (req->newptr != NULL) {
1480 error = vfs_suser(mp, req->td);
1481 if (error)
1482 return (error);
1483 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1484 sizeof(nmp->nm_tprintf_initial_delay));
1485 if (error)
1486 return (error);
1487 if (nmp->nm_tprintf_initial_delay < 0)
1488 nmp->nm_tprintf_initial_delay = 0;
1489 }
1490 break;
1491 default:
1492 return (ENOTSUP);
1493 }
1494 return (0);
1495 }
1496
1497 /*
1498 * Extract the information needed by the nlm from the nfs vnode.
1499 */
1500 static void
1501 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1502 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep)
1503 {
1504 struct nfsmount *nmp;
1505 struct nfsnode *np = VTONFS(vp);
1506
1507 nmp = VFSTONFS(vp->v_mount);
1508 if (fhlenp != NULL)
1509 *fhlenp = (size_t)np->n_fhsize;
1510 if (fhp != NULL)
1511 bcopy(np->n_fhp, fhp, np->n_fhsize);
1512 if (sp != NULL)
1513 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1514 if (is_v3p != NULL)
1515 *is_v3p = NFS_ISV3(vp);
1516 if (sizep != NULL)
1517 *sizep = np->n_size;
1518 }
1519
Cache object: 26f821bd28c08915508b8af4c534b280
|