1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/8.1/sys/nfsclient/nfs_vfsops.c 208586 2010-05-27 03:15:04Z cperciva $");
37
38
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/jail.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/module.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/vnode.h>
59 #include <sys/signalvar.h>
60
61 #include <vm/vm.h>
62 #include <vm/vm_extern.h>
63 #include <vm/uma.h>
64
65 #include <net/if.h>
66 #include <net/route.h>
67 #include <net/vnet.h>
68
69 #include <netinet/in.h>
70
71 #include <rpc/rpc.h>
72
73 #include <nfs/nfsproto.h>
74 #include <nfsclient/nfs.h>
75 #include <nfsclient/nfsnode.h>
76 #include <nfsclient/nfsmount.h>
77 #include <nfs/xdr_subs.h>
78 #include <nfsclient/nfsm_subs.h>
79 #include <nfsclient/nfsdiskless.h>
80
/*
 * Memory allocation types used by the NFS client.  The second argument is
 * the short name and the third the long description given to each type.
 */
MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");

/*
 * UMA zone handle; it is not initialized in this part of the file.
 * NOTE(review): presumably backs struct nfsmount allocations -- confirm.
 */
uma_zone_t nfsmount_zone;

/* Client statistics; the per-procedure RPC counters are bumped below. */
struct nfsstats nfsstats;

/* The vfs.nfs sysctl subtree and the knobs defined by this file. */
SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
    &nfsstats, nfsstats, "S,nfsstats");
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0,
    "Disallow accepting replies from IPs which differ from those sent");
#ifdef NFS_DEBUG
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
/*
 * how long to wait before the first "nfs server foo not responding"
 * console message
 *
 * NOTE(review): unlike the sysctls above, this one and the next pass the
 * delay constant rather than OID_AUTO as the second macro argument --
 * confirm that this is intentional.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
    downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0,
    "Delay before printing \"nfs server not responding\" messages");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
    downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0,
    "Delay between printing \"nfs server not responding\" messages");

/* Forward declarations of the file-local helpers and VFS entry points. */
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *hostname);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, struct vnode **,
		    struct ucred *cred, int);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;

/*
 * Address used only as the tsleep() wait channel while nfs_decode_args()
 * retries a failed connect; its value is never read.
 */
static int	fake_wchan;
127
/*
 * nfs vfs operations.
 *
 * Operation vector for the "nfs" filesystem type; VFS_SET() below
 * registers it with the VFCF_NETWORK flag.  nfs_init and nfs_uninit are
 * not defined in the visible part of this file.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		nfs_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		nfs_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
143
/*
 * So that loader and kldload(2) can find us, wherever we are..
 *
 * The nfs module declares version 1 and depends on krpc and nfs_common;
 * the kgssapi dependency is added only when KGSSAPI is defined.
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
#ifdef KGSSAPI
MODULE_DEPEND(nfs, kgssapi, 1, 1, 1);
#endif
MODULE_DEPEND(nfs, nfs_common, 1, 1, 1);
151
/*
 * RPC operation table for this client.  The initializer is positional, so
 * the entries must stay in the member order of struct nfs_rpcops (declared
 * elsewhere).
 */
static struct nfs_rpcops nfs_rpcops = {
	nfs_readrpc,
	nfs_writerpc,
	nfs_writebp,
	nfs_readlinkrpc,
	nfs_invaldir,
	nfs_commit,
};
160
/*
 * This structure must be filled in by a primary bootstrap or bootstrap
 * server for a diskless/dataless machine. It is initialized below just
 * to ensure that it is allocated to initialized data (.data not .bss).
 *
 * nfs_diskless_valid records the state of these structures as used by
 * nfs_mountroot(): 0 means no diskless information is available, 1 means
 * nfs_diskless is filled in and still has to be converted into
 * nfsv3_diskless, and nfs_convert_diskless() sets it to 3 once
 * nfsv3_diskless has been populated.
 */
struct nfs_diskless nfs_diskless = { { { 0 } } };
struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
int nfs_diskless_valid = 0;

/* Read-only sysctls exposing the diskless boot configuration. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
    "%Ssockaddr_in", "Diskless root nfs address");


/* Prototypes for the diskless-boot helpers. */
void		nfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
189
190 int
191 nfs_iosize(struct nfsmount *nmp)
192 {
193 int iosize;
194
195 /*
196 * Calculate the size used for io buffers. Use the larger
197 * of the two sizes to minimise nfs requests but make sure
198 * that it is at least one VM page to avoid wasting buffer
199 * space.
200 */
201 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
202 iosize = imax(iosize, PAGE_SIZE);
203 return (iosize);
204 }
205
206 static void
207 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
208 {
209
210 args->version = NFS_ARGSVERSION;
211 args->addr = oargs->addr;
212 args->addrlen = oargs->addrlen;
213 args->sotype = oargs->sotype;
214 args->proto = oargs->proto;
215 args->fh = oargs->fh;
216 args->fhsize = oargs->fhsize;
217 args->flags = oargs->flags;
218 args->wsize = oargs->wsize;
219 args->rsize = oargs->rsize;
220 args->readdirsize = oargs->readdirsize;
221 args->timeo = oargs->timeo;
222 args->retrans = oargs->retrans;
223 args->maxgrouplist = oargs->maxgrouplist;
224 args->readahead = oargs->readahead;
225 args->deadthresh = oargs->deadthresh;
226 args->hostname = oargs->hostname;
227 }
228
229 static void
230 nfs_convert_diskless(void)
231 {
232
233 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
234 sizeof(struct ifaliasreq));
235 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
236 sizeof(struct sockaddr_in));
237 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
238 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
239 nfsv3_diskless.root_fhsize = NFSX_V3FH;
240 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
241 } else {
242 nfsv3_diskless.root_fhsize = NFSX_V2FH;
243 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
244 }
245 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
246 sizeof(struct sockaddr_in));
247 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
248 nfsv3_diskless.root_time = nfs_diskless.root_time;
249 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
250 MAXHOSTNAMELEN);
251 nfs_diskless_valid = 3;
252 }
253
/*
 * nfs statfs call
 *
 * Send an FSSTAT request for the mount's root file handle and fill in the
 * block, file and I/O size fields of *sbp from the reply.  On an NFSv3
 * mount that has not yet recorded NFSSTA_GOTFSINFO, nfs_fsinfo() is called
 * first and its result is ignored.
 *
 * The mount is held busy and the root vnode is held (obtained with
 * LK_EXCLUSIVE) for the duration of the call; both are released before
 * returning on every path past the initial checks.
 *
 * Returns 0 on success, otherwise the error from vfs_busy(), nfs_nget()
 * or the value left in "error" by the request macros.
 *
 * NOTE(review): the nfsm_*() macros are defined elsewhere.  They appear to
 * operate implicitly on the locals mreq, mrep, md, mb, bpos, dpos and
 * error, and to jump to the nfsmout label on failure -- confirm against
 * nfsm_subs.h before restructuring this function.
 */
static int
nfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct vnode *vp;
	struct thread *td;
	struct nfs_statfs *sfp;
	caddr_t bpos, dpos;
	struct nfsmount *nmp = VFSTONFS(mp);
	int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct nfsnode *np;
	u_quad_t tquad;

	td = curthread;
#ifndef nolint
	sfp = NULL;
#endif
	/* Do not wait for a busy mount; fail straight away instead. */
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error)
		return (error);
	/* Get the vnode for the mount's root file handle. */
	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
	if (error) {
		vfs_unbusy(mp);
		return (error);
	}
	vp = NFSTOV(np);
	/* nm_state is tested under nm_mtx; the mutex is dropped before the RPC. */
	mtx_lock(&nmp->nm_mtx);
	if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		(void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
	} else
		mtx_unlock(&nmp->nm_mtx);
	/* Build and send the FSSTAT request. */
	nfsstats.rpccnt[NFSPROC_FSSTAT]++;
	mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
	if (v3)
		nfsm_postop_attr(vp, retattr);
	if (error) {
		if (mrep != NULL)
			m_freem(mrep);
		goto nfsmout;
	}
	sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
	mtx_lock(&nmp->nm_mtx);
	sbp->f_iosize = nfs_iosize(nmp);
	mtx_unlock(&nmp->nm_mtx);
	if (v3) {
		/*
		 * The v3 reply values are divided by NFS_FABLKSIZE to get
		 * block counts.  For the file counts only nfsuquad[1] is
		 * used, masked with 0x7fffffff.
		 */
		sbp->f_bsize = NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_tbytes);
		sbp->f_blocks = tquad / NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_fbytes);
		sbp->f_bfree = tquad / NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_abytes);
		sbp->f_bavail = tquad / NFS_FABLKSIZE;
		sbp->f_files = (fxdr_unsigned(int32_t,
		    sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
		sbp->f_ffree = (fxdr_unsigned(int32_t,
		    sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
	} else {
		/*
		 * Without v3 the block size and block counts come straight
		 * from the reply and the file counts are reported as 0.
		 */
		sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
		sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
		sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
		sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
		sbp->f_files = 0;
		sbp->f_ffree = 0;
	}
	m_freem(mrep);
nfsmout:
	/* Common exit: drop the root vnode and the busy reference. */
	vput(vp);
	vfs_unbusy(mp);
	return (error);
}
332
/*
 * nfs version 3 fsinfo rpc call
 *
 * Send an FSINFO request for vp and use the reply to tighten the mount's
 * transfer parameters.  The values in *nmp are only ever lowered here:
 * nm_wsize, nm_rsize and nm_readdirsize are reduced towards the server's
 * preferred and maximum sizes, and nm_maxfilesize towards the server's
 * maximum file size.  On success the mount's f_iosize is recomputed and
 * NFSSTA_GOTFSINFO is set in nm_state, all under nm_mtx.
 *
 * Returns 0 on success or the value left in "error" by the request macros.
 *
 * NOTE(review): the nfsm_*() macros are defined elsewhere.  They appear to
 * operate implicitly on the locals mreq, mrep, md, mb, bpos, dpos and
 * error, and to jump to the nfsmout label on failure -- confirm against
 * nfsm_subs.h before restructuring this function.
 */
int
nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
    struct thread *td)
{
	struct nfsv3_fsinfo *fsp;
	u_int32_t pref, max;
	caddr_t bpos, dpos;
	int error = 0, retattr;
	struct mbuf *mreq, *mrep, *md, *mb;
	u_int64_t maxfsize;

	nfsstats.rpccnt[NFSPROC_FSINFO]++;
	/* The literal 1 selects the v3 forms of the file handle helpers. */
	mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, 1);
	nfsm_request(vp, NFSPROC_FSINFO, td, cred);
	nfsm_postop_attr(vp, retattr);
	if (!error) {
		/* The reply is dissected before nm_mtx is taken. */
		fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
		pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
		mtx_lock(&nmp->nm_mtx);
		/*
		 * Write size: adopt the preferred size when it is smaller
		 * than the current one but at least NFS_FABLKSIZE, rounded
		 * up with the NFS_FABLKSIZE mask.
		 */
		if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
			nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
			    ~(NFS_FABLKSIZE - 1);
		/*
		 * Then clamp to the maximum, rounded down; when rounding
		 * yields 0 the unrounded maximum is used instead.
		 */
		max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
		if (max < nmp->nm_wsize && max > 0) {
			nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
			if (nmp->nm_wsize == 0)
				nmp->nm_wsize = max;
		}
		/* Read size: same treatment with the read limits. */
		pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
		if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
			nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
			    ~(NFS_FABLKSIZE - 1);
		max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
		if (max < nmp->nm_rsize && max > 0) {
			nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
			if (nmp->nm_rsize == 0)
				nmp->nm_rsize = max;
		}
		/*
		 * Readdir size, in units of NFS_DIRBLKSIZ.  "max" still
		 * holds fs_rtmax from above, so the read maximum also
		 * bounds the readdir size.
		 */
		pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
		if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
			nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
			    ~(NFS_DIRBLKSIZ - 1);
		if (max < nmp->nm_readdirsize && max > 0) {
			nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
			if (nmp->nm_readdirsize == 0)
				nmp->nm_readdirsize = max;
		}
		/* A maximum file size of 0 in the reply is ignored. */
		maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
		if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
			nmp->nm_maxfilesize = maxfsize;
		/* The transfer sizes may have changed; refresh f_iosize. */
		nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
		nmp->nm_state |= NFSSTA_GOTFSINFO;
		mtx_unlock(&nmp->nm_mtx);
	}
	m_freem(mrep);
nfsmout:
	return (error);
}
397
398 /*
399 * Mount a remote root fs via. nfs. This depends on the info in the
400 * nfs_diskless structure that has been filled in properly by some primary
401 * bootstrap.
402 * It goes something like this:
403 * - do enough of "ifconfig" by calling ifioctl() so that the system
404 * can talk to the server
405 * - If nfs_diskless.mygateway is filled in, use that address as
406 * a default gateway.
407 * - build the rootfs mount point and call mountnfs() to do the rest.
408 *
409 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
410 * structure, as well as other global NFS client variables here, as
411 * nfs_mountroot() will be called once in the boot before any other NFS
412 * client activity occurs.
413 */
414 int
415 nfs_mountroot(struct mount *mp)
416 {
417 struct thread *td = curthread;
418 struct nfsv3_diskless *nd = &nfsv3_diskless;
419 struct socket *so;
420 struct vnode *vp;
421 struct ifreq ir;
422 int error;
423 u_long l;
424 char buf[128];
425 char *cp;
426
427 CURVNET_SET(TD_TO_VNET(td));
428
429 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
430 bootpc_init(); /* use bootp to get nfs_diskless filled in */
431 #elif defined(NFS_ROOT)
432 nfs_setup_diskless();
433 #endif
434
435 if (nfs_diskless_valid == 0) {
436 CURVNET_RESTORE();
437 return (-1);
438 }
439 if (nfs_diskless_valid == 1)
440 nfs_convert_diskless();
441
442 /*
443 * XXX splnet, so networks will receive...
444 */
445 splnet();
446
447 /*
448 * Do enough of ifconfig(8) so that the critical net interface can
449 * talk to the server.
450 */
451 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
452 td->td_ucred, td);
453 if (error)
454 panic("nfs_mountroot: socreate(%04x): %d",
455 nd->myif.ifra_addr.sa_family, error);
456
457 #if 0 /* XXX Bad idea */
458 /*
459 * We might not have been told the right interface, so we pass
460 * over the first ten interfaces of the same kind, until we get
461 * one of them configured.
462 */
463
464 for (i = strlen(nd->myif.ifra_name) - 1;
465 nd->myif.ifra_name[i] >= '' &&
466 nd->myif.ifra_name[i] <= '9';
467 nd->myif.ifra_name[i] ++) {
468 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
469 if(!error)
470 break;
471 }
472 #endif
473
474 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
475 if (error)
476 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
477
478 if ((cp = getenv("boot.netif.mtu")) != NULL) {
479 ir.ifr_mtu = strtol(cp, NULL, 10);
480 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
481 freeenv(cp);
482 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
483 if (error)
484 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
485 }
486 soclose(so);
487
488 /*
489 * If the gateway field is filled in, set it as the default route.
490 * Note that pxeboot will set a default route of 0 if the route
491 * is not set by the DHCP server. Check also for a value of 0
492 * to avoid panicking inappropriately in that situation.
493 */
494 if (nd->mygateway.sin_len != 0 &&
495 nd->mygateway.sin_addr.s_addr != 0) {
496 struct sockaddr_in mask, sin;
497
498 bzero((caddr_t)&mask, sizeof(mask));
499 sin = mask;
500 sin.sin_family = AF_INET;
501 sin.sin_len = sizeof(sin);
502 /* XXX MRT use table 0 for this sort of thing */
503 error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
504 (struct sockaddr *)&nd->mygateway,
505 (struct sockaddr *)&mask,
506 RTF_UP | RTF_GATEWAY, NULL);
507 if (error)
508 panic("nfs_mountroot: RTM_ADD: %d", error);
509 }
510
511 /*
512 * Create the rootfs mount point.
513 */
514 nd->root_args.fh = nd->root_fh;
515 nd->root_args.fhsize = nd->root_fhsize;
516 l = ntohl(nd->root_saddr.sin_addr.s_addr);
517 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
518 (l >> 24) & 0xff, (l >> 16) & 0xff,
519 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
520 printf("NFS ROOT: %s\n", buf);
521 nd->root_args.hostname = buf;
522 if ((error = nfs_mountdiskless(buf,
523 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
524 CURVNET_RESTORE();
525 return (error);
526 }
527
528 /*
529 * This is not really an nfs issue, but it is much easier to
530 * set hostname here and then let the "/etc/rc.xxx" files
531 * mount the right /var based upon its preset value.
532 */
533 mtx_lock(&prison0.pr_mtx);
534 strlcpy(prison0.pr_hostname, nd->my_hostnam,
535 sizeof (prison0.pr_hostname));
536 mtx_unlock(&prison0.pr_mtx);
537 inittodr(ntohl(nd->root_time));
538 CURVNET_RESTORE();
539 return (0);
540 }
541
542 /*
543 * Internal version of mount system call for diskless setup.
544 */
545 static int
546 nfs_mountdiskless(char *path,
547 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
548 struct vnode **vpp, struct mount *mp)
549 {
550 struct sockaddr *nam;
551 int error;
552
553 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
554 if ((error = mountnfs(args, mp, nam, path, vpp,
555 td->td_ucred, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
556 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
557 return (error);
558 }
559 return (0);
560 }
561
562 static int
563 nfs_sec_name_to_num(char *sec)
564 {
565 if (!strcmp(sec, "krb5"))
566 return (RPCSEC_GSS_KRB5);
567 if (!strcmp(sec, "krb5i"))
568 return (RPCSEC_GSS_KRB5I);
569 if (!strcmp(sec, "krb5p"))
570 return (RPCSEC_GSS_KRB5P);
571 if (!strcmp(sec, "sys"))
572 return (AUTH_SYS);
573 /*
574 * Userland should validate the string but we will try and
575 * cope with unexpected values.
576 */
577 return (AUTH_SYS);
578 }
579
580 static void
581 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
582 const char *hostname)
583 {
584 int s;
585 int adjsock;
586 int maxio;
587 char *p;
588 char *secname;
589 char *principal;
590
591 s = splnet();
592
593 /*
594 * Set read-only flag if requested; otherwise, clear it if this is
595 * an update. If this is not an update, then either the read-only
596 * flag is already clear, or this is a root mount and it was set
597 * intentionally at some previous point.
598 */
599 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
600 MNT_ILOCK(mp);
601 mp->mnt_flag |= MNT_RDONLY;
602 MNT_IUNLOCK(mp);
603 } else if (mp->mnt_flag & MNT_UPDATE) {
604 MNT_ILOCK(mp);
605 mp->mnt_flag &= ~MNT_RDONLY;
606 MNT_IUNLOCK(mp);
607 }
608
609 /*
610 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
611 * no sense in that context. Also, set up appropriate retransmit
612 * and soft timeout behavior.
613 */
614 if (argp->sotype == SOCK_STREAM) {
615 nmp->nm_flag &= ~NFSMNT_NOCONN;
616 nmp->nm_flag |= NFSMNT_DUMBTIMR;
617 nmp->nm_timeo = NFS_MAXTIMEO;
618 nmp->nm_retry = NFS_RETRANS_TCP;
619 }
620
621 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
622 if ((argp->flags & NFSMNT_NFSV3) == 0)
623 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
624
625 /* Re-bind if rsrvd port requested and wasn't on one */
626 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
627 && (argp->flags & NFSMNT_RESVPORT);
628 /* Also re-bind if we're switching to/from a connected UDP socket */
629 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
630 (argp->flags & NFSMNT_NOCONN));
631
632 /* Update flags atomically. Don't change the lock bits. */
633 nmp->nm_flag = argp->flags | nmp->nm_flag;
634 splx(s);
635
636 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
637 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
638 if (nmp->nm_timeo < NFS_MINTIMEO)
639 nmp->nm_timeo = NFS_MINTIMEO;
640 else if (nmp->nm_timeo > NFS_MAXTIMEO)
641 nmp->nm_timeo = NFS_MAXTIMEO;
642 }
643
644 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
645 nmp->nm_retry = argp->retrans;
646 if (nmp->nm_retry > NFS_MAXREXMIT)
647 nmp->nm_retry = NFS_MAXREXMIT;
648 }
649
650 if (argp->flags & NFSMNT_NFSV3) {
651 if (argp->sotype == SOCK_DGRAM)
652 maxio = NFS_MAXDGRAMDATA;
653 else
654 maxio = NFS_MAXDATA;
655 } else
656 maxio = NFS_V2MAXDATA;
657
658 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
659 nmp->nm_wsize = argp->wsize;
660 /* Round down to multiple of blocksize */
661 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
662 if (nmp->nm_wsize <= 0)
663 nmp->nm_wsize = NFS_FABLKSIZE;
664 }
665 if (nmp->nm_wsize > maxio)
666 nmp->nm_wsize = maxio;
667 if (nmp->nm_wsize > MAXBSIZE)
668 nmp->nm_wsize = MAXBSIZE;
669
670 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
671 nmp->nm_rsize = argp->rsize;
672 /* Round down to multiple of blocksize */
673 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
674 if (nmp->nm_rsize <= 0)
675 nmp->nm_rsize = NFS_FABLKSIZE;
676 }
677 if (nmp->nm_rsize > maxio)
678 nmp->nm_rsize = maxio;
679 if (nmp->nm_rsize > MAXBSIZE)
680 nmp->nm_rsize = MAXBSIZE;
681
682 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
683 nmp->nm_readdirsize = argp->readdirsize;
684 }
685 if (nmp->nm_readdirsize > maxio)
686 nmp->nm_readdirsize = maxio;
687 if (nmp->nm_readdirsize > nmp->nm_rsize)
688 nmp->nm_readdirsize = nmp->nm_rsize;
689
690 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
691 nmp->nm_acregmin = argp->acregmin;
692 else
693 nmp->nm_acregmin = NFS_MINATTRTIMO;
694 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
695 nmp->nm_acregmax = argp->acregmax;
696 else
697 nmp->nm_acregmax = NFS_MAXATTRTIMO;
698 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
699 nmp->nm_acdirmin = argp->acdirmin;
700 else
701 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
702 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
703 nmp->nm_acdirmax = argp->acdirmax;
704 else
705 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
706 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
707 nmp->nm_acdirmin = nmp->nm_acdirmax;
708 if (nmp->nm_acregmin > nmp->nm_acregmax)
709 nmp->nm_acregmin = nmp->nm_acregmax;
710
711 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
712 if (argp->maxgrouplist <= NFS_MAXGRPS)
713 nmp->nm_numgrps = argp->maxgrouplist;
714 else
715 nmp->nm_numgrps = NFS_MAXGRPS;
716 }
717 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
718 if (argp->readahead <= NFS_MAXRAHEAD)
719 nmp->nm_readahead = argp->readahead;
720 else
721 nmp->nm_readahead = NFS_MAXRAHEAD;
722 }
723 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
724 if (argp->wcommitsize < nmp->nm_wsize)
725 nmp->nm_wcommitsize = nmp->nm_wsize;
726 else
727 nmp->nm_wcommitsize = argp->wcommitsize;
728 }
729 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
730 if (argp->deadthresh <= NFS_MAXDEADTHRESH)
731 nmp->nm_deadthresh = argp->deadthresh;
732 else
733 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
734 }
735
736 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
737 (nmp->nm_soproto != argp->proto));
738 nmp->nm_sotype = argp->sotype;
739 nmp->nm_soproto = argp->proto;
740
741 if (nmp->nm_client && adjsock) {
742 nfs_safedisconnect(nmp);
743 if (nmp->nm_sotype == SOCK_DGRAM)
744 while (nfs_connect(nmp)) {
745 printf("nfs_args: retrying connect\n");
746 (void) tsleep(&fake_wchan, PSOCK, "nfscon", hz);
747 }
748 }
749
750 if (hostname) {
751 strlcpy(nmp->nm_hostname, hostname,
752 sizeof(nmp->nm_hostname));
753 p = strchr(nmp->nm_hostname, ':');
754 if (p)
755 *p = '\0';
756 }
757
758 if (vfs_getopt(mp->mnt_optnew, "sec",
759 (void **) &secname, NULL) == 0) {
760 nmp->nm_secflavor = nfs_sec_name_to_num(secname);
761 } else {
762 nmp->nm_secflavor = AUTH_SYS;
763 }
764
765 if (vfs_getopt(mp->mnt_optnew, "principal",
766 (void **) &principal, NULL) == 0) {
767 strlcpy(nmp->nm_principal, principal,
768 sizeof(nmp->nm_principal));
769 } else {
770 snprintf(nmp->nm_principal, sizeof(nmp->nm_principal),
771 "nfs@%s", nmp->nm_hostname);
772 }
773 }
774
/*
 * Names of the mount options nfs_mount() accepts; the list is handed to
 * vfs_filteropt() and must end with NULL.  Every option that nfs_mount()
 * parses has to be listed here, including "readahead".
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
    "readahead", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
    "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin",
    "acdirmax", "deadthresh", "hostname", "timeout", "addr", "fh", "nfsv3",
    "sec", "maxgroups", "principal", "negnametimeo",
    NULL };
784
785 /*
786 * VFS Operations.
787 *
788 * mount system call
789 * It seems a bit dumb to copyinstr() the host and path here and then
790 * bcopy() them in mountnfs(), but I wanted to detect errors before
791 * doing the sockargs() call because sockargs() allocates an mbuf and
792 * an error after that means that I have to release the mbuf.
793 */
794 /* ARGSUSED */
795 static int
796 nfs_mount(struct mount *mp)
797 {
798 struct nfs_args args = {
799 .version = NFS_ARGSVERSION,
800 .addr = NULL,
801 .addrlen = sizeof (struct sockaddr_in),
802 .sotype = SOCK_STREAM,
803 .proto = 0,
804 .fh = NULL,
805 .fhsize = 0,
806 .flags = NFSMNT_RESVPORT,
807 .wsize = NFS_WSIZE,
808 .rsize = NFS_RSIZE,
809 .readdirsize = NFS_READDIRSIZE,
810 .timeo = 10,
811 .retrans = NFS_RETRANS,
812 .maxgrouplist = NFS_MAXGRPS,
813 .readahead = NFS_DEFRAHEAD,
814 .wcommitsize = 0, /* was: NQ_DEFLEASE */
815 .deadthresh = NFS_MAXDEADTHRESH, /* was: NQ_DEADTHRESH */
816 .hostname = NULL,
817 /* args version 4 */
818 .acregmin = NFS_MINATTRTIMO,
819 .acregmax = NFS_MAXATTRTIMO,
820 .acdirmin = NFS_MINDIRATTRTIMO,
821 .acdirmax = NFS_MAXDIRATTRTIMO,
822 };
823 int error, ret, has_nfs_args_opt;
824 int has_addr_opt, has_fh_opt, has_hostname_opt;
825 struct sockaddr *nam;
826 struct vnode *vp;
827 char hst[MNAMELEN];
828 size_t len;
829 u_char nfh[NFSX_V3FHMAX];
830 char *opt;
831 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
832
833 has_nfs_args_opt = 0;
834 has_addr_opt = 0;
835 has_fh_opt = 0;
836 has_hostname_opt = 0;
837
838 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
839 error = EINVAL;
840 goto out;
841 }
842
843 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
844 error = nfs_mountroot(mp);
845 goto out;
846 }
847
848 /*
849 * The old mount_nfs program passed the struct nfs_args
850 * from userspace to kernel. The new mount_nfs program
851 * passes string options via nmount() from userspace to kernel
852 * and we populate the struct nfs_args in the kernel.
853 */
854 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
855 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
856 sizeof args);
857 if (error)
858 goto out;
859
860 if (args.version != NFS_ARGSVERSION) {
861 error = EPROGMISMATCH;
862 goto out;
863 }
864 has_nfs_args_opt = 1;
865 }
866
867 if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
868 args.flags |= NFSMNT_DUMBTIMR;
869 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
870 args.flags |= NFSMNT_NOCONN;
871 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
872 args.flags |= NFSMNT_NOCONN;
873 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
874 args.flags |= NFSMNT_NOLOCKD;
875 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
876 args.flags &= ~NFSMNT_NOLOCKD;
877 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
878 args.flags |= NFSMNT_INT;
879 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
880 args.flags |= NFSMNT_RDIRPLUS;
881 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
882 args.flags |= NFSMNT_RESVPORT;
883 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
884 args.flags &= ~NFSMNT_RESVPORT;
885 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
886 args.flags |= NFSMNT_SOFT;
887 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
888 args.flags &= ~NFSMNT_SOFT;
889 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
890 args.sotype = SOCK_DGRAM;
891 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
892 args.sotype = SOCK_DGRAM;
893 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
894 args.sotype = SOCK_STREAM;
895 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
896 args.flags |= NFSMNT_NFSV3;
897 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
898 if (opt == NULL) {
899 vfs_mount_error(mp, "illegal readdirsize");
900 error = EINVAL;
901 goto out;
902 }
903 ret = sscanf(opt, "%d", &args.readdirsize);
904 if (ret != 1 || args.readdirsize <= 0) {
905 vfs_mount_error(mp, "illegal readdirsize: %s",
906 opt);
907 error = EINVAL;
908 goto out;
909 }
910 args.flags |= NFSMNT_READDIRSIZE;
911 }
912 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
913 if (opt == NULL) {
914 vfs_mount_error(mp, "illegal readahead");
915 error = EINVAL;
916 goto out;
917 }
918 ret = sscanf(opt, "%d", &args.readahead);
919 if (ret != 1 || args.readahead <= 0) {
920 vfs_mount_error(mp, "illegal readahead: %s",
921 opt);
922 error = EINVAL;
923 goto out;
924 }
925 args.flags |= NFSMNT_READAHEAD;
926 }
927 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
928 if (opt == NULL) {
929 vfs_mount_error(mp, "illegal wsize");
930 error = EINVAL;
931 goto out;
932 }
933 ret = sscanf(opt, "%d", &args.wsize);
934 if (ret != 1 || args.wsize <= 0) {
935 vfs_mount_error(mp, "illegal wsize: %s",
936 opt);
937 error = EINVAL;
938 goto out;
939 }
940 args.flags |= NFSMNT_WSIZE;
941 }
942 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
943 if (opt == NULL) {
944 vfs_mount_error(mp, "illegal rsize");
945 error = EINVAL;
946 goto out;
947 }
948 ret = sscanf(opt, "%d", &args.rsize);
949 if (ret != 1 || args.rsize <= 0) {
950 vfs_mount_error(mp, "illegal wsize: %s",
951 opt);
952 error = EINVAL;
953 goto out;
954 }
955 args.flags |= NFSMNT_RSIZE;
956 }
957 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
958 if (opt == NULL) {
959 vfs_mount_error(mp, "illegal retrans");
960 error = EINVAL;
961 goto out;
962 }
963 ret = sscanf(opt, "%d", &args.retrans);
964 if (ret != 1 || args.retrans <= 0) {
965 vfs_mount_error(mp, "illegal retrans: %s",
966 opt);
967 error = EINVAL;
968 goto out;
969 }
970 args.flags |= NFSMNT_RETRANS;
971 }
972 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
973 ret = sscanf(opt, "%d", &args.acregmin);
974 if (ret != 1 || args.acregmin < 0) {
975 vfs_mount_error(mp, "illegal acregmin: %s",
976 opt);
977 error = EINVAL;
978 goto out;
979 }
980 args.flags |= NFSMNT_ACREGMIN;
981 }
982 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
983 ret = sscanf(opt, "%d", &args.acregmax);
984 if (ret != 1 || args.acregmax < 0) {
985 vfs_mount_error(mp, "illegal acregmax: %s",
986 opt);
987 error = EINVAL;
988 goto out;
989 }
990 args.flags |= NFSMNT_ACREGMAX;
991 }
992 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
993 ret = sscanf(opt, "%d", &args.acdirmin);
994 if (ret != 1 || args.acdirmin < 0) {
995 vfs_mount_error(mp, "illegal acdirmin: %s",
996 opt);
997 error = EINVAL;
998 goto out;
999 }
1000 args.flags |= NFSMNT_ACDIRMIN;
1001 }
1002 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1003 ret = sscanf(opt, "%d", &args.acdirmax);
1004 if (ret != 1 || args.acdirmax < 0) {
1005 vfs_mount_error(mp, "illegal acdirmax: %s",
1006 opt);
1007 error = EINVAL;
1008 goto out;
1009 }
1010 args.flags |= NFSMNT_ACDIRMAX;
1011 }
1012 if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) {
1013 ret = sscanf(opt, "%d", &args.deadthresh);
1014 if (ret != 1 || args.deadthresh <= 0) {
1015 vfs_mount_error(mp, "illegal deadthresh: %s",
1016 opt);
1017 error = EINVAL;
1018 goto out;
1019 }
1020 args.flags |= NFSMNT_DEADTHRESH;
1021 }
1022 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1023 ret = sscanf(opt, "%d", &args.timeo);
1024 if (ret != 1 || args.timeo <= 0) {
1025 vfs_mount_error(mp, "illegal timeout: %s",
1026 opt);
1027 error = EINVAL;
1028 goto out;
1029 }
1030 args.flags |= NFSMNT_TIMEO;
1031 }
1032 if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) {
1033 ret = sscanf(opt, "%d", &args.maxgrouplist);
1034 if (ret != 1 || args.maxgrouplist <= 0) {
1035 vfs_mount_error(mp, "illegal maxgroups: %s",
1036 opt);
1037 error = EINVAL;
1038 goto out;
1039 }
1040 args.flags |= NFSMNT_MAXGRPS;
1041 }
1042 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1043 == 0) {
1044 ret = sscanf(opt, "%d", &negnametimeo);
1045 if (ret != 1 || negnametimeo < 0) {
1046 vfs_mount_error(mp, "illegal negnametimeo: %s",
1047 opt);
1048 error = EINVAL;
1049 goto out;
1050 }
1051 }
1052 if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1053 &args.addrlen) == 0) {
1054 has_addr_opt = 1;
1055 if (args.addrlen > SOCK_MAXADDRLEN) {
1056 error = ENAMETOOLONG;
1057 goto out;
1058 }
1059 nam = malloc(args.addrlen, M_SONAME,
1060 M_WAITOK);
1061 bcopy(args.addr, nam, args.addrlen);
1062 nam->sa_len = args.addrlen;
1063 }
1064 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1065 &args.fhsize) == 0) {
1066 has_fh_opt = 1;
1067 }
1068 if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
1069 NULL) == 0) {
1070 has_hostname_opt = 1;
1071 }
1072 if (args.hostname == NULL) {
1073 vfs_mount_error(mp, "Invalid hostname");
1074 error = EINVAL;
1075 goto out;
1076 }
1077 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1078 vfs_mount_error(mp, "Bad file handle");
1079 error = EINVAL;
1080 goto out;
1081 }
1082
1083 if (mp->mnt_flag & MNT_UPDATE) {
1084 struct nfsmount *nmp = VFSTONFS(mp);
1085
1086 if (nmp == NULL) {
1087 error = EIO;
1088 goto out;
1089 }
1090 /*
1091 * When doing an update, we can't change from or to
1092 * v3, switch lockd strategies or change cookie translation
1093 */
1094 args.flags = (args.flags &
1095 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1096 (nmp->nm_flag &
1097 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1098 nfs_decode_args(mp, nmp, &args, NULL);
1099 goto out;
1100 }
1101
1102 /*
1103 * Make the nfs_ip_paranoia sysctl serve as the default connection
1104 * or no-connection mode for those protocols that support
1105 * no-connection mode (the flag will be cleared later for protocols
1106 * that do not support no-connection mode). This will allow a client
1107 * to receive replies from a different IP then the request was
1108 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1109 * not 0.
1110 */
1111 if (nfs_ip_paranoia == 0)
1112 args.flags |= NFSMNT_NOCONN;
1113
1114 if (has_nfs_args_opt) {
1115 /*
1116 * In the 'nfs_args' case, the pointers in the args
1117 * structure are in userland - we copy them in here.
1118 */
1119 if (!has_fh_opt) {
1120 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1121 args.fhsize);
1122 if (error) {
1123 goto out;
1124 }
1125 args.fh = nfh;
1126 }
1127 if (!has_hostname_opt) {
1128 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
1129 if (error) {
1130 goto out;
1131 }
1132 bzero(&hst[len], MNAMELEN - len);
1133 args.hostname = hst;
1134 }
1135 if (!has_addr_opt) {
1136 /* sockargs() call must be after above copyin() calls */
1137 error = getsockaddr(&nam, (caddr_t)args.addr,
1138 args.addrlen);
1139 if (error) {
1140 goto out;
1141 }
1142 }
1143 }
1144 error = mountnfs(&args, mp, nam, args.hostname, &vp,
1145 curthread->td_ucred, negnametimeo);
1146 out:
1147 if (!error) {
1148 MNT_ILOCK(mp);
1149 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1150 MNT_IUNLOCK(mp);
1151 }
1152 return (error);
1153 }
1154
1155
1156 /*
1157 * VFS Operations.
1158 *
1159 * mount system call
1160 * It seems a bit dumb to copyinstr() the host and path here and then
1161 * bcopy() them in mountnfs(), but I wanted to detect errors before
1162 * doing the sockargs() call because sockargs() allocates an mbuf and
1163 * an error after that means that I have to release the mbuf.
1164 */
1165 /* ARGSUSED */
1166 static int
1167 nfs_cmount(struct mntarg *ma, void *data, int flags)
1168 {
1169 int error;
1170 struct nfs_args args;
1171
1172 error = copyin(data, &args, sizeof (struct nfs_args));
1173 if (error)
1174 return error;
1175
1176 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1177
1178 error = kernel_mount(ma, flags);
1179 return (error);
1180 }
1181
1182 /*
1183 * Common code for mount and mountroot
1184 */
1185 static int
1186 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1187 char *hst, struct vnode **vpp, struct ucred *cred, int negnametimeo)
1188 {
1189 struct nfsmount *nmp;
1190 struct nfsnode *np;
1191 int error;
1192 struct vattr attrs;
1193
1194 if (mp->mnt_flag & MNT_UPDATE) {
1195 nmp = VFSTONFS(mp);
1196 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1197 free(nam, M_SONAME);
1198 return (0);
1199 } else {
1200 nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
1201 bzero((caddr_t)nmp, sizeof (struct nfsmount));
1202 TAILQ_INIT(&nmp->nm_bufq);
1203 mp->mnt_data = nmp;
1204 }
1205 vfs_getnewfsid(mp);
1206 nmp->nm_mountp = mp;
1207 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
1208
1209 /*
1210 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1211 * high, depending on whether we end up with negative offsets in
1212 * the client or server somewhere. 2GB-1 may be safer.
1213 *
1214 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
1215 * that we can handle until we find out otherwise.
1216 * XXX Our "safe" limit on the client is what we can store in our
1217 * buffer cache using signed(!) block numbers.
1218 */
1219 if ((argp->flags & NFSMNT_NFSV3) == 0)
1220 nmp->nm_maxfilesize = 0xffffffffLL;
1221 else
1222 nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1223
1224 nmp->nm_timeo = NFS_TIMEO;
1225 nmp->nm_retry = NFS_RETRANS;
1226 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
1227 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
1228 } else {
1229 nmp->nm_wsize = NFS_WSIZE;
1230 nmp->nm_rsize = NFS_RSIZE;
1231 }
1232 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1233 nmp->nm_readdirsize = NFS_READDIRSIZE;
1234 nmp->nm_numgrps = NFS_MAXGRPS;
1235 nmp->nm_readahead = NFS_DEFRAHEAD;
1236 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
1237 nmp->nm_negnametimeo = negnametimeo;
1238 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1239 if (nmp->nm_tprintf_delay < 0)
1240 nmp->nm_tprintf_delay = 0;
1241 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1242 if (nmp->nm_tprintf_initial_delay < 0)
1243 nmp->nm_tprintf_initial_delay = 0;
1244 nmp->nm_fhsize = argp->fhsize;
1245 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1246 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1247 nmp->nm_nam = nam;
1248 /* Set up the sockets and per-host congestion */
1249 nmp->nm_sotype = argp->sotype;
1250 nmp->nm_soproto = argp->proto;
1251 nmp->nm_rpcops = &nfs_rpcops;
1252
1253 nfs_decode_args(mp, nmp, argp, hst);
1254
1255 /*
1256 * For Connection based sockets (TCP,...) defer the connect until
1257 * the first request, in case the server is not responding.
1258 */
1259 if (nmp->nm_sotype == SOCK_DGRAM &&
1260 (error = nfs_connect(nmp)))
1261 goto bad;
1262
1263 /*
1264 * This is silly, but it has to be set so that vinifod() works.
1265 * We do not want to do an nfs_statfs() here since we can get
1266 * stuck on a dead server and we are holding a lock on the mount
1267 * point.
1268 */
1269 mtx_lock(&nmp->nm_mtx);
1270 mp->mnt_stat.f_iosize = nfs_iosize(nmp);
1271 mtx_unlock(&nmp->nm_mtx);
1272 /*
1273 * A reference count is needed on the nfsnode representing the
1274 * remote root. If this object is not persistent, then backward
1275 * traversals of the mount point (i.e. "..") will not work if
1276 * the nfsnode gets flushed out of the cache. Ufs does not have
1277 * this problem, because one can identify root inodes by their
1278 * number == ROOTINO (2).
1279 */
1280 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
1281 if (error)
1282 goto bad;
1283 *vpp = NFSTOV(np);
1284
1285 /*
1286 * Get file attributes and transfer parameters for the
1287 * mountpoint. This has the side effect of filling in
1288 * (*vpp)->v_type with the correct value.
1289 */
1290 if (argp->flags & NFSMNT_NFSV3)
1291 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
1292 else
1293 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred);
1294
1295 /*
1296 * Lose the lock but keep the ref.
1297 */
1298 VOP_UNLOCK(*vpp, 0);
1299
1300 return (0);
1301 bad:
1302 nfs_disconnect(nmp);
1303 mtx_destroy(&nmp->nm_mtx);
1304 uma_zfree(nfsmount_zone, nmp);
1305 free(nam, M_SONAME);
1306 return (error);
1307 }
1308
1309 /*
1310 * unmount system call
1311 */
1312 static int
1313 nfs_unmount(struct mount *mp, int mntflags)
1314 {
1315 struct nfsmount *nmp;
1316 int error, flags = 0;
1317
1318 if (mntflags & MNT_FORCE)
1319 flags |= FORCECLOSE;
1320 nmp = VFSTONFS(mp);
1321 /*
1322 * Goes something like this..
1323 * - Call vflush() to clear out vnodes for this filesystem
1324 * - Close the socket
1325 * - Free up the data structures
1326 */
1327 /* In the forced case, cancel any outstanding requests. */
1328 if (flags & FORCECLOSE) {
1329 error = nfs_nmcancelreqs(nmp);
1330 if (error)
1331 goto out;
1332 }
1333 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1334 error = vflush(mp, 1, flags, curthread);
1335 if (error)
1336 goto out;
1337
1338 /*
1339 * We are now committed to the unmount.
1340 */
1341 nfs_disconnect(nmp);
1342 free(nmp->nm_nam, M_SONAME);
1343
1344 mtx_destroy(&nmp->nm_mtx);
1345 uma_zfree(nfsmount_zone, nmp);
1346 out:
1347 return (error);
1348 }
1349
1350 /*
1351 * Return root of a filesystem
1352 */
1353 static int
1354 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1355 {
1356 struct vnode *vp;
1357 struct nfsmount *nmp;
1358 struct nfsnode *np;
1359 int error;
1360
1361 nmp = VFSTONFS(mp);
1362 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1363 if (error)
1364 return error;
1365 vp = NFSTOV(np);
1366 /*
1367 * Get transfer parameters and attributes for root vnode once.
1368 */
1369 mtx_lock(&nmp->nm_mtx);
1370 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
1371 (nmp->nm_flag & NFSMNT_NFSV3)) {
1372 mtx_unlock(&nmp->nm_mtx);
1373 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1374 } else
1375 mtx_unlock(&nmp->nm_mtx);
1376 if (vp->v_type == VNON)
1377 vp->v_type = VDIR;
1378 vp->v_vflag |= VV_ROOT;
1379 *vpp = vp;
1380 return (0);
1381 }
1382
1383 /*
1384 * Flush out the buffer cache
1385 */
1386 /* ARGSUSED */
1387 static int
1388 nfs_sync(struct mount *mp, int waitfor)
1389 {
1390 struct vnode *vp, *mvp;
1391 struct thread *td;
1392 int error, allerror = 0;
1393
1394 td = curthread;
1395
1396 /*
1397 * Force stale buffer cache information to be flushed.
1398 */
1399 MNT_ILOCK(mp);
1400 loop:
1401 MNT_VNODE_FOREACH(vp, mp, mvp) {
1402 VI_LOCK(vp);
1403 MNT_IUNLOCK(mp);
1404 /* XXX Racy bv_cnt check. */
1405 if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1406 waitfor == MNT_LAZY) {
1407 VI_UNLOCK(vp);
1408 MNT_ILOCK(mp);
1409 continue;
1410 }
1411 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1412 MNT_ILOCK(mp);
1413 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1414 goto loop;
1415 }
1416 error = VOP_FSYNC(vp, waitfor, td);
1417 if (error)
1418 allerror = error;
1419 VOP_UNLOCK(vp, 0);
1420 vrele(vp);
1421
1422 MNT_ILOCK(mp);
1423 }
1424 MNT_IUNLOCK(mp);
1425 return (allerror);
1426 }
1427
1428 static int
1429 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1430 {
1431 struct nfsmount *nmp = VFSTONFS(mp);
1432 struct vfsquery vq;
1433 int error;
1434
1435 bzero(&vq, sizeof(vq));
1436 switch (op) {
1437 #if 0
1438 case VFS_CTL_NOLOCKS:
1439 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1440 if (req->oldptr != NULL) {
1441 error = SYSCTL_OUT(req, &val, sizeof(val));
1442 if (error)
1443 return (error);
1444 }
1445 if (req->newptr != NULL) {
1446 error = SYSCTL_IN(req, &val, sizeof(val));
1447 if (error)
1448 return (error);
1449 if (val)
1450 nmp->nm_flag |= NFSMNT_NOLOCKS;
1451 else
1452 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1453 }
1454 break;
1455 #endif
1456 case VFS_CTL_QUERY:
1457 mtx_lock(&nmp->nm_mtx);
1458 if (nmp->nm_state & NFSSTA_TIMEO)
1459 vq.vq_flags |= VQ_NOTRESP;
1460 mtx_unlock(&nmp->nm_mtx);
1461 #if 0
1462 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1463 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1464 vq.vq_flags |= VQ_NOTRESPLOCK;
1465 #endif
1466 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1467 break;
1468 case VFS_CTL_TIMEO:
1469 if (req->oldptr != NULL) {
1470 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1471 sizeof(nmp->nm_tprintf_initial_delay));
1472 if (error)
1473 return (error);
1474 }
1475 if (req->newptr != NULL) {
1476 error = vfs_suser(mp, req->td);
1477 if (error)
1478 return (error);
1479 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1480 sizeof(nmp->nm_tprintf_initial_delay));
1481 if (error)
1482 return (error);
1483 if (nmp->nm_tprintf_initial_delay < 0)
1484 nmp->nm_tprintf_initial_delay = 0;
1485 }
1486 break;
1487 default:
1488 return (ENOTSUP);
1489 }
1490 return (0);
1491 }
Cache object: 6bbe13b57fb2ec284fec7b965d22a0d0
|