1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/7.2/sys/nfsclient/nfs_vfsops.c 208586 2010-05-27 03:15:04Z cperciva $");
37
38
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/module.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/vnode.h>
59 #include <sys/signalvar.h>
60
61 #include <vm/vm.h>
62 #include <vm/vm_extern.h>
63 #include <vm/uma.h>
64
65 #include <net/if.h>
66 #include <net/route.h>
67 #include <netinet/in.h>
68
69 #include <rpc/rpcclnt.h>
70
71 #include <nfs/rpcv2.h>
72 #include <nfs/nfsproto.h>
73 #include <nfsclient/nfs.h>
74 #include <nfsclient/nfsnode.h>
75 #include <nfsclient/nfsmount.h>
76 #include <nfs/xdr_subs.h>
77 #include <nfsclient/nfsm_subs.h>
78 #include <nfsclient/nfsdiskless.h>
79
/*
 * malloc(9) type tags for NFS client allocations.  Each definition supplies
 * the tag, a short name and a description string.
 */
MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");

/*
 * UMA zone handle.
 * NOTE(review): presumably the zone backing struct nfsmount allocations;
 * it is not created or used in this part of the file -- confirm.
 */
uma_zone_t nfsmount_zone;

/*
 * Client statistics.  The per-procedure rpccnt[] counters are incremented
 * by the RPC routines in this file (e.g. nfs_statfs() and nfs_fsinfo()).
 */
struct nfsstats nfsstats;

/* The vfs.nfs sysctl subtree and the knobs exported beneath it. */
SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
    &nfsstats, nfsstats, "S,nfsstats");
/*
 * Defaults to 1 (paranoid).  When set to 0, nfs_mount() adds NFSMNT_NOCONN
 * to the flags of new mounts.
 */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
#ifdef NFS_DEBUG
/* Debug knob, only present in NFS_DEBUG kernels. */
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "");
#endif
/*
 * Initial delay for the "not responding" console message.
 * NOTE(review): units are not visible here -- confirm against
 * NFS_TPRINTF_INITIAL_DELAY.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
    downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
    downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");

/* Forward declarations of file-local functions and VFS entry points. */
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *hostname);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, struct vnode **,
		    struct ucred *cred);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
120
121 /*
122 * nfs vfs operations.
123 */
124 static struct vfsops nfs_vfsops = {
125 .vfs_init = nfs_init,
126 .vfs_mount = nfs_mount,
127 .vfs_cmount = nfs_cmount,
128 .vfs_root = nfs_root,
129 .vfs_statfs = nfs_statfs,
130 .vfs_sync = nfs_sync,
131 .vfs_uninit = nfs_uninit,
132 .vfs_unmount = nfs_unmount,
133 .vfs_sysctl = nfs_sysctl,
134 };
135 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
136
/*
 * Declare version 1 of the "nfs" module, so that loader and kldload(2) can
 * find us, wherever we are.
 */
MODULE_VERSION(nfs, 1);

/*
 * RPC entry points of this client, packaged as a struct nfs_rpcops.  The
 * initializers are positional, so their order has to match the member order
 * of struct nfs_rpcops, which is declared outside this file.
 * NOTE(review): nothing in this part of the file references the table;
 * presumably mountnfs() attaches it to each mount -- confirm.
 */
static struct nfs_rpcops nfs_rpcops = {
	nfs_readrpc,
	nfs_writerpc,
	nfs_writebp,
	nfs_readlinkrpc,
	nfs_invaldir,
	nfs_commit,
};
148
/*
 * This structure must be filled in by a primary bootstrap or bootstrap
 * server for a diskless/dataless machine. It is initialized below just
 * to ensure that it is allocated to initialized data (.data not .bss).
 *
 * nfs_diskless holds the old-format description and nfsv3_diskless the
 * current one.  nfs_diskless_valid tells nfs_mountroot() which of them is
 * usable: 0 means no diskless information (the root mount is refused),
 * 1 means only nfs_diskless is filled in and must first be converted by
 * nfs_convert_diskless(), which then sets the value to 3.
 */
struct nfs_diskless nfs_diskless = { { { 0 } } };
struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
int nfs_diskless_valid = 0;

/* Read-only view of the state above under vfs.nfs. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0, "");

/*
 * root_hostnam is what nfs_mountroot() prints after the "a.b.c.d:" server
 * address when it builds the root mount name, hence the "rootpath" name.
 */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "");

/* Socket address of the root file system's server. */
SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
    "%Ssockaddr_in", "");


/* Not static: declared here for use outside this file section. */
void		nfsargs_ntoh(struct nfs_args *);
/* Helpers for the diskless root mount, defined below. */
static int	nfs_mountdiskless(char *, int,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
176
177 int
178 nfs_iosize(struct nfsmount *nmp)
179 {
180 int iosize;
181
182 /*
183 * Calculate the size used for io buffers. Use the larger
184 * of the two sizes to minimise nfs requests but make sure
185 * that it is at least one VM page to avoid wasting buffer
186 * space.
187 */
188 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
189 iosize = imax(iosize, PAGE_SIZE);
190 return (iosize);
191 }
192
193 static void
194 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
195 {
196
197 args->version = NFS_ARGSVERSION;
198 args->addr = oargs->addr;
199 args->addrlen = oargs->addrlen;
200 args->sotype = oargs->sotype;
201 args->proto = oargs->proto;
202 args->fh = oargs->fh;
203 args->fhsize = oargs->fhsize;
204 args->flags = oargs->flags;
205 args->wsize = oargs->wsize;
206 args->rsize = oargs->rsize;
207 args->readdirsize = oargs->readdirsize;
208 args->timeo = oargs->timeo;
209 args->retrans = oargs->retrans;
210 args->maxgrouplist = oargs->maxgrouplist;
211 args->readahead = oargs->readahead;
212 args->deadthresh = oargs->deadthresh;
213 args->hostname = oargs->hostname;
214 }
215
216 static void
217 nfs_convert_diskless(void)
218 {
219
220 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
221 sizeof(struct ifaliasreq));
222 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
223 sizeof(struct sockaddr_in));
224 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
225 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
226 nfsv3_diskless.root_fhsize = NFSX_V3FH;
227 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
228 } else {
229 nfsv3_diskless.root_fhsize = NFSX_V2FH;
230 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
231 }
232 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
233 sizeof(struct sockaddr_in));
234 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
235 nfsv3_diskless.root_time = nfs_diskless.root_time;
236 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
237 MAXHOSTNAMELEN);
238 nfs_diskless_valid = 3;
239 }
240
241 /*
242 * nfs statfs call
243 */
244 static int
245 nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
246 {
247 struct vnode *vp;
248 struct nfs_statfs *sfp;
249 caddr_t bpos, dpos;
250 struct nfsmount *nmp = VFSTONFS(mp);
251 int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
252 struct mbuf *mreq, *mrep, *md, *mb;
253 struct nfsnode *np;
254 u_quad_t tquad;
255
256 #ifndef nolint
257 sfp = NULL;
258 #endif
259 error = vfs_busy(mp, 0, NULL, td);
260 if (error)
261 return (error);
262 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
263 if (error) {
264 vfs_unbusy(mp, td);
265 return (error);
266 }
267 vp = NFSTOV(np);
268 mtx_lock(&nmp->nm_mtx);
269 if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
270 mtx_unlock(&nmp->nm_mtx);
271 (void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
272 } else
273 mtx_unlock(&nmp->nm_mtx);
274 nfsstats.rpccnt[NFSPROC_FSSTAT]++;
275 mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
276 mb = mreq;
277 bpos = mtod(mb, caddr_t);
278 nfsm_fhtom(vp, v3);
279 nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
280 if (v3)
281 nfsm_postop_attr(vp, retattr);
282 if (error) {
283 if (mrep != NULL)
284 m_freem(mrep);
285 goto nfsmout;
286 }
287 sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
288 mtx_lock(&nmp->nm_mtx);
289 sbp->f_iosize = nfs_iosize(nmp);
290 mtx_unlock(&nmp->nm_mtx);
291 if (v3) {
292 sbp->f_bsize = NFS_FABLKSIZE;
293 tquad = fxdr_hyper(&sfp->sf_tbytes);
294 sbp->f_blocks = tquad / NFS_FABLKSIZE;
295 tquad = fxdr_hyper(&sfp->sf_fbytes);
296 sbp->f_bfree = tquad / NFS_FABLKSIZE;
297 tquad = fxdr_hyper(&sfp->sf_abytes);
298 sbp->f_bavail = tquad / NFS_FABLKSIZE;
299 sbp->f_files = (fxdr_unsigned(int32_t,
300 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
301 sbp->f_ffree = (fxdr_unsigned(int32_t,
302 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
303 } else {
304 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
305 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
306 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
307 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
308 sbp->f_files = 0;
309 sbp->f_ffree = 0;
310 }
311 m_freem(mrep);
312 nfsmout:
313 vput(vp);
314 vfs_unbusy(mp, td);
315 return (error);
316 }
317
318 /*
319 * nfs version 3 fsinfo rpc call
320 */
321 int
322 nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
323 struct thread *td)
324 {
325 struct nfsv3_fsinfo *fsp;
326 u_int32_t pref, max;
327 caddr_t bpos, dpos;
328 int error = 0, retattr;
329 struct mbuf *mreq, *mrep, *md, *mb;
330 u_int64_t maxfsize;
331
332 nfsstats.rpccnt[NFSPROC_FSINFO]++;
333 mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
334 mb = mreq;
335 bpos = mtod(mb, caddr_t);
336 nfsm_fhtom(vp, 1);
337 nfsm_request(vp, NFSPROC_FSINFO, td, cred);
338 nfsm_postop_attr(vp, retattr);
339 if (!error) {
340 fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
341 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
342 mtx_lock(&nmp->nm_mtx);
343 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
344 nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
345 ~(NFS_FABLKSIZE - 1);
346 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
347 if (max < nmp->nm_wsize && max > 0) {
348 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
349 if (nmp->nm_wsize == 0)
350 nmp->nm_wsize = max;
351 }
352 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
353 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
354 nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
355 ~(NFS_FABLKSIZE - 1);
356 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
357 if (max < nmp->nm_rsize && max > 0) {
358 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
359 if (nmp->nm_rsize == 0)
360 nmp->nm_rsize = max;
361 }
362 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
363 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
364 nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
365 ~(NFS_DIRBLKSIZ - 1);
366 if (max < nmp->nm_readdirsize && max > 0) {
367 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
368 if (nmp->nm_readdirsize == 0)
369 nmp->nm_readdirsize = max;
370 }
371 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
372 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
373 nmp->nm_maxfilesize = maxfsize;
374 nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
375 nmp->nm_state |= NFSSTA_GOTFSINFO;
376 mtx_unlock(&nmp->nm_mtx);
377 }
378 m_freem(mrep);
379 nfsmout:
380 return (error);
381 }
382
383 /*
384 * Mount a remote root fs via. nfs. This depends on the info in the
385 * nfs_diskless structure that has been filled in properly by some primary
386 * bootstrap.
387 * It goes something like this:
388 * - do enough of "ifconfig" by calling ifioctl() so that the system
389 * can talk to the server
390 * - If nfs_diskless.mygateway is filled in, use that address as
391 * a default gateway.
392 * - build the rootfs mount point and call mountnfs() to do the rest.
393 *
394 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
395 * structure, as well as other global NFS client variables here, as
396 * nfs_mountroot() will be called once in the boot before any other NFS
397 * client activity occurs.
398 */
399 int
400 nfs_mountroot(struct mount *mp, struct thread *td)
401 {
402 struct nfsv3_diskless *nd = &nfsv3_diskless;
403 struct socket *so;
404 struct vnode *vp;
405 struct ifreq ir;
406 int error, i;
407 u_long l;
408 char buf[128];
409 char *cp;
410
411 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
412 bootpc_init(); /* use bootp to get nfs_diskless filled in */
413 #elif defined(NFS_ROOT)
414 nfs_setup_diskless();
415 #endif
416
417 if (nfs_diskless_valid == 0)
418 return (-1);
419 if (nfs_diskless_valid == 1)
420 nfs_convert_diskless();
421
422 /*
423 * XXX splnet, so networks will receive...
424 */
425 splnet();
426
427 /*
428 * Do enough of ifconfig(8) so that the critical net interface can
429 * talk to the server.
430 */
431 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
432 td->td_ucred, td);
433 if (error)
434 panic("nfs_mountroot: socreate(%04x): %d",
435 nd->myif.ifra_addr.sa_family, error);
436
437 #if 0 /* XXX Bad idea */
438 /*
439 * We might not have been told the right interface, so we pass
440 * over the first ten interfaces of the same kind, until we get
441 * one of them configured.
442 */
443
444 for (i = strlen(nd->myif.ifra_name) - 1;
445 nd->myif.ifra_name[i] >= '' &&
446 nd->myif.ifra_name[i] <= '9';
447 nd->myif.ifra_name[i] ++) {
448 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
449 if(!error)
450 break;
451 }
452 #endif
453 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
454 if (error)
455 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
456 if ((cp = getenv("boot.netif.mtu")) != NULL) {
457 ir.ifr_mtu = strtol(cp, NULL, 10);
458 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
459 freeenv(cp);
460 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
461 if (error)
462 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
463 }
464 soclose(so);
465
466 /*
467 * If the gateway field is filled in, set it as the default route.
468 * Note that pxeboot will set a default route of 0 if the route
469 * is not set by the DHCP server. Check also for a value of 0
470 * to avoid panicking inappropriately in that situation.
471 */
472 if (nd->mygateway.sin_len != 0 &&
473 nd->mygateway.sin_addr.s_addr != 0) {
474 struct sockaddr_in mask, sin;
475
476 bzero((caddr_t)&mask, sizeof(mask));
477 sin = mask;
478 sin.sin_family = AF_INET;
479 sin.sin_len = sizeof(sin);
480 /* XXX MRT use table 0 for this sort of thing */
481 error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
482 (struct sockaddr *)&nd->mygateway,
483 (struct sockaddr *)&mask,
484 RTF_UP | RTF_GATEWAY, NULL);
485 if (error)
486 panic("nfs_mountroot: RTM_ADD: %d", error);
487 }
488
489 /*
490 * Create the rootfs mount point.
491 */
492 nd->root_args.fh = nd->root_fh;
493 nd->root_args.fhsize = nd->root_fhsize;
494 l = ntohl(nd->root_saddr.sin_addr.s_addr);
495 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
496 (l >> 24) & 0xff, (l >> 16) & 0xff,
497 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
498 printf("NFS ROOT: %s\n", buf);
499 nd->root_args.hostname = buf;
500 if ((error = nfs_mountdiskless(buf, MNT_RDONLY,
501 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
502 return (error);
503 }
504
505 /*
506 * This is not really an nfs issue, but it is much easier to
507 * set hostname here and then let the "/etc/rc.xxx" files
508 * mount the right /var based upon its preset value.
509 */
510 bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
511 hostname[MAXHOSTNAMELEN - 1] = '\0';
512 for (i = 0; i < MAXHOSTNAMELEN; i++)
513 if (hostname[i] == '\0')
514 break;
515 inittodr(ntohl(nd->root_time));
516 return (0);
517 }
518
519 /*
520 * Internal version of mount system call for diskless setup.
521 */
522 static int
523 nfs_mountdiskless(char *path, int mountflag,
524 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
525 struct vnode **vpp, struct mount *mp)
526 {
527 struct sockaddr *nam;
528 int error;
529
530 MNT_ILOCK(mp);
531 mp->mnt_kern_flag = 0;
532 mp->mnt_flag = mountflag;
533 MNT_IUNLOCK(mp);
534 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
535 if ((error = mountnfs(args, mp, nam, path, vpp,
536 td->td_ucred)) != 0) {
537 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
538 return (error);
539 }
540 return (0);
541 }
542
543 static void
544 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
545 const char *hostname)
546 {
547 int s;
548 int adjsock;
549 int maxio;
550 char *p;
551
552 s = splnet();
553
554 /*
555 * Set read-only flag if requested; otherwise, clear it if this is
556 * an update. If this is not an update, then either the read-only
557 * flag is already clear, or this is a root mount and it was set
558 * intentionally at some previous point.
559 */
560 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
561 MNT_ILOCK(mp);
562 mp->mnt_flag |= MNT_RDONLY;
563 MNT_IUNLOCK(mp);
564 } else if (mp->mnt_flag & MNT_UPDATE) {
565 MNT_ILOCK(mp);
566 mp->mnt_flag &= ~MNT_RDONLY;
567 MNT_IUNLOCK(mp);
568 }
569
570 /*
571 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
572 * no sense in that context. Also, set up appropriate retransmit
573 * and soft timeout behavior.
574 */
575 if (argp->sotype == SOCK_STREAM) {
576 nmp->nm_flag &= ~NFSMNT_NOCONN;
577 nmp->nm_flag |= NFSMNT_DUMBTIMR;
578 nmp->nm_timeo = NFS_MAXTIMEO;
579 nmp->nm_retry = NFS_RETRANS_TCP;
580 }
581
582 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
583 if ((argp->flags & NFSMNT_NFSV3) == 0)
584 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
585
586 /* Re-bind if rsrvd port requested and wasn't on one */
587 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
588 && (argp->flags & NFSMNT_RESVPORT);
589 /* Also re-bind if we're switching to/from a connected UDP socket */
590 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
591 (argp->flags & NFSMNT_NOCONN));
592
593 /* Update flags atomically. Don't change the lock bits. */
594 nmp->nm_flag = argp->flags | nmp->nm_flag;
595 splx(s);
596
597 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
598 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
599 if (nmp->nm_timeo < NFS_MINTIMEO)
600 nmp->nm_timeo = NFS_MINTIMEO;
601 else if (nmp->nm_timeo > NFS_MAXTIMEO)
602 nmp->nm_timeo = NFS_MAXTIMEO;
603 }
604
605 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
606 nmp->nm_retry = argp->retrans;
607 if (nmp->nm_retry > NFS_MAXREXMIT)
608 nmp->nm_retry = NFS_MAXREXMIT;
609 }
610
611 if (argp->flags & NFSMNT_NFSV3) {
612 if (argp->sotype == SOCK_DGRAM)
613 maxio = NFS_MAXDGRAMDATA;
614 else
615 maxio = NFS_MAXDATA;
616 } else
617 maxio = NFS_V2MAXDATA;
618
619 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
620 nmp->nm_wsize = argp->wsize;
621 /* Round down to multiple of blocksize */
622 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
623 if (nmp->nm_wsize <= 0)
624 nmp->nm_wsize = NFS_FABLKSIZE;
625 }
626 if (nmp->nm_wsize > maxio)
627 nmp->nm_wsize = maxio;
628 if (nmp->nm_wsize > MAXBSIZE)
629 nmp->nm_wsize = MAXBSIZE;
630
631 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
632 nmp->nm_rsize = argp->rsize;
633 /* Round down to multiple of blocksize */
634 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
635 if (nmp->nm_rsize <= 0)
636 nmp->nm_rsize = NFS_FABLKSIZE;
637 }
638 if (nmp->nm_rsize > maxio)
639 nmp->nm_rsize = maxio;
640 if (nmp->nm_rsize > MAXBSIZE)
641 nmp->nm_rsize = MAXBSIZE;
642
643 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
644 nmp->nm_readdirsize = argp->readdirsize;
645 }
646 if (nmp->nm_readdirsize > maxio)
647 nmp->nm_readdirsize = maxio;
648 if (nmp->nm_readdirsize > nmp->nm_rsize)
649 nmp->nm_readdirsize = nmp->nm_rsize;
650
651 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
652 nmp->nm_acregmin = argp->acregmin;
653 else
654 nmp->nm_acregmin = NFS_MINATTRTIMO;
655 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
656 nmp->nm_acregmax = argp->acregmax;
657 else
658 nmp->nm_acregmax = NFS_MAXATTRTIMO;
659 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
660 nmp->nm_acdirmin = argp->acdirmin;
661 else
662 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
663 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
664 nmp->nm_acdirmax = argp->acdirmax;
665 else
666 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
667 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
668 nmp->nm_acdirmin = nmp->nm_acdirmax;
669 if (nmp->nm_acregmin > nmp->nm_acregmax)
670 nmp->nm_acregmin = nmp->nm_acregmax;
671
672 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
673 if (argp->maxgrouplist <= NFS_MAXGRPS)
674 nmp->nm_numgrps = argp->maxgrouplist;
675 else
676 nmp->nm_numgrps = NFS_MAXGRPS;
677 }
678 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
679 if (argp->readahead <= NFS_MAXRAHEAD)
680 nmp->nm_readahead = argp->readahead;
681 else
682 nmp->nm_readahead = NFS_MAXRAHEAD;
683 }
684 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
685 if (argp->wcommitsize < nmp->nm_wsize)
686 nmp->nm_wcommitsize = nmp->nm_wsize;
687 else
688 nmp->nm_wcommitsize = argp->wcommitsize;
689 }
690 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
691 if (argp->deadthresh <= NFS_MAXDEADTHRESH)
692 nmp->nm_deadthresh = argp->deadthresh;
693 else
694 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
695 }
696
697 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
698 (nmp->nm_soproto != argp->proto));
699 nmp->nm_sotype = argp->sotype;
700 nmp->nm_soproto = argp->proto;
701
702 if (nmp->nm_so && adjsock) {
703 nfs_safedisconnect(nmp);
704 if (nmp->nm_sotype == SOCK_DGRAM)
705 while (nfs_connect(nmp, NULL)) {
706 printf("nfs_args: retrying connect\n");
707 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
708 }
709 }
710
711 if (hostname) {
712 strlcpy(nmp->nm_hostname, hostname,
713 sizeof(nmp->nm_hostname));
714 p = strchr(nmp->nm_hostname, ':');
715 if (p)
716 *p = '\0';
717 }
718 }
719
/*
 * Mount option names accepted by nfs_mount().  The list is handed to
 * vfs_filteropt(); when that check fails the mount request is refused with
 * EINVAL.  The list is terminated by NULL.
 *
 * "readahead" has to be listed because nfs_mount() parses it; an option
 * missing from this list is rejected before the parser ever sees it.
 *
 * NOTE(review): nfs_mount() also looks up "conn", "lockd" and "noresvport",
 * which are not listed.  vfs_filteropt() is expected to accept them through
 * its "no"-prefix matching against "noconn", "nolockd" and "resvport" --
 * confirm against vfs_filteropt() in sys/kern/vfs_mount.c.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
    "readahead", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
    "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin",
    "acdirmax", "deadthresh", "hostname", "timeout", "addr", "fh", "nfsv3",
    "maxgroups",
    NULL };
729
730 /*
731 * VFS Operations.
732 *
733 * mount system call
734 * It seems a bit dumb to copyinstr() the host and path here and then
735 * bcopy() them in mountnfs(), but I wanted to detect errors before
736 * doing the sockargs() call because sockargs() allocates an mbuf and
737 * an error after that means that I have to release the mbuf.
738 */
739 /* ARGSUSED */
740 static int
741 nfs_mount(struct mount *mp, struct thread *td)
742 {
743 int error, ret, has_nfs_args_opt;
744 int has_addr_opt, has_fh_opt, has_hostname_opt;
745 struct nfs_args args;
746 struct sockaddr *nam;
747 struct vnode *vp;
748 char hst[MNAMELEN];
749 size_t len;
750 u_char nfh[NFSX_V3FHMAX];
751 char *opt;
752
753 has_nfs_args_opt = 0;
754 has_addr_opt = 0;
755 has_fh_opt = 0;
756 has_hostname_opt = 0;
757
758 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
759 error = EINVAL;
760 goto out;
761 }
762
763 if (mp->mnt_flag & MNT_ROOTFS) {
764 error = nfs_mountroot(mp, td);
765 goto out;
766 }
767
768 /*
769 * The old mount_nfs program passed the struct nfs_args
770 * from userspace to kernel. The new mount_nfs program
771 * passes string options via nmount() from userspace to kernel
772 * and we populate the struct nfs_args in the kernel.
773 */
774 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
775 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
776 sizeof args);
777 if (error)
778 goto out;
779
780 if (args.version != NFS_ARGSVERSION) {
781 error = EPROGMISMATCH;
782 goto out;
783 }
784 has_nfs_args_opt = 1;
785 }
786
787 if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
788 args.flags |= NFSMNT_DUMBTIMR;
789 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
790 args.flags |= NFSMNT_NOCONN;
791 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
792 args.flags |= NFSMNT_NOCONN;
793 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
794 args.flags |= NFSMNT_NOLOCKD;
795 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
796 args.flags &= ~NFSMNT_NOLOCKD;
797 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
798 args.flags |= NFSMNT_INT;
799 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
800 args.flags |= NFSMNT_RDIRPLUS;
801 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
802 args.flags |= NFSMNT_RESVPORT;
803 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
804 args.flags &= ~NFSMNT_RESVPORT;
805 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
806 args.flags |= NFSMNT_SOFT;
807 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
808 args.flags &= ~NFSMNT_SOFT;
809 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
810 args.sotype = SOCK_DGRAM;
811 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
812 args.sotype = SOCK_DGRAM;
813 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
814 args.sotype = SOCK_STREAM;
815 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
816 args.flags |= NFSMNT_NFSV3;
817 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
818 if (opt == NULL) {
819 vfs_mount_error(mp, "illegal readdirsize");
820 error = EINVAL;
821 goto out;
822 }
823 ret = sscanf(opt, "%d", &args.readdirsize);
824 if (ret != 1 || args.readdirsize <= 0) {
825 vfs_mount_error(mp, "illegal readdirsize: %s",
826 opt);
827 error = EINVAL;
828 goto out;
829 }
830 args.flags |= NFSMNT_READDIRSIZE;
831 }
832 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
833 if (opt == NULL) {
834 vfs_mount_error(mp, "illegal readahead");
835 error = EINVAL;
836 goto out;
837 }
838 ret = sscanf(opt, "%d", &args.readahead);
839 if (ret != 1 || args.readahead <= 0) {
840 vfs_mount_error(mp, "illegal readahead: %s",
841 opt);
842 error = EINVAL;
843 goto out;
844 }
845 args.flags |= NFSMNT_READAHEAD;
846 }
847 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
848 if (opt == NULL) {
849 vfs_mount_error(mp, "illegal wsize");
850 error = EINVAL;
851 goto out;
852 }
853 ret = sscanf(opt, "%d", &args.wsize);
854 if (ret != 1 || args.wsize <= 0) {
855 vfs_mount_error(mp, "illegal wsize: %s",
856 opt);
857 error = EINVAL;
858 goto out;
859 }
860 args.flags |= NFSMNT_WSIZE;
861 }
862 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
863 if (opt == NULL) {
864 vfs_mount_error(mp, "illegal rsize");
865 error = EINVAL;
866 goto out;
867 }
868 ret = sscanf(opt, "%d", &args.rsize);
869 if (ret != 1 || args.rsize <= 0) {
870 vfs_mount_error(mp, "illegal wsize: %s",
871 opt);
872 error = EINVAL;
873 goto out;
874 }
875 args.flags |= NFSMNT_RSIZE;
876 }
877 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
878 if (opt == NULL) {
879 vfs_mount_error(mp, "illegal retrans");
880 error = EINVAL;
881 goto out;
882 }
883 ret = sscanf(opt, "%d", &args.retrans);
884 if (ret != 1 || args.retrans <= 0) {
885 vfs_mount_error(mp, "illegal retrans: %s",
886 opt);
887 error = EINVAL;
888 goto out;
889 }
890 args.flags |= NFSMNT_RETRANS;
891 }
892 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
893 ret = sscanf(opt, "%d", &args.acregmin);
894 if (ret != 1 || args.acregmin < 0) {
895 vfs_mount_error(mp, "illegal acregmin: %s",
896 opt);
897 error = EINVAL;
898 goto out;
899 }
900 args.flags |= NFSMNT_ACREGMIN;
901 }
902 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
903 ret = sscanf(opt, "%d", &args.acregmax);
904 if (ret != 1 || args.acregmax < 0) {
905 vfs_mount_error(mp, "illegal acregmax: %s",
906 opt);
907 error = EINVAL;
908 goto out;
909 }
910 args.flags |= NFSMNT_ACREGMAX;
911 }
912 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
913 ret = sscanf(opt, "%d", &args.acdirmin);
914 if (ret != 1 || args.acdirmin < 0) {
915 vfs_mount_error(mp, "illegal acdirmin: %s",
916 opt);
917 error = EINVAL;
918 goto out;
919 }
920 args.flags |= NFSMNT_ACDIRMIN;
921 }
922 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
923 ret = sscanf(opt, "%d", &args.acdirmax);
924 if (ret != 1 || args.acdirmax < 0) {
925 vfs_mount_error(mp, "illegal acdirmax: %s",
926 opt);
927 error = EINVAL;
928 goto out;
929 }
930 args.flags |= NFSMNT_ACDIRMAX;
931 }
932 if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) {
933 ret = sscanf(opt, "%d", &args.deadthresh);
934 if (ret != 1 || args.deadthresh <= 0) {
935 vfs_mount_error(mp, "illegal deadthresh: %s",
936 opt);
937 error = EINVAL;
938 goto out;
939 }
940 args.flags |= NFSMNT_DEADTHRESH;
941 }
942 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
943 ret = sscanf(opt, "%d", &args.timeo);
944 if (ret != 1 || args.timeo <= 0) {
945 vfs_mount_error(mp, "illegal timeout: %s",
946 opt);
947 error = EINVAL;
948 goto out;
949 }
950 args.flags |= NFSMNT_TIMEO;
951 }
952 if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) {
953 ret = sscanf(opt, "%d", &args.maxgrouplist);
954 if (ret != 1 || args.timeo <= 0) {
955 vfs_mount_error(mp, "illegal maxgroups: %s",
956 opt);
957 error = EINVAL;
958 goto out;
959 }
960 args.flags |= NFSMNT_MAXGRPS;
961 }
962 if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
963 &args.addrlen) == 0) {
964 has_addr_opt = 1;
965 if (args.addrlen > SOCK_MAXADDRLEN) {
966 error = ENAMETOOLONG;
967 goto out;
968 }
969 MALLOC(nam, struct sockaddr *, args.addrlen, M_SONAME,
970 M_WAITOK);
971 bcopy(args.addr, nam, args.addrlen);
972 nam->sa_len = args.addrlen;
973 }
974 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
975 &args.fhsize) == 0) {
976 has_fh_opt = 1;
977 }
978 if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
979 NULL) == 0) {
980 has_hostname_opt = 1;
981 }
982 if (args.hostname == NULL) {
983 vfs_mount_error(mp, "Invalid hostname");
984 error = EINVAL;
985 goto out;
986 }
987 if (mp->mnt_flag & MNT_UPDATE) {
988 struct nfsmount *nmp = VFSTONFS(mp);
989
990 if (nmp == NULL) {
991 error = EIO;
992 goto out;
993 }
994 /*
995 * When doing an update, we can't change from or to
996 * v3, switch lockd strategies or change cookie translation
997 */
998 args.flags = (args.flags &
999 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1000 (nmp->nm_flag &
1001 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1002 nfs_decode_args(mp, nmp, &args, NULL);
1003 goto out;
1004 }
1005 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1006 vfs_mount_error(mp, "Bad file handle");
1007 error = EINVAL;
1008 goto out;
1009 }
1010
1011 /*
1012 * Make the nfs_ip_paranoia sysctl serve as the default connection
1013 * or no-connection mode for those protocols that support
1014 * no-connection mode (the flag will be cleared later for protocols
1015 * that do not support no-connection mode). This will allow a client
1016 * to receive replies from a different IP then the request was
1017 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1018 * not 0.
1019 */
1020 if (nfs_ip_paranoia == 0)
1021 args.flags |= NFSMNT_NOCONN;
1022
1023 if (has_nfs_args_opt) {
1024 /*
1025 * In the 'nfs_args' case, the pointers in the args
1026 * structure are in userland - we copy them in here.
1027 */
1028 if (!has_fh_opt) {
1029 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1030 args.fhsize);
1031 if (error) {
1032 goto out;
1033 }
1034 args.fh = nfh;
1035 }
1036 if (!has_hostname_opt) {
1037 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
1038 if (error) {
1039 goto out;
1040 }
1041 bzero(&hst[len], MNAMELEN - len);
1042 args.hostname = hst;
1043 }
1044 if (!has_addr_opt) {
1045 /* sockargs() call must be after above copyin() calls */
1046 error = getsockaddr(&nam, (caddr_t)args.addr,
1047 args.addrlen);
1048 if (error) {
1049 goto out;
1050 }
1051 }
1052 }
1053 error = mountnfs(&args, mp, nam, args.hostname, &vp, td->td_ucred);
1054 out:
1055 if (!error) {
1056 MNT_ILOCK(mp);
1057 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1058 MNT_IUNLOCK(mp);
1059 }
1060 return (error);
1061 }
1062
1063
1064 /*
1065 * VFS Operations.
1066 *
1067 * mount system call
1068 * It seems a bit dumb to copyinstr() the host and path here and then
1069 * bcopy() them in mountnfs(), but I wanted to detect errors before
1070 * doing the sockargs() call because sockargs() allocates an mbuf and
1071 * an error after that means that I have to release the mbuf.
1072 */
1073 /* ARGSUSED */
1074 static int
1075 nfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
1076 {
1077 int error;
1078 struct nfs_args args;
1079
1080 error = copyin(data, &args, sizeof (struct nfs_args));
1081 if (error)
1082 return error;
1083
1084 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1085
1086 error = kernel_mount(ma, flags);
1087 return (error);
1088 }
1089
1090 /*
1091 * Common code for mount and mountroot
1092 */
1093 static int
1094 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1095 char *hst, struct vnode **vpp, struct ucred *cred)
1096 {
1097 struct nfsmount *nmp;
1098 struct nfsnode *np;
1099 int error;
1100 struct vattr attrs;
1101
1102 if (mp->mnt_flag & MNT_UPDATE) {
1103 nmp = VFSTONFS(mp);
1104 /* update paths, file handles, etc, here XXX */
1105 FREE(nam, M_SONAME);
1106 return (0);
1107 } else {
1108 nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
1109 bzero((caddr_t)nmp, sizeof (struct nfsmount));
1110 TAILQ_INIT(&nmp->nm_bufq);
1111 mp->mnt_data = (qaddr_t)nmp;
1112 }
1113 vfs_getnewfsid(mp);
1114 nmp->nm_mountp = mp;
1115 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
1116
1117 /*
1118 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1119 * high, depending on whether we end up with negative offsets in
1120 * the client or server somewhere. 2GB-1 may be safer.
1121 *
1122 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
1123 * that we can handle until we find out otherwise.
1124 * XXX Our "safe" limit on the client is what we can store in our
1125 * buffer cache using signed(!) block numbers.
1126 */
1127 if ((argp->flags & NFSMNT_NFSV3) == 0)
1128 nmp->nm_maxfilesize = 0xffffffffLL;
1129 else
1130 nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1131
1132 nmp->nm_timeo = NFS_TIMEO;
1133 nmp->nm_retry = NFS_RETRANS;
1134 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
1135 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
1136 } else {
1137 nmp->nm_wsize = NFS_WSIZE;
1138 nmp->nm_rsize = NFS_RSIZE;
1139 }
1140 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1141 nmp->nm_readdirsize = NFS_READDIRSIZE;
1142 nmp->nm_numgrps = NFS_MAXGRPS;
1143 nmp->nm_readahead = NFS_DEFRAHEAD;
1144 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
1145 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1146 if (nmp->nm_tprintf_delay < 0)
1147 nmp->nm_tprintf_delay = 0;
1148 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1149 if (nmp->nm_tprintf_initial_delay < 0)
1150 nmp->nm_tprintf_initial_delay = 0;
1151 nmp->nm_fhsize = argp->fhsize;
1152 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1153 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1154 nmp->nm_nam = nam;
1155 /* Set up the sockets and per-host congestion */
1156 nmp->nm_sotype = argp->sotype;
1157 nmp->nm_soproto = argp->proto;
1158 nmp->nm_rpcops = &nfs_rpcops;
1159
1160 nfs_decode_args(mp, nmp, argp, hst);
1161
1162 /*
1163 * For Connection based sockets (TCP,...) defer the connect until
1164 * the first request, in case the server is not responding.
1165 */
1166 if (nmp->nm_sotype == SOCK_DGRAM &&
1167 (error = nfs_connect(nmp, NULL)))
1168 goto bad;
1169
1170 /*
1171 * This is silly, but it has to be set so that vinifod() works.
1172 * We do not want to do an nfs_statfs() here since we can get
1173 * stuck on a dead server and we are holding a lock on the mount
1174 * point.
1175 */
1176 mtx_lock(&nmp->nm_mtx);
1177 mp->mnt_stat.f_iosize = nfs_iosize(nmp);
1178 mtx_unlock(&nmp->nm_mtx);
1179 /*
1180 * A reference count is needed on the nfsnode representing the
1181 * remote root. If this object is not persistent, then backward
1182 * traversals of the mount point (i.e. "..") will not work if
1183 * the nfsnode gets flushed out of the cache. Ufs does not have
1184 * this problem, because one can identify root inodes by their
1185 * number == ROOTINO (2).
1186 */
1187 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
1188 if (error)
1189 goto bad;
1190 *vpp = NFSTOV(np);
1191
1192 /*
1193 * Get file attributes and transfer parameters for the
1194 * mountpoint. This has the side effect of filling in
1195 * (*vpp)->v_type with the correct value.
1196 */
1197 if (argp->flags & NFSMNT_NFSV3)
1198 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
1199 else
1200 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred, curthread);
1201
1202 /*
1203 * Lose the lock but keep the ref.
1204 */
1205 VOP_UNLOCK(*vpp, 0, curthread);
1206
1207 return (0);
1208 bad:
1209 nfs_disconnect(nmp);
1210 mtx_destroy(&nmp->nm_mtx);
1211 uma_zfree(nfsmount_zone, nmp);
1212 FREE(nam, M_SONAME);
1213 return (error);
1214 }
1215
1216 /*
1217 * unmount system call
1218 */
1219 static int
1220 nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
1221 {
1222 struct nfsmount *nmp;
1223 int error, flags = 0;
1224
1225 if (mntflags & MNT_FORCE)
1226 flags |= FORCECLOSE;
1227 nmp = VFSTONFS(mp);
1228 /*
1229 * Goes something like this..
1230 * - Call vflush() to clear out vnodes for this filesystem
1231 * - Close the socket
1232 * - Free up the data structures
1233 */
1234 /* In the forced case, cancel any outstanding requests. */
1235 if (flags & FORCECLOSE) {
1236 error = nfs_nmcancelreqs(nmp);
1237 if (error)
1238 goto out;
1239 }
1240 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1241 error = vflush(mp, 1, flags, td);
1242 if (error)
1243 goto out;
1244
1245 /*
1246 * We are now committed to the unmount.
1247 */
1248 nfs_disconnect(nmp);
1249 FREE(nmp->nm_nam, M_SONAME);
1250
1251 mtx_destroy(&nmp->nm_mtx);
1252 uma_zfree(nfsmount_zone, nmp);
1253 out:
1254 return (error);
1255 }
1256
1257 /*
1258 * Return root of a filesystem
1259 */
1260 static int
1261 nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
1262 {
1263 struct vnode *vp;
1264 struct nfsmount *nmp;
1265 struct nfsnode *np;
1266 int error;
1267
1268 nmp = VFSTONFS(mp);
1269 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1270 if (error)
1271 return error;
1272 vp = NFSTOV(np);
1273 /*
1274 * Get transfer parameters and attributes for root vnode once.
1275 */
1276 mtx_lock(&nmp->nm_mtx);
1277 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
1278 (nmp->nm_flag & NFSMNT_NFSV3)) {
1279 mtx_unlock(&nmp->nm_mtx);
1280 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1281 } else
1282 mtx_unlock(&nmp->nm_mtx);
1283 if (vp->v_type == VNON)
1284 vp->v_type = VDIR;
1285 vp->v_vflag |= VV_ROOT;
1286 *vpp = vp;
1287 return (0);
1288 }
1289
1290 /*
1291 * Flush out the buffer cache
1292 */
1293 /* ARGSUSED */
1294 static int
1295 nfs_sync(struct mount *mp, int waitfor, struct thread *td)
1296 {
1297 struct vnode *vp, *mvp;
1298 int error, allerror = 0;
1299
1300 /*
1301 * Force stale buffer cache information to be flushed.
1302 */
1303 MNT_ILOCK(mp);
1304 loop:
1305 MNT_VNODE_FOREACH(vp, mp, mvp) {
1306 VI_LOCK(vp);
1307 MNT_IUNLOCK(mp);
1308 if (VOP_ISLOCKED(vp, NULL) ||
1309 vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1310 waitfor == MNT_LAZY) {
1311 VI_UNLOCK(vp);
1312 MNT_ILOCK(mp);
1313 continue;
1314 }
1315 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1316 MNT_ILOCK(mp);
1317 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1318 goto loop;
1319 }
1320 error = VOP_FSYNC(vp, waitfor, td);
1321 if (error)
1322 allerror = error;
1323 VOP_UNLOCK(vp, 0, td);
1324 vrele(vp);
1325
1326 MNT_ILOCK(mp);
1327 }
1328 MNT_IUNLOCK(mp);
1329 return (allerror);
1330 }
1331
1332 static int
1333 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1334 {
1335 struct nfsmount *nmp = VFSTONFS(mp);
1336 struct vfsquery vq;
1337 int error;
1338
1339 bzero(&vq, sizeof(vq));
1340 switch (op) {
1341 #if 0
1342 case VFS_CTL_NOLOCKS:
1343 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1344 if (req->oldptr != NULL) {
1345 error = SYSCTL_OUT(req, &val, sizeof(val));
1346 if (error)
1347 return (error);
1348 }
1349 if (req->newptr != NULL) {
1350 error = SYSCTL_IN(req, &val, sizeof(val));
1351 if (error)
1352 return (error);
1353 if (val)
1354 nmp->nm_flag |= NFSMNT_NOLOCKS;
1355 else
1356 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1357 }
1358 break;
1359 #endif
1360 case VFS_CTL_QUERY:
1361 mtx_lock(&nmp->nm_mtx);
1362 if (nmp->nm_state & NFSSTA_TIMEO)
1363 vq.vq_flags |= VQ_NOTRESP;
1364 mtx_unlock(&nmp->nm_mtx);
1365 #if 0
1366 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1367 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1368 vq.vq_flags |= VQ_NOTRESPLOCK;
1369 #endif
1370 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1371 break;
1372 case VFS_CTL_TIMEO:
1373 if (req->oldptr != NULL) {
1374 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1375 sizeof(nmp->nm_tprintf_initial_delay));
1376 if (error)
1377 return (error);
1378 }
1379 if (req->newptr != NULL) {
1380 error = vfs_suser(mp, req->td);
1381 if (error)
1382 return (error);
1383 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1384 sizeof(nmp->nm_tprintf_initial_delay));
1385 if (error)
1386 return (error);
1387 if (nmp->nm_tprintf_initial_delay < 0)
1388 nmp->nm_tprintf_initial_delay = 0;
1389 }
1390 break;
1391 default:
1392 return (ENOTSUP);
1393 }
1394 return (0);
1395 }
Cache object: 6435bfc49e168f312bf7fa8c0ff71bca
|