1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/8.0/sys/nfsclient/nfs_vfsops.c 208586 2010-05-27 03:15:04Z cperciva $");
37
38
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/jail.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/module.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/vnode.h>
59 #include <sys/signalvar.h>
60
61 #include <vm/vm.h>
62 #include <vm/vm_extern.h>
63 #include <vm/uma.h>
64
65 #include <net/if.h>
66 #include <net/route.h>
67 #include <netinet/in.h>
68
69 #include <rpc/rpc.h>
70
71 #include <nfs/nfsproto.h>
72 #include <nfsclient/nfs.h>
73 #include <nfsclient/nfsnode.h>
74 #include <nfsclient/nfsmount.h>
75 #include <nfs/xdr_subs.h>
76 #include <nfsclient/nfsm_subs.h>
77 #include <nfsclient/nfsdiskless.h>
78
/*
 * malloc(9) types used by the NFS client.  Each invocation supplies the
 * type identifier, a short name and a longer description string.
 */
MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");

/* UMA zone handle; presumably used for struct nfsmount -- set up elsewhere. */
uma_zone_t nfsmount_zone;

/* Client statistics; the rpccnt[] counters are incremented per RPC below. */
struct nfsstats nfsstats;

/* The vfs.nfs sysctl subtree and the tunables exported from this file. */
SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
    &nfsstats, nfsstats, "S,nfsstats");
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0,
    "Disallow accepting replies from IPs which differ from those sent");
#ifdef NFS_DEBUG
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
/*
 * how long to wait before the first "nfs server foo not responding" message
 *
 * NOTE(review): the same constant is used both as the default value and in
 * the OID-number slot of SYSCTL_INT (here and for downdelayinterval below);
 * confirm that is intended rather than OID_AUTO.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
    downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0,
    "Delay before printing \"nfs server not responding\" messages");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
    downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0,
    "Delay between printing \"nfs server not responding\" messages");

/* Forward declarations for the file-local helpers and VFS entry points. */
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *hostname);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, struct vnode **,
		    struct ucred *cred);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;

/* Only its address is used, as the tsleep() wait channel in nfs_decode_args(). */
static int	fake_wchan;
125
126 /*
127 * nfs vfs operations.
128 */
129 static struct vfsops nfs_vfsops = {
130 .vfs_init = nfs_init,
131 .vfs_mount = nfs_mount,
132 .vfs_cmount = nfs_cmount,
133 .vfs_root = nfs_root,
134 .vfs_statfs = nfs_statfs,
135 .vfs_sync = nfs_sync,
136 .vfs_uninit = nfs_uninit,
137 .vfs_unmount = nfs_unmount,
138 .vfs_sysctl = nfs_sysctl,
139 };
140 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
141
/*
 * So that loader and kldload(2) can find us, wherever we are..
 *
 * Declares module "nfs" at version 1 and its dependency on "krpc"; when
 * the kernel is configured with KGSSAPI it additionally depends on
 * "kgssapi".  The three numbers on each MODULE_DEPEND are presumably the
 * minimum/preferred/maximum acceptable versions -- see MODULE_DEPEND(9).
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
#ifdef KGSSAPI
MODULE_DEPEND(nfs, kgssapi, 1, 1, 1);
#endif
148
/*
 * RPC operation table installed on each mount (see where nm_rpcops is
 * assigned, outside this view -- TODO confirm).  The initializer is
 * positional, so the entries must stay in the order in which
 * struct nfs_rpcops declares its members (declared in another file).
 */
static struct nfs_rpcops nfs_rpcops = {
	nfs_readrpc,
	nfs_writerpc,
	nfs_writebp,
	nfs_readlinkrpc,
	nfs_invaldir,
	nfs_commit,
};
157
/*
 * This structure must be filled in by a primary bootstrap or bootstrap
 * server for a diskless/dataless machine. It is initialized below just
 * to ensure that it is allocated to initialized data (.data not .bss).
 *
 * nfs_diskless_valid tells nfs_mountroot() what it has to work with:
 * 0 means nothing was filled in (the root mount is refused), 1 means the
 * old-format nfs_diskless is populated and must first be converted into
 * nfsv3_diskless.  nfs_convert_diskless() sets the value to 3 once it
 * has done that conversion.
 */
struct nfs_diskless nfs_diskless = { { { 0 } } };
struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
int nfs_diskless_valid = 0;

/* Read-only sysctls exposing the diskless boot state under vfs.nfs. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
    "%Ssockaddr_in", "Diskless root nfs address");


/* Prototypes for the diskless-root helpers defined later in this file. */
void		nfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
186
187 int
188 nfs_iosize(struct nfsmount *nmp)
189 {
190 int iosize;
191
192 /*
193 * Calculate the size used for io buffers. Use the larger
194 * of the two sizes to minimise nfs requests but make sure
195 * that it is at least one VM page to avoid wasting buffer
196 * space.
197 */
198 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
199 iosize = imax(iosize, PAGE_SIZE);
200 return (iosize);
201 }
202
203 static void
204 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
205 {
206
207 args->version = NFS_ARGSVERSION;
208 args->addr = oargs->addr;
209 args->addrlen = oargs->addrlen;
210 args->sotype = oargs->sotype;
211 args->proto = oargs->proto;
212 args->fh = oargs->fh;
213 args->fhsize = oargs->fhsize;
214 args->flags = oargs->flags;
215 args->wsize = oargs->wsize;
216 args->rsize = oargs->rsize;
217 args->readdirsize = oargs->readdirsize;
218 args->timeo = oargs->timeo;
219 args->retrans = oargs->retrans;
220 args->maxgrouplist = oargs->maxgrouplist;
221 args->readahead = oargs->readahead;
222 args->deadthresh = oargs->deadthresh;
223 args->hostname = oargs->hostname;
224 }
225
226 static void
227 nfs_convert_diskless(void)
228 {
229
230 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
231 sizeof(struct ifaliasreq));
232 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
233 sizeof(struct sockaddr_in));
234 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
235 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
236 nfsv3_diskless.root_fhsize = NFSX_V3FH;
237 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
238 } else {
239 nfsv3_diskless.root_fhsize = NFSX_V2FH;
240 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
241 }
242 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
243 sizeof(struct sockaddr_in));
244 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
245 nfsv3_diskless.root_time = nfs_diskless.root_time;
246 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
247 MAXHOSTNAMELEN);
248 nfs_diskless_valid = 3;
249 }
250
/*
 * nfs statfs call
 *
 * Sends an NFSPROC_FSSTAT request for the mount's root file handle and
 * fills in the size/count fields of *sbp from the reply.
 *
 * Returns 0 on success, otherwise the error from vfs_busy(), nfs_nget()
 * or the RPC.  The mount is held busy and the root vnode is held (locked
 * exclusively) for the duration of the call; both are released before
 * returning.
 *
 * The nfsm_* macros are defined outside this file.  They presumably use
 * the locals mreq, mrep, md, mb, bpos, dpos and error by name, and jump
 * to the nfsmout label on failure -- keep those names and the label.
 */
static int
nfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct vnode *vp;
	struct thread *td;
	struct nfs_statfs *sfp;
	caddr_t bpos, dpos;
	struct nfsmount *nmp = VFSTONFS(mp);
	int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct nfsnode *np;
	u_quad_t tquad;

	td = curthread;
#ifndef nolint
	sfp = NULL;
#endif
	/* Do not sleep waiting for the mount; fail instead. */
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error)
		return (error);
	/* Get the nfsnode/vnode for the mount's root file handle. */
	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
	if (error) {
		vfs_unbusy(mp);
		return (error);
	}
	vp = NFSTOV(np);
	/*
	 * On a v3 mount that has not yet recorded FSINFO, fetch it first.
	 * The state bit is tested under nm_mtx, but the mutex is dropped
	 * before the RPC is issued.  The result of nfs_fsinfo() is ignored.
	 */
	mtx_lock(&nmp->nm_mtx);
	if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		(void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
	} else
		mtx_unlock(&nmp->nm_mtx);
	/* Build and send the FSSTAT request. */
	nfsstats.rpccnt[NFSPROC_FSSTAT]++;
	mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
	if (v3)
		nfsm_postop_attr(vp, retattr);
	if (error) {
		/* An error was reported but a reply mbuf may still exist. */
		if (mrep != NULL)
			m_freem(mrep);
		goto nfsmout;
	}
	sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
	mtx_lock(&nmp->nm_mtx);
	sbp->f_iosize = nfs_iosize(nmp);
	mtx_unlock(&nmp->nm_mtx);
	if (v3) {
		/*
		 * v3 reports byte counts; express them in blocks of
		 * NFS_FABLKSIZE.  Only the second 32-bit word of each
		 * file count is used, masked to 31 bits.
		 */
		sbp->f_bsize = NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_tbytes);
		sbp->f_blocks = tquad / NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_fbytes);
		sbp->f_bfree = tquad / NFS_FABLKSIZE;
		tquad = fxdr_hyper(&sfp->sf_abytes);
		sbp->f_bavail = tquad / NFS_FABLKSIZE;
		sbp->f_files = (fxdr_unsigned(int32_t,
		    sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
		sbp->f_ffree = (fxdr_unsigned(int32_t,
		    sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
	} else {
		/*
		 * The non-v3 reply supplies block size and block counts
		 * directly; file counts are reported as zero.
		 */
		sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
		sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
		sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
		sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
		sbp->f_files = 0;
		sbp->f_ffree = 0;
	}
	m_freem(mrep);
nfsmout:
	/* Common exit: drop the root vnode and the busy reference. */
	vput(vp);
	vfs_unbusy(mp);
	return (error);
}
329
/*
 * nfs version 3 fsinfo rpc call
 *
 * Sends NFSPROC_FSINFO for vp and, on success, lowers the mount's write,
 * read and readdir transfer sizes and its maximum file size to what the
 * server reports, refreshes the cached f_iosize and sets
 * NFSSTA_GOTFSINFO.  Values are only ever reduced, never raised.
 *
 * Returns 0 or the RPC error.
 *
 * The nfsm_* macros are defined outside this file.  They presumably use
 * the locals mreq, mrep, md, mb, bpos, dpos and error by name, and jump
 * to nfsmout on failure (no explicit goto to that label appears here).
 */
int
nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
    struct thread *td)
{
	struct nfsv3_fsinfo *fsp;
	u_int32_t pref, max;
	caddr_t bpos, dpos;
	int error = 0, retattr;
	struct mbuf *mreq, *mrep, *md, *mb;
	u_int64_t maxfsize;

	nfsstats.rpccnt[NFSPROC_FSINFO]++;
	mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, 1);
	nfsm_request(vp, NFSPROC_FSINFO, td, cred);
	nfsm_postop_attr(vp, retattr);
	if (!error) {
		fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
		pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
		mtx_lock(&nmp->nm_mtx);
		/*
		 * Write size: adopt the server's preferred size if it is
		 * smaller than ours (and at least one block), rounded up
		 * with the NFS_FABLKSIZE mask; then cap at the server's
		 * maximum, rounded down, falling back to the raw maximum
		 * if rounding would give zero.
		 */
		if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
			nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
			    ~(NFS_FABLKSIZE - 1);
		max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
		if (max < nmp->nm_wsize && max > 0) {
			nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
			if (nmp->nm_wsize == 0)
				nmp->nm_wsize = max;
		}
		/* Read size: same treatment using fs_rtpref / fs_rtmax. */
		pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
		if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
			nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
			    ~(NFS_FABLKSIZE - 1);
		max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
		if (max < nmp->nm_rsize && max > 0) {
			nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
			if (nmp->nm_rsize == 0)
				nmp->nm_rsize = max;
		}
		/*
		 * Readdir size: preferred value from fs_dtpref, in units
		 * of NFS_DIRBLKSIZ.  Note that "max" is not reloaded here:
		 * the cap applied below is still the fs_rtmax value read
		 * above.
		 */
		pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
		if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
			nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
			    ~(NFS_DIRBLKSIZ - 1);
		if (max < nmp->nm_readdirsize && max > 0) {
			nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
			if (nmp->nm_readdirsize == 0)
				nmp->nm_readdirsize = max;
		}
		/* A reported maximum file size of zero is ignored. */
		maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
		if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
			nmp->nm_maxfilesize = maxfsize;
		/* The transfer sizes may have changed; refresh f_iosize. */
		nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
		nmp->nm_state |= NFSSTA_GOTFSINFO;
		mtx_unlock(&nmp->nm_mtx);
	}
	m_freem(mrep);
nfsmout:
	return (error);
}
394
395 /*
396 * Mount a remote root fs via. nfs. This depends on the info in the
397 * nfs_diskless structure that has been filled in properly by some primary
398 * bootstrap.
399 * It goes something like this:
400 * - do enough of "ifconfig" by calling ifioctl() so that the system
401 * can talk to the server
402 * - If nfs_diskless.mygateway is filled in, use that address as
403 * a default gateway.
404 * - build the rootfs mount point and call mountnfs() to do the rest.
405 *
406 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
407 * structure, as well as other global NFS client variables here, as
408 * nfs_mountroot() will be called once in the boot before any other NFS
409 * client activity occurs.
410 */
411 int
412 nfs_mountroot(struct mount *mp)
413 {
414 struct thread *td = curthread;
415 struct nfsv3_diskless *nd = &nfsv3_diskless;
416 struct socket *so;
417 struct vnode *vp;
418 struct ifreq ir;
419 int error;
420 u_long l;
421 char buf[128];
422 char *cp;
423
424 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
425 bootpc_init(); /* use bootp to get nfs_diskless filled in */
426 #elif defined(NFS_ROOT)
427 nfs_setup_diskless();
428 #endif
429
430 if (nfs_diskless_valid == 0)
431 return (-1);
432 if (nfs_diskless_valid == 1)
433 nfs_convert_diskless();
434
435 /*
436 * XXX splnet, so networks will receive...
437 */
438 splnet();
439
440 /*
441 * Do enough of ifconfig(8) so that the critical net interface can
442 * talk to the server.
443 */
444 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
445 td->td_ucred, td);
446 if (error)
447 panic("nfs_mountroot: socreate(%04x): %d",
448 nd->myif.ifra_addr.sa_family, error);
449
450 #if 0 /* XXX Bad idea */
451 /*
452 * We might not have been told the right interface, so we pass
453 * over the first ten interfaces of the same kind, until we get
454 * one of them configured.
455 */
456
457 for (i = strlen(nd->myif.ifra_name) - 1;
458 nd->myif.ifra_name[i] >= '' &&
459 nd->myif.ifra_name[i] <= '9';
460 nd->myif.ifra_name[i] ++) {
461 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
462 if(!error)
463 break;
464 }
465 #endif
466
467 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
468 if (error)
469 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
470
471 if ((cp = getenv("boot.netif.mtu")) != NULL) {
472 ir.ifr_mtu = strtol(cp, NULL, 10);
473 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
474 freeenv(cp);
475 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
476 if (error)
477 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
478 }
479 soclose(so);
480
481 /*
482 * If the gateway field is filled in, set it as the default route.
483 * Note that pxeboot will set a default route of 0 if the route
484 * is not set by the DHCP server. Check also for a value of 0
485 * to avoid panicking inappropriately in that situation.
486 */
487 if (nd->mygateway.sin_len != 0 &&
488 nd->mygateway.sin_addr.s_addr != 0) {
489 struct sockaddr_in mask, sin;
490
491 bzero((caddr_t)&mask, sizeof(mask));
492 sin = mask;
493 sin.sin_family = AF_INET;
494 sin.sin_len = sizeof(sin);
495 /* XXX MRT use table 0 for this sort of thing */
496 error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
497 (struct sockaddr *)&nd->mygateway,
498 (struct sockaddr *)&mask,
499 RTF_UP | RTF_GATEWAY, NULL);
500 if (error)
501 panic("nfs_mountroot: RTM_ADD: %d", error);
502 }
503
504 /*
505 * Create the rootfs mount point.
506 */
507 nd->root_args.fh = nd->root_fh;
508 nd->root_args.fhsize = nd->root_fhsize;
509 l = ntohl(nd->root_saddr.sin_addr.s_addr);
510 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
511 (l >> 24) & 0xff, (l >> 16) & 0xff,
512 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
513 printf("NFS ROOT: %s\n", buf);
514 nd->root_args.hostname = buf;
515 if ((error = nfs_mountdiskless(buf,
516 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
517 return (error);
518 }
519
520 /*
521 * This is not really an nfs issue, but it is much easier to
522 * set hostname here and then let the "/etc/rc.xxx" files
523 * mount the right /var based upon its preset value.
524 */
525 mtx_lock(&prison0.pr_mtx);
526 strlcpy(prison0.pr_hostname, nd->my_hostnam,
527 sizeof (prison0.pr_hostname));
528 mtx_unlock(&prison0.pr_mtx);
529 inittodr(ntohl(nd->root_time));
530 return (0);
531 }
532
533 /*
534 * Internal version of mount system call for diskless setup.
535 */
536 static int
537 nfs_mountdiskless(char *path,
538 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
539 struct vnode **vpp, struct mount *mp)
540 {
541 struct sockaddr *nam;
542 int error;
543
544 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
545 if ((error = mountnfs(args, mp, nam, path, vpp,
546 td->td_ucred)) != 0) {
547 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
548 return (error);
549 }
550 return (0);
551 }
552
553 static int
554 nfs_sec_name_to_num(char *sec)
555 {
556 if (!strcmp(sec, "krb5"))
557 return (RPCSEC_GSS_KRB5);
558 if (!strcmp(sec, "krb5i"))
559 return (RPCSEC_GSS_KRB5I);
560 if (!strcmp(sec, "krb5p"))
561 return (RPCSEC_GSS_KRB5P);
562 if (!strcmp(sec, "sys"))
563 return (AUTH_SYS);
564 /*
565 * Userland should validate the string but we will try and
566 * cope with unexpected values.
567 */
568 return (AUTH_SYS);
569 }
570
571 static void
572 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
573 const char *hostname)
574 {
575 int s;
576 int adjsock;
577 int maxio;
578 char *p;
579 char *secname;
580 char *principal;
581
582 s = splnet();
583
584 /*
585 * Set read-only flag if requested; otherwise, clear it if this is
586 * an update. If this is not an update, then either the read-only
587 * flag is already clear, or this is a root mount and it was set
588 * intentionally at some previous point.
589 */
590 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
591 MNT_ILOCK(mp);
592 mp->mnt_flag |= MNT_RDONLY;
593 MNT_IUNLOCK(mp);
594 } else if (mp->mnt_flag & MNT_UPDATE) {
595 MNT_ILOCK(mp);
596 mp->mnt_flag &= ~MNT_RDONLY;
597 MNT_IUNLOCK(mp);
598 }
599
600 /*
601 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
602 * no sense in that context. Also, set up appropriate retransmit
603 * and soft timeout behavior.
604 */
605 if (argp->sotype == SOCK_STREAM) {
606 nmp->nm_flag &= ~NFSMNT_NOCONN;
607 nmp->nm_flag |= NFSMNT_DUMBTIMR;
608 nmp->nm_timeo = NFS_MAXTIMEO;
609 nmp->nm_retry = NFS_RETRANS_TCP;
610 }
611
612 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
613 if ((argp->flags & NFSMNT_NFSV3) == 0)
614 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
615
616 /* Re-bind if rsrvd port requested and wasn't on one */
617 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
618 && (argp->flags & NFSMNT_RESVPORT);
619 /* Also re-bind if we're switching to/from a connected UDP socket */
620 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
621 (argp->flags & NFSMNT_NOCONN));
622
623 /* Update flags atomically. Don't change the lock bits. */
624 nmp->nm_flag = argp->flags | nmp->nm_flag;
625 splx(s);
626
627 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
628 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
629 if (nmp->nm_timeo < NFS_MINTIMEO)
630 nmp->nm_timeo = NFS_MINTIMEO;
631 else if (nmp->nm_timeo > NFS_MAXTIMEO)
632 nmp->nm_timeo = NFS_MAXTIMEO;
633 }
634
635 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
636 nmp->nm_retry = argp->retrans;
637 if (nmp->nm_retry > NFS_MAXREXMIT)
638 nmp->nm_retry = NFS_MAXREXMIT;
639 }
640
641 if (argp->flags & NFSMNT_NFSV3) {
642 if (argp->sotype == SOCK_DGRAM)
643 maxio = NFS_MAXDGRAMDATA;
644 else
645 maxio = NFS_MAXDATA;
646 } else
647 maxio = NFS_V2MAXDATA;
648
649 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
650 nmp->nm_wsize = argp->wsize;
651 /* Round down to multiple of blocksize */
652 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
653 if (nmp->nm_wsize <= 0)
654 nmp->nm_wsize = NFS_FABLKSIZE;
655 }
656 if (nmp->nm_wsize > maxio)
657 nmp->nm_wsize = maxio;
658 if (nmp->nm_wsize > MAXBSIZE)
659 nmp->nm_wsize = MAXBSIZE;
660
661 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
662 nmp->nm_rsize = argp->rsize;
663 /* Round down to multiple of blocksize */
664 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
665 if (nmp->nm_rsize <= 0)
666 nmp->nm_rsize = NFS_FABLKSIZE;
667 }
668 if (nmp->nm_rsize > maxio)
669 nmp->nm_rsize = maxio;
670 if (nmp->nm_rsize > MAXBSIZE)
671 nmp->nm_rsize = MAXBSIZE;
672
673 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
674 nmp->nm_readdirsize = argp->readdirsize;
675 }
676 if (nmp->nm_readdirsize > maxio)
677 nmp->nm_readdirsize = maxio;
678 if (nmp->nm_readdirsize > nmp->nm_rsize)
679 nmp->nm_readdirsize = nmp->nm_rsize;
680
681 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
682 nmp->nm_acregmin = argp->acregmin;
683 else
684 nmp->nm_acregmin = NFS_MINATTRTIMO;
685 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
686 nmp->nm_acregmax = argp->acregmax;
687 else
688 nmp->nm_acregmax = NFS_MAXATTRTIMO;
689 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
690 nmp->nm_acdirmin = argp->acdirmin;
691 else
692 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
693 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
694 nmp->nm_acdirmax = argp->acdirmax;
695 else
696 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
697 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
698 nmp->nm_acdirmin = nmp->nm_acdirmax;
699 if (nmp->nm_acregmin > nmp->nm_acregmax)
700 nmp->nm_acregmin = nmp->nm_acregmax;
701
702 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
703 if (argp->maxgrouplist <= NFS_MAXGRPS)
704 nmp->nm_numgrps = argp->maxgrouplist;
705 else
706 nmp->nm_numgrps = NFS_MAXGRPS;
707 }
708 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
709 if (argp->readahead <= NFS_MAXRAHEAD)
710 nmp->nm_readahead = argp->readahead;
711 else
712 nmp->nm_readahead = NFS_MAXRAHEAD;
713 }
714 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
715 if (argp->wcommitsize < nmp->nm_wsize)
716 nmp->nm_wcommitsize = nmp->nm_wsize;
717 else
718 nmp->nm_wcommitsize = argp->wcommitsize;
719 }
720 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
721 if (argp->deadthresh <= NFS_MAXDEADTHRESH)
722 nmp->nm_deadthresh = argp->deadthresh;
723 else
724 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
725 }
726
727 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
728 (nmp->nm_soproto != argp->proto));
729 nmp->nm_sotype = argp->sotype;
730 nmp->nm_soproto = argp->proto;
731
732 if (nmp->nm_client && adjsock) {
733 nfs_safedisconnect(nmp);
734 if (nmp->nm_sotype == SOCK_DGRAM)
735 while (nfs_connect(nmp)) {
736 printf("nfs_args: retrying connect\n");
737 (void) tsleep(&fake_wchan, PSOCK, "nfscon", hz);
738 }
739 }
740
741 if (hostname) {
742 strlcpy(nmp->nm_hostname, hostname,
743 sizeof(nmp->nm_hostname));
744 p = strchr(nmp->nm_hostname, ':');
745 if (p)
746 *p = '\0';
747 }
748
749 if (vfs_getopt(mp->mnt_optnew, "sec",
750 (void **) &secname, NULL) == 0) {
751 nmp->nm_secflavor = nfs_sec_name_to_num(secname);
752 } else {
753 nmp->nm_secflavor = AUTH_SYS;
754 }
755
756 if (vfs_getopt(mp->mnt_optnew, "principal",
757 (void **) &principal, NULL) == 0) {
758 strlcpy(nmp->nm_principal, principal,
759 sizeof(nmp->nm_principal));
760 } else {
761 snprintf(nmp->nm_principal, sizeof(nmp->nm_principal),
762 "nfs@%s", nmp->nm_hostname);
763 }
764 }
765
/*
 * Option names accepted by nfs_mount(); the list is handed to
 * vfs_filteropt(), and a mount request fails with EINVAL when that
 * filter objects.  NULL-terminated.
 *
 * "lockd" and "readahead" are listed because nfs_mount() parses them;
 * without an entry here they would be refused before ever being looked
 * at.  "conn" is intentionally not listed: the handler in nfs_mount()
 * currently sets NFSMNT_NOCONN for it, so accepting the name would
 * silently do the opposite of what it says.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "dumbtimer", "noconn", "nolockd", "lockd", "intr", "rdirplus",
    "resvport", "readdirsize", "readahead", "soft", "hard", "mntudp", "tcp",
    "udp", "wsize", "rsize",
    "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax",
    "deadthresh", "hostname", "timeout", "addr", "fh", "nfsv3", "sec",
    "maxgroups", "principal",
    NULL };
775
776 /*
777 * VFS Operations.
778 *
779 * mount system call
780 * It seems a bit dumb to copyinstr() the host and path here and then
781 * bcopy() them in mountnfs(), but I wanted to detect errors before
782 * doing the sockargs() call because sockargs() allocates an mbuf and
783 * an error after that means that I have to release the mbuf.
784 */
785 /* ARGSUSED */
786 static int
787 nfs_mount(struct mount *mp)
788 {
789 struct nfs_args args = {
790 .version = NFS_ARGSVERSION,
791 .addr = NULL,
792 .addrlen = sizeof (struct sockaddr_in),
793 .sotype = SOCK_STREAM,
794 .proto = 0,
795 .fh = NULL,
796 .fhsize = 0,
797 .flags = NFSMNT_RESVPORT,
798 .wsize = NFS_WSIZE,
799 .rsize = NFS_RSIZE,
800 .readdirsize = NFS_READDIRSIZE,
801 .timeo = 10,
802 .retrans = NFS_RETRANS,
803 .maxgrouplist = NFS_MAXGRPS,
804 .readahead = NFS_DEFRAHEAD,
805 .wcommitsize = 0, /* was: NQ_DEFLEASE */
806 .deadthresh = NFS_MAXDEADTHRESH, /* was: NQ_DEADTHRESH */
807 .hostname = NULL,
808 /* args version 4 */
809 .acregmin = NFS_MINATTRTIMO,
810 .acregmax = NFS_MAXATTRTIMO,
811 .acdirmin = NFS_MINDIRATTRTIMO,
812 .acdirmax = NFS_MAXDIRATTRTIMO,
813 };
814 int error, ret, has_nfs_args_opt;
815 int has_addr_opt, has_fh_opt, has_hostname_opt;
816 struct sockaddr *nam;
817 struct vnode *vp;
818 char hst[MNAMELEN];
819 size_t len;
820 u_char nfh[NFSX_V3FHMAX];
821 char *opt;
822
823 has_nfs_args_opt = 0;
824 has_addr_opt = 0;
825 has_fh_opt = 0;
826 has_hostname_opt = 0;
827
828 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
829 error = EINVAL;
830 goto out;
831 }
832
833 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
834 error = nfs_mountroot(mp);
835 goto out;
836 }
837
838 /*
839 * The old mount_nfs program passed the struct nfs_args
840 * from userspace to kernel. The new mount_nfs program
841 * passes string options via nmount() from userspace to kernel
842 * and we populate the struct nfs_args in the kernel.
843 */
844 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
845 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
846 sizeof args);
847 if (error)
848 goto out;
849
850 if (args.version != NFS_ARGSVERSION) {
851 error = EPROGMISMATCH;
852 goto out;
853 }
854 has_nfs_args_opt = 1;
855 }
856
857 if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
858 args.flags |= NFSMNT_DUMBTIMR;
859 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
860 args.flags |= NFSMNT_NOCONN;
861 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
862 args.flags |= NFSMNT_NOCONN;
863 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
864 args.flags |= NFSMNT_NOLOCKD;
865 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
866 args.flags &= ~NFSMNT_NOLOCKD;
867 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
868 args.flags |= NFSMNT_INT;
869 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
870 args.flags |= NFSMNT_RDIRPLUS;
871 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
872 args.flags |= NFSMNT_RESVPORT;
873 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
874 args.flags &= ~NFSMNT_RESVPORT;
875 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
876 args.flags |= NFSMNT_SOFT;
877 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
878 args.flags &= ~NFSMNT_SOFT;
879 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
880 args.sotype = SOCK_DGRAM;
881 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
882 args.sotype = SOCK_DGRAM;
883 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
884 args.sotype = SOCK_STREAM;
885 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
886 args.flags |= NFSMNT_NFSV3;
887 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
888 if (opt == NULL) {
889 vfs_mount_error(mp, "illegal readdirsize");
890 error = EINVAL;
891 goto out;
892 }
893 ret = sscanf(opt, "%d", &args.readdirsize);
894 if (ret != 1 || args.readdirsize <= 0) {
895 vfs_mount_error(mp, "illegal readdirsize: %s",
896 opt);
897 error = EINVAL;
898 goto out;
899 }
900 args.flags |= NFSMNT_READDIRSIZE;
901 }
902 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
903 if (opt == NULL) {
904 vfs_mount_error(mp, "illegal readahead");
905 error = EINVAL;
906 goto out;
907 }
908 ret = sscanf(opt, "%d", &args.readahead);
909 if (ret != 1 || args.readahead <= 0) {
910 vfs_mount_error(mp, "illegal readahead: %s",
911 opt);
912 error = EINVAL;
913 goto out;
914 }
915 args.flags |= NFSMNT_READAHEAD;
916 }
917 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
918 if (opt == NULL) {
919 vfs_mount_error(mp, "illegal wsize");
920 error = EINVAL;
921 goto out;
922 }
923 ret = sscanf(opt, "%d", &args.wsize);
924 if (ret != 1 || args.wsize <= 0) {
925 vfs_mount_error(mp, "illegal wsize: %s",
926 opt);
927 error = EINVAL;
928 goto out;
929 }
930 args.flags |= NFSMNT_WSIZE;
931 }
932 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
933 if (opt == NULL) {
934 vfs_mount_error(mp, "illegal rsize");
935 error = EINVAL;
936 goto out;
937 }
938 ret = sscanf(opt, "%d", &args.rsize);
939 if (ret != 1 || args.rsize <= 0) {
940 vfs_mount_error(mp, "illegal wsize: %s",
941 opt);
942 error = EINVAL;
943 goto out;
944 }
945 args.flags |= NFSMNT_RSIZE;
946 }
947 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
948 if (opt == NULL) {
949 vfs_mount_error(mp, "illegal retrans");
950 error = EINVAL;
951 goto out;
952 }
953 ret = sscanf(opt, "%d", &args.retrans);
954 if (ret != 1 || args.retrans <= 0) {
955 vfs_mount_error(mp, "illegal retrans: %s",
956 opt);
957 error = EINVAL;
958 goto out;
959 }
960 args.flags |= NFSMNT_RETRANS;
961 }
962 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
963 ret = sscanf(opt, "%d", &args.acregmin);
964 if (ret != 1 || args.acregmin < 0) {
965 vfs_mount_error(mp, "illegal acregmin: %s",
966 opt);
967 error = EINVAL;
968 goto out;
969 }
970 args.flags |= NFSMNT_ACREGMIN;
971 }
972 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
973 ret = sscanf(opt, "%d", &args.acregmax);
974 if (ret != 1 || args.acregmax < 0) {
975 vfs_mount_error(mp, "illegal acregmax: %s",
976 opt);
977 error = EINVAL;
978 goto out;
979 }
980 args.flags |= NFSMNT_ACREGMAX;
981 }
982 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
983 ret = sscanf(opt, "%d", &args.acdirmin);
984 if (ret != 1 || args.acdirmin < 0) {
985 vfs_mount_error(mp, "illegal acdirmin: %s",
986 opt);
987 error = EINVAL;
988 goto out;
989 }
990 args.flags |= NFSMNT_ACDIRMIN;
991 }
992 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
993 ret = sscanf(opt, "%d", &args.acdirmax);
994 if (ret != 1 || args.acdirmax < 0) {
995 vfs_mount_error(mp, "illegal acdirmax: %s",
996 opt);
997 error = EINVAL;
998 goto out;
999 }
1000 args.flags |= NFSMNT_ACDIRMAX;
1001 }
1002 if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) {
1003 ret = sscanf(opt, "%d", &args.deadthresh);
1004 if (ret != 1 || args.deadthresh <= 0) {
1005 vfs_mount_error(mp, "illegal deadthresh: %s",
1006 opt);
1007 error = EINVAL;
1008 goto out;
1009 }
1010 args.flags |= NFSMNT_DEADTHRESH;
1011 }
1012 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1013 ret = sscanf(opt, "%d", &args.timeo);
1014 if (ret != 1 || args.timeo <= 0) {
1015 vfs_mount_error(mp, "illegal timeout: %s",
1016 opt);
1017 error = EINVAL;
1018 goto out;
1019 }
1020 args.flags |= NFSMNT_TIMEO;
1021 }
1022 if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) {
1023 ret = sscanf(opt, "%d", &args.maxgrouplist);
1024 if (ret != 1 || args.timeo <= 0) {
1025 vfs_mount_error(mp, "illegal maxgroups: %s",
1026 opt);
1027 error = EINVAL;
1028 goto out;
1029 }
1030 args.flags |= NFSMNT_MAXGRPS;
1031 }
1032 if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1033 &args.addrlen) == 0) {
1034 has_addr_opt = 1;
1035 if (args.addrlen > SOCK_MAXADDRLEN) {
1036 error = ENAMETOOLONG;
1037 goto out;
1038 }
1039 nam = malloc(args.addrlen, M_SONAME,
1040 M_WAITOK);
1041 bcopy(args.addr, nam, args.addrlen);
1042 nam->sa_len = args.addrlen;
1043 }
1044 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1045 &args.fhsize) == 0) {
1046 has_fh_opt = 1;
1047 }
1048 if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
1049 NULL) == 0) {
1050 has_hostname_opt = 1;
1051 }
1052 if (args.hostname == NULL) {
1053 vfs_mount_error(mp, "Invalid hostname");
1054 error = EINVAL;
1055 goto out;
1056 }
1057 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1058 vfs_mount_error(mp, "Bad file handle");
1059 error = EINVAL;
1060 goto out;
1061 }
1062
1063 if (mp->mnt_flag & MNT_UPDATE) {
1064 struct nfsmount *nmp = VFSTONFS(mp);
1065
1066 if (nmp == NULL) {
1067 error = EIO;
1068 goto out;
1069 }
1070 /*
1071 * When doing an update, we can't change from or to
1072 * v3, switch lockd strategies or change cookie translation
1073 */
1074 args.flags = (args.flags &
1075 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1076 (nmp->nm_flag &
1077 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1078 nfs_decode_args(mp, nmp, &args, NULL);
1079 goto out;
1080 }
1081
1082 /*
1083 * Make the nfs_ip_paranoia sysctl serve as the default connection
1084 * or no-connection mode for those protocols that support
1085 * no-connection mode (the flag will be cleared later for protocols
1086 * that do not support no-connection mode). This will allow a client
1087 * to receive replies from a different IP then the request was
1088 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1089 * not 0.
1090 */
1091 if (nfs_ip_paranoia == 0)
1092 args.flags |= NFSMNT_NOCONN;
1093
1094 if (has_nfs_args_opt) {
1095 /*
1096 * In the 'nfs_args' case, the pointers in the args
1097 * structure are in userland - we copy them in here.
1098 */
1099 if (!has_fh_opt) {
1100 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1101 args.fhsize);
1102 if (error) {
1103 goto out;
1104 }
1105 args.fh = nfh;
1106 }
1107 if (!has_hostname_opt) {
1108 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
1109 if (error) {
1110 goto out;
1111 }
1112 bzero(&hst[len], MNAMELEN - len);
1113 args.hostname = hst;
1114 }
1115 if (!has_addr_opt) {
1116 /* sockargs() call must be after above copyin() calls */
1117 error = getsockaddr(&nam, (caddr_t)args.addr,
1118 args.addrlen);
1119 if (error) {
1120 goto out;
1121 }
1122 }
1123 }
1124 error = mountnfs(&args, mp, nam, args.hostname, &vp,
1125 curthread->td_ucred);
1126 out:
1127 if (!error) {
1128 MNT_ILOCK(mp);
1129 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1130 MNT_IUNLOCK(mp);
1131 }
1132 return (error);
1133 }
1134
1135
1136 /*
1137 * VFS Operations.
1138 *
1139 * mount system call
1140 * It seems a bit dumb to copyinstr() the host and path here and then
1141 * bcopy() them in mountnfs(), but I wanted to detect errors before
1142 * doing the sockargs() call because sockargs() allocates an mbuf and
1143 * an error after that means that I have to release the mbuf.
1144 */
1145 /* ARGSUSED */
1146 static int
1147 nfs_cmount(struct mntarg *ma, void *data, int flags)
1148 {
1149 int error;
1150 struct nfs_args args;
1151
1152 error = copyin(data, &args, sizeof (struct nfs_args));
1153 if (error)
1154 return error;
1155
1156 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1157
1158 error = kernel_mount(ma, flags);
1159 return (error);
1160 }
1161
1162 /*
1163 * Common code for mount and mountroot
1164 */
1165 static int
1166 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1167 char *hst, struct vnode **vpp, struct ucred *cred)
1168 {
1169 struct nfsmount *nmp;
1170 struct nfsnode *np;
1171 int error;
1172 struct vattr attrs;
1173
1174 if (mp->mnt_flag & MNT_UPDATE) {
1175 nmp = VFSTONFS(mp);
1176 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1177 free(nam, M_SONAME);
1178 return (0);
1179 } else {
1180 nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
1181 bzero((caddr_t)nmp, sizeof (struct nfsmount));
1182 TAILQ_INIT(&nmp->nm_bufq);
1183 mp->mnt_data = nmp;
1184 }
1185 vfs_getnewfsid(mp);
1186 nmp->nm_mountp = mp;
1187 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
1188
1189 /*
1190 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1191 * high, depending on whether we end up with negative offsets in
1192 * the client or server somewhere. 2GB-1 may be safer.
1193 *
1194 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
1195 * that we can handle until we find out otherwise.
1196 * XXX Our "safe" limit on the client is what we can store in our
1197 * buffer cache using signed(!) block numbers.
1198 */
1199 if ((argp->flags & NFSMNT_NFSV3) == 0)
1200 nmp->nm_maxfilesize = 0xffffffffLL;
1201 else
1202 nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1203
1204 nmp->nm_timeo = NFS_TIMEO;
1205 nmp->nm_retry = NFS_RETRANS;
1206 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
1207 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
1208 } else {
1209 nmp->nm_wsize = NFS_WSIZE;
1210 nmp->nm_rsize = NFS_RSIZE;
1211 }
1212 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1213 nmp->nm_readdirsize = NFS_READDIRSIZE;
1214 nmp->nm_numgrps = NFS_MAXGRPS;
1215 nmp->nm_readahead = NFS_DEFRAHEAD;
1216 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
1217 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1218 if (nmp->nm_tprintf_delay < 0)
1219 nmp->nm_tprintf_delay = 0;
1220 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1221 if (nmp->nm_tprintf_initial_delay < 0)
1222 nmp->nm_tprintf_initial_delay = 0;
1223 nmp->nm_fhsize = argp->fhsize;
1224 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1225 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1226 nmp->nm_nam = nam;
1227 /* Set up the sockets and per-host congestion */
1228 nmp->nm_sotype = argp->sotype;
1229 nmp->nm_soproto = argp->proto;
1230 nmp->nm_rpcops = &nfs_rpcops;
1231
1232 nfs_decode_args(mp, nmp, argp, hst);
1233
1234 /*
1235 * For Connection based sockets (TCP,...) defer the connect until
1236 * the first request, in case the server is not responding.
1237 */
1238 if (nmp->nm_sotype == SOCK_DGRAM &&
1239 (error = nfs_connect(nmp)))
1240 goto bad;
1241
1242 /*
1243 * This is silly, but it has to be set so that vinifod() works.
1244 * We do not want to do an nfs_statfs() here since we can get
1245 * stuck on a dead server and we are holding a lock on the mount
1246 * point.
1247 */
1248 mtx_lock(&nmp->nm_mtx);
1249 mp->mnt_stat.f_iosize = nfs_iosize(nmp);
1250 mtx_unlock(&nmp->nm_mtx);
1251 /*
1252 * A reference count is needed on the nfsnode representing the
1253 * remote root. If this object is not persistent, then backward
1254 * traversals of the mount point (i.e. "..") will not work if
1255 * the nfsnode gets flushed out of the cache. Ufs does not have
1256 * this problem, because one can identify root inodes by their
1257 * number == ROOTINO (2).
1258 */
1259 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
1260 if (error)
1261 goto bad;
1262 *vpp = NFSTOV(np);
1263
1264 /*
1265 * Get file attributes and transfer parameters for the
1266 * mountpoint. This has the side effect of filling in
1267 * (*vpp)->v_type with the correct value.
1268 */
1269 if (argp->flags & NFSMNT_NFSV3)
1270 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
1271 else
1272 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred);
1273
1274 /*
1275 * Lose the lock but keep the ref.
1276 */
1277 VOP_UNLOCK(*vpp, 0);
1278
1279 return (0);
1280 bad:
1281 nfs_disconnect(nmp);
1282 mtx_destroy(&nmp->nm_mtx);
1283 uma_zfree(nfsmount_zone, nmp);
1284 free(nam, M_SONAME);
1285 return (error);
1286 }
1287
1288 /*
1289 * unmount system call
1290 */
1291 static int
1292 nfs_unmount(struct mount *mp, int mntflags)
1293 {
1294 struct nfsmount *nmp;
1295 int error, flags = 0;
1296
1297 if (mntflags & MNT_FORCE)
1298 flags |= FORCECLOSE;
1299 nmp = VFSTONFS(mp);
1300 /*
1301 * Goes something like this..
1302 * - Call vflush() to clear out vnodes for this filesystem
1303 * - Close the socket
1304 * - Free up the data structures
1305 */
1306 /* In the forced case, cancel any outstanding requests. */
1307 if (flags & FORCECLOSE) {
1308 error = nfs_nmcancelreqs(nmp);
1309 if (error)
1310 goto out;
1311 }
1312 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1313 error = vflush(mp, 1, flags, curthread);
1314 if (error)
1315 goto out;
1316
1317 /*
1318 * We are now committed to the unmount.
1319 */
1320 nfs_disconnect(nmp);
1321 free(nmp->nm_nam, M_SONAME);
1322
1323 mtx_destroy(&nmp->nm_mtx);
1324 uma_zfree(nfsmount_zone, nmp);
1325 out:
1326 return (error);
1327 }
1328
1329 /*
1330 * Return root of a filesystem
1331 */
1332 static int
1333 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1334 {
1335 struct vnode *vp;
1336 struct nfsmount *nmp;
1337 struct nfsnode *np;
1338 int error;
1339
1340 nmp = VFSTONFS(mp);
1341 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1342 if (error)
1343 return error;
1344 vp = NFSTOV(np);
1345 /*
1346 * Get transfer parameters and attributes for root vnode once.
1347 */
1348 mtx_lock(&nmp->nm_mtx);
1349 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
1350 (nmp->nm_flag & NFSMNT_NFSV3)) {
1351 mtx_unlock(&nmp->nm_mtx);
1352 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1353 } else
1354 mtx_unlock(&nmp->nm_mtx);
1355 if (vp->v_type == VNON)
1356 vp->v_type = VDIR;
1357 vp->v_vflag |= VV_ROOT;
1358 *vpp = vp;
1359 return (0);
1360 }
1361
1362 /*
1363 * Flush out the buffer cache
1364 */
1365 /* ARGSUSED */
1366 static int
1367 nfs_sync(struct mount *mp, int waitfor)
1368 {
1369 struct vnode *vp, *mvp;
1370 struct thread *td;
1371 int error, allerror = 0;
1372
1373 td = curthread;
1374
1375 /*
1376 * Force stale buffer cache information to be flushed.
1377 */
1378 MNT_ILOCK(mp);
1379 loop:
1380 MNT_VNODE_FOREACH(vp, mp, mvp) {
1381 VI_LOCK(vp);
1382 MNT_IUNLOCK(mp);
1383 /* XXX Racy bv_cnt check. */
1384 if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1385 waitfor == MNT_LAZY) {
1386 VI_UNLOCK(vp);
1387 MNT_ILOCK(mp);
1388 continue;
1389 }
1390 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1391 MNT_ILOCK(mp);
1392 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1393 goto loop;
1394 }
1395 error = VOP_FSYNC(vp, waitfor, td);
1396 if (error)
1397 allerror = error;
1398 VOP_UNLOCK(vp, 0);
1399 vrele(vp);
1400
1401 MNT_ILOCK(mp);
1402 }
1403 MNT_IUNLOCK(mp);
1404 return (allerror);
1405 }
1406
1407 static int
1408 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1409 {
1410 struct nfsmount *nmp = VFSTONFS(mp);
1411 struct vfsquery vq;
1412 int error;
1413
1414 bzero(&vq, sizeof(vq));
1415 switch (op) {
1416 #if 0
1417 case VFS_CTL_NOLOCKS:
1418 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1419 if (req->oldptr != NULL) {
1420 error = SYSCTL_OUT(req, &val, sizeof(val));
1421 if (error)
1422 return (error);
1423 }
1424 if (req->newptr != NULL) {
1425 error = SYSCTL_IN(req, &val, sizeof(val));
1426 if (error)
1427 return (error);
1428 if (val)
1429 nmp->nm_flag |= NFSMNT_NOLOCKS;
1430 else
1431 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1432 }
1433 break;
1434 #endif
1435 case VFS_CTL_QUERY:
1436 mtx_lock(&nmp->nm_mtx);
1437 if (nmp->nm_state & NFSSTA_TIMEO)
1438 vq.vq_flags |= VQ_NOTRESP;
1439 mtx_unlock(&nmp->nm_mtx);
1440 #if 0
1441 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1442 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1443 vq.vq_flags |= VQ_NOTRESPLOCK;
1444 #endif
1445 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1446 break;
1447 case VFS_CTL_TIMEO:
1448 if (req->oldptr != NULL) {
1449 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1450 sizeof(nmp->nm_tprintf_initial_delay));
1451 if (error)
1452 return (error);
1453 }
1454 if (req->newptr != NULL) {
1455 error = vfs_suser(mp, req->td);
1456 if (error)
1457 return (error);
1458 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1459 sizeof(nmp->nm_tprintf_initial_delay));
1460 if (error)
1461 return (error);
1462 if (nmp->nm_tprintf_initial_delay < 0)
1463 nmp->nm_tprintf_initial_delay = 0;
1464 }
1465 break;
1466 default:
1467 return (ENOTSUP);
1468 }
1469 return (0);
1470 }
Cache object: c5856fe8f842bc05514907f22377f5ef
|