1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/jail.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/module.h>
52 #include <sys/mount.h>
53 #include <sys/proc.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/vnode.h>
59 #include <sys/signalvar.h>
60
61 #include <vm/vm.h>
62 #include <vm/vm_extern.h>
63 #include <vm/uma.h>
64
65 #include <net/if.h>
66 #include <net/route.h>
67 #include <netinet/in.h>
68
69 #include <rpc/rpc.h>
70
71 #include <nfs/nfsproto.h>
72 #include <nfsclient/nfs.h>
73 #include <nfsclient/nfsnode.h>
74 #include <nfsclient/nfsmount.h>
75 #include <nfs/xdr_subs.h>
76 #include <nfsclient/nfsm_subs.h>
77 #include <nfsclient/nfsdiskless.h>
78
79 MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
80 MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
81 MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
82 MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
83 MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");
84
85 uma_zone_t nfsmount_zone;
86
87 struct nfsstats nfsstats;
88
89 SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
90 SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
91 &nfsstats, nfsstats, "S,nfsstats");
92 static int nfs_ip_paranoia = 1;
93 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
94 &nfs_ip_paranoia, 0,
95 "Disallow accepting replies from IPs which differ from those sent");
96 #ifdef NFS_DEBUG
97 int nfs_debug;
98 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
99 "Toggle debug flag");
100 #endif
101 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
102 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
103 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0,
104 "Delay before printing \"nfs server not responding\" messages");
105 /* how long between console messages "nfs server foo not responding" */
106 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
107 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
108 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0,
109 "Delay between printing \"nfs server not responding\" messages");
110
111 static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
112 struct nfs_args *argp, const char *hostname);
113 static int mountnfs(struct nfs_args *, struct mount *,
114 struct sockaddr *, char *, struct vnode **,
115 struct ucred *cred);
116 static vfs_mount_t nfs_mount;
117 static vfs_cmount_t nfs_cmount;
118 static vfs_unmount_t nfs_unmount;
119 static vfs_root_t nfs_root;
120 static vfs_statfs_t nfs_statfs;
121 static vfs_sync_t nfs_sync;
122 static vfs_sysctl_t nfs_sysctl;
123
124 static int fake_wchan;
125
126 /*
127 * nfs vfs operations.
128 */
129 static struct vfsops nfs_vfsops = {
130 .vfs_init = nfs_init,
131 .vfs_mount = nfs_mount,
132 .vfs_cmount = nfs_cmount,
133 .vfs_root = nfs_root,
134 .vfs_statfs = nfs_statfs,
135 .vfs_sync = nfs_sync,
136 .vfs_uninit = nfs_uninit,
137 .vfs_unmount = nfs_unmount,
138 .vfs_sysctl = nfs_sysctl,
139 };
140 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
141
142 /* So that loader and kldload(2) can find us, wherever we are.. */
143 MODULE_VERSION(nfs, 1);
144 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
145 #ifdef KGSSAPI
146 MODULE_DEPEND(nfs, kgssapi, 1, 1, 1);
147 #endif
148
149 static struct nfs_rpcops nfs_rpcops = {
150 nfs_readrpc,
151 nfs_writerpc,
152 nfs_writebp,
153 nfs_readlinkrpc,
154 nfs_invaldir,
155 nfs_commit,
156 };
157
158 /*
159 * This structure must be filled in by a primary bootstrap or bootstrap
160 * server for a diskless/dataless machine. It is initialized below just
161 * to ensure that it is allocated to initialized data (.data not .bss).
162 */
163 struct nfs_diskless nfs_diskless = { { { 0 } } };
164 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
165 int nfs_diskless_valid = 0;
166
167 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
168 &nfs_diskless_valid, 0,
169 "Has the diskless struct been filled correctly");
170
171 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
172 nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
173
174 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
175 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
176 "%Ssockaddr_in", "Diskless root nfs address");
177
178
/* Prototypes for the diskless-boot helpers defined further down. */
void		nfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *path, struct sockaddr_in *sin,
		    struct nfs_args *args, struct thread *td,
		    struct vnode **vpp, struct mount *mp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
186
187 int
188 nfs_iosize(struct nfsmount *nmp)
189 {
190 int iosize;
191
192 /*
193 * Calculate the size used for io buffers. Use the larger
194 * of the two sizes to minimise nfs requests but make sure
195 * that it is at least one VM page to avoid wasting buffer
196 * space.
197 */
198 iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
199 iosize = imax(iosize, PAGE_SIZE);
200 return (iosize);
201 }
202
203 static void
204 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
205 {
206
207 args->version = NFS_ARGSVERSION;
208 args->addr = oargs->addr;
209 args->addrlen = oargs->addrlen;
210 args->sotype = oargs->sotype;
211 args->proto = oargs->proto;
212 args->fh = oargs->fh;
213 args->fhsize = oargs->fhsize;
214 args->flags = oargs->flags;
215 args->wsize = oargs->wsize;
216 args->rsize = oargs->rsize;
217 args->readdirsize = oargs->readdirsize;
218 args->timeo = oargs->timeo;
219 args->retrans = oargs->retrans;
220 args->maxgrouplist = oargs->maxgrouplist;
221 args->readahead = oargs->readahead;
222 args->deadthresh = oargs->deadthresh;
223 args->hostname = oargs->hostname;
224 }
225
226 static void
227 nfs_convert_diskless(void)
228 {
229
230 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
231 sizeof(struct ifaliasreq));
232 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
233 sizeof(struct sockaddr_in));
234 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
235 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
236 nfsv3_diskless.root_fhsize = NFSX_V3FH;
237 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
238 } else {
239 nfsv3_diskless.root_fhsize = NFSX_V2FH;
240 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
241 }
242 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
243 sizeof(struct sockaddr_in));
244 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
245 nfsv3_diskless.root_time = nfs_diskless.root_time;
246 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
247 MAXHOSTNAMELEN);
248 nfs_diskless_valid = 3;
249 }
250
251 /*
252 * nfs statfs call
253 */
254 static int
255 nfs_statfs(struct mount *mp, struct statfs *sbp)
256 {
257 struct vnode *vp;
258 struct thread *td;
259 struct nfs_statfs *sfp;
260 caddr_t bpos, dpos;
261 struct nfsmount *nmp = VFSTONFS(mp);
262 int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
263 struct mbuf *mreq, *mrep, *md, *mb;
264 struct nfsnode *np;
265 u_quad_t tquad;
266
267 td = curthread;
268 #ifndef nolint
269 sfp = NULL;
270 #endif
271 error = vfs_busy(mp, MBF_NOWAIT);
272 if (error)
273 return (error);
274 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
275 if (error) {
276 vfs_unbusy(mp);
277 return (error);
278 }
279 vp = NFSTOV(np);
280 mtx_lock(&nmp->nm_mtx);
281 if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
282 mtx_unlock(&nmp->nm_mtx);
283 (void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
284 } else
285 mtx_unlock(&nmp->nm_mtx);
286 nfsstats.rpccnt[NFSPROC_FSSTAT]++;
287 mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
288 mb = mreq;
289 bpos = mtod(mb, caddr_t);
290 nfsm_fhtom(vp, v3);
291 nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
292 if (v3)
293 nfsm_postop_attr(vp, retattr);
294 if (error) {
295 if (mrep != NULL)
296 m_freem(mrep);
297 goto nfsmout;
298 }
299 sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
300 mtx_lock(&nmp->nm_mtx);
301 sbp->f_iosize = nfs_iosize(nmp);
302 mtx_unlock(&nmp->nm_mtx);
303 if (v3) {
304 sbp->f_bsize = NFS_FABLKSIZE;
305 tquad = fxdr_hyper(&sfp->sf_tbytes);
306 sbp->f_blocks = tquad / NFS_FABLKSIZE;
307 tquad = fxdr_hyper(&sfp->sf_fbytes);
308 sbp->f_bfree = tquad / NFS_FABLKSIZE;
309 tquad = fxdr_hyper(&sfp->sf_abytes);
310 sbp->f_bavail = tquad / NFS_FABLKSIZE;
311 sbp->f_files = (fxdr_unsigned(int32_t,
312 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
313 sbp->f_ffree = (fxdr_unsigned(int32_t,
314 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
315 } else {
316 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
317 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
318 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
319 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
320 sbp->f_files = 0;
321 sbp->f_ffree = 0;
322 }
323 m_freem(mrep);
324 nfsmout:
325 vput(vp);
326 vfs_unbusy(mp);
327 return (error);
328 }
329
330 /*
331 * nfs version 3 fsinfo rpc call
332 */
333 int
334 nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
335 struct thread *td)
336 {
337 struct nfsv3_fsinfo *fsp;
338 u_int32_t pref, max;
339 caddr_t bpos, dpos;
340 int error = 0, retattr;
341 struct mbuf *mreq, *mrep, *md, *mb;
342 u_int64_t maxfsize;
343
344 nfsstats.rpccnt[NFSPROC_FSINFO]++;
345 mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
346 mb = mreq;
347 bpos = mtod(mb, caddr_t);
348 nfsm_fhtom(vp, 1);
349 nfsm_request(vp, NFSPROC_FSINFO, td, cred);
350 nfsm_postop_attr(vp, retattr);
351 if (!error) {
352 fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
353 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
354 mtx_lock(&nmp->nm_mtx);
355 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
356 nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
357 ~(NFS_FABLKSIZE - 1);
358 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
359 if (max < nmp->nm_wsize && max > 0) {
360 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
361 if (nmp->nm_wsize == 0)
362 nmp->nm_wsize = max;
363 }
364 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
365 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
366 nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
367 ~(NFS_FABLKSIZE - 1);
368 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
369 if (max < nmp->nm_rsize && max > 0) {
370 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
371 if (nmp->nm_rsize == 0)
372 nmp->nm_rsize = max;
373 }
374 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
375 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
376 nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
377 ~(NFS_DIRBLKSIZ - 1);
378 if (max < nmp->nm_readdirsize && max > 0) {
379 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
380 if (nmp->nm_readdirsize == 0)
381 nmp->nm_readdirsize = max;
382 }
383 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
384 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
385 nmp->nm_maxfilesize = maxfsize;
386 nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
387 nmp->nm_state |= NFSSTA_GOTFSINFO;
388 mtx_unlock(&nmp->nm_mtx);
389 }
390 m_freem(mrep);
391 nfsmout:
392 return (error);
393 }
394
395 /*
396 * Mount a remote root fs via. nfs. This depends on the info in the
397 * nfs_diskless structure that has been filled in properly by some primary
398 * bootstrap.
399 * It goes something like this:
400 * - do enough of "ifconfig" by calling ifioctl() so that the system
401 * can talk to the server
402 * - If nfs_diskless.mygateway is filled in, use that address as
403 * a default gateway.
404 * - build the rootfs mount point and call mountnfs() to do the rest.
405 *
406 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
407 * structure, as well as other global NFS client variables here, as
408 * nfs_mountroot() will be called once in the boot before any other NFS
409 * client activity occurs.
410 */
411 int
412 nfs_mountroot(struct mount *mp)
413 {
414 struct thread *td = curthread;
415 struct nfsv3_diskless *nd = &nfsv3_diskless;
416 struct socket *so;
417 struct vnode *vp;
418 struct ifreq ir;
419 int error;
420 u_long l;
421 char buf[128];
422 char *cp;
423
424 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
425 bootpc_init(); /* use bootp to get nfs_diskless filled in */
426 #elif defined(NFS_ROOT)
427 nfs_setup_diskless();
428 #endif
429
430 if (nfs_diskless_valid == 0)
431 return (-1);
432 if (nfs_diskless_valid == 1)
433 nfs_convert_diskless();
434
435 /*
436 * XXX splnet, so networks will receive...
437 */
438 splnet();
439
440 /*
441 * Do enough of ifconfig(8) so that the critical net interface can
442 * talk to the server.
443 */
444 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
445 td->td_ucred, td);
446 if (error)
447 panic("nfs_mountroot: socreate(%04x): %d",
448 nd->myif.ifra_addr.sa_family, error);
449
450 #if 0 /* XXX Bad idea */
451 /*
452 * We might not have been told the right interface, so we pass
453 * over the first ten interfaces of the same kind, until we get
454 * one of them configured.
455 */
456
457 for (i = strlen(nd->myif.ifra_name) - 1;
458 nd->myif.ifra_name[i] >= '' &&
459 nd->myif.ifra_name[i] <= '9';
460 nd->myif.ifra_name[i] ++) {
461 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
462 if(!error)
463 break;
464 }
465 #endif
466
467 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
468 if (error)
469 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
470
471 if ((cp = getenv("boot.netif.mtu")) != NULL) {
472 ir.ifr_mtu = strtol(cp, NULL, 10);
473 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
474 freeenv(cp);
475 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
476 if (error)
477 printf("nfs_mountroot: SIOCSIFMTU: %d", error);
478 }
479 soclose(so);
480
481 /*
482 * If the gateway field is filled in, set it as the default route.
483 * Note that pxeboot will set a default route of 0 if the route
484 * is not set by the DHCP server. Check also for a value of 0
485 * to avoid panicking inappropriately in that situation.
486 */
487 if (nd->mygateway.sin_len != 0 &&
488 nd->mygateway.sin_addr.s_addr != 0) {
489 struct sockaddr_in mask, sin;
490
491 bzero((caddr_t)&mask, sizeof(mask));
492 sin = mask;
493 sin.sin_family = AF_INET;
494 sin.sin_len = sizeof(sin);
495 /* XXX MRT use table 0 for this sort of thing */
496 error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
497 (struct sockaddr *)&nd->mygateway,
498 (struct sockaddr *)&mask,
499 RTF_UP | RTF_GATEWAY, NULL);
500 if (error)
501 panic("nfs_mountroot: RTM_ADD: %d", error);
502 }
503
504 /*
505 * Create the rootfs mount point.
506 */
507 nd->root_args.fh = nd->root_fh;
508 nd->root_args.fhsize = nd->root_fhsize;
509 l = ntohl(nd->root_saddr.sin_addr.s_addr);
510 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
511 (l >> 24) & 0xff, (l >> 16) & 0xff,
512 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam);
513 printf("NFS ROOT: %s\n", buf);
514 nd->root_args.hostname = buf;
515 if ((error = nfs_mountdiskless(buf,
516 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
517 return (error);
518 }
519
520 /*
521 * This is not really an nfs issue, but it is much easier to
522 * set hostname here and then let the "/etc/rc.xxx" files
523 * mount the right /var based upon its preset value.
524 */
525 mtx_lock(&prison0.pr_mtx);
526 strlcpy(prison0.pr_hostname, nd->my_hostnam,
527 sizeof (prison0.pr_hostname));
528 mtx_unlock(&prison0.pr_mtx);
529 inittodr(ntohl(nd->root_time));
530 return (0);
531 }
532
533 /*
534 * Internal version of mount system call for diskless setup.
535 */
536 static int
537 nfs_mountdiskless(char *path,
538 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
539 struct vnode **vpp, struct mount *mp)
540 {
541 struct sockaddr *nam;
542 int error;
543
544 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
545 if ((error = mountnfs(args, mp, nam, path, vpp,
546 td->td_ucred)) != 0) {
547 printf("nfs_mountroot: mount %s on /: %d\n", path, error);
548 return (error);
549 }
550 return (0);
551 }
552
553 static int
554 nfs_sec_name_to_num(char *sec)
555 {
556 if (!strcmp(sec, "krb5"))
557 return (RPCSEC_GSS_KRB5);
558 if (!strcmp(sec, "krb5i"))
559 return (RPCSEC_GSS_KRB5I);
560 if (!strcmp(sec, "krb5p"))
561 return (RPCSEC_GSS_KRB5P);
562 if (!strcmp(sec, "sys"))
563 return (AUTH_SYS);
564 /*
565 * Userland should validate the string but we will try and
566 * cope with unexpected values.
567 */
568 return (AUTH_SYS);
569 }
570
571 static void
572 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
573 const char *hostname)
574 {
575 int s;
576 int adjsock;
577 int maxio;
578 char *p;
579 char *secname;
580 char *principal;
581
582 s = splnet();
583
584 /*
585 * Set read-only flag if requested; otherwise, clear it if this is
586 * an update. If this is not an update, then either the read-only
587 * flag is already clear, or this is a root mount and it was set
588 * intentionally at some previous point.
589 */
590 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
591 MNT_ILOCK(mp);
592 mp->mnt_flag |= MNT_RDONLY;
593 MNT_IUNLOCK(mp);
594 } else if (mp->mnt_flag & MNT_UPDATE) {
595 MNT_ILOCK(mp);
596 mp->mnt_flag &= ~MNT_RDONLY;
597 MNT_IUNLOCK(mp);
598 }
599
600 /*
601 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
602 * no sense in that context. Also, set up appropriate retransmit
603 * and soft timeout behavior.
604 */
605 if (argp->sotype == SOCK_STREAM) {
606 nmp->nm_flag &= ~NFSMNT_NOCONN;
607 nmp->nm_flag |= NFSMNT_DUMBTIMR;
608 nmp->nm_timeo = NFS_MAXTIMEO;
609 nmp->nm_retry = NFS_RETRANS_TCP;
610 }
611
612 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
613 if ((argp->flags & NFSMNT_NFSV3) == 0)
614 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
615
616 /* Re-bind if rsrvd port requested and wasn't on one */
617 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
618 && (argp->flags & NFSMNT_RESVPORT);
619 /* Also re-bind if we're switching to/from a connected UDP socket */
620 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
621 (argp->flags & NFSMNT_NOCONN));
622
623 /* Update flags atomically. Don't change the lock bits. */
624 nmp->nm_flag = argp->flags | nmp->nm_flag;
625 splx(s);
626
627 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
628 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
629 if (nmp->nm_timeo < NFS_MINTIMEO)
630 nmp->nm_timeo = NFS_MINTIMEO;
631 else if (nmp->nm_timeo > NFS_MAXTIMEO)
632 nmp->nm_timeo = NFS_MAXTIMEO;
633 }
634
635 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
636 nmp->nm_retry = argp->retrans;
637 if (nmp->nm_retry > NFS_MAXREXMIT)
638 nmp->nm_retry = NFS_MAXREXMIT;
639 }
640
641 if (argp->flags & NFSMNT_NFSV3) {
642 if (argp->sotype == SOCK_DGRAM)
643 maxio = NFS_MAXDGRAMDATA;
644 else
645 maxio = NFS_MAXDATA;
646 } else
647 maxio = NFS_V2MAXDATA;
648
649 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
650 nmp->nm_wsize = argp->wsize;
651 /* Round down to multiple of blocksize */
652 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
653 if (nmp->nm_wsize <= 0)
654 nmp->nm_wsize = NFS_FABLKSIZE;
655 }
656 if (nmp->nm_wsize > maxio)
657 nmp->nm_wsize = maxio;
658 if (nmp->nm_wsize > MAXBSIZE)
659 nmp->nm_wsize = MAXBSIZE;
660
661 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
662 nmp->nm_rsize = argp->rsize;
663 /* Round down to multiple of blocksize */
664 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
665 if (nmp->nm_rsize <= 0)
666 nmp->nm_rsize = NFS_FABLKSIZE;
667 }
668 if (nmp->nm_rsize > maxio)
669 nmp->nm_rsize = maxio;
670 if (nmp->nm_rsize > MAXBSIZE)
671 nmp->nm_rsize = MAXBSIZE;
672
673 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
674 nmp->nm_readdirsize = argp->readdirsize;
675 }
676 if (nmp->nm_readdirsize > maxio)
677 nmp->nm_readdirsize = maxio;
678 if (nmp->nm_readdirsize > nmp->nm_rsize)
679 nmp->nm_readdirsize = nmp->nm_rsize;
680
681 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
682 nmp->nm_acregmin = argp->acregmin;
683 else
684 nmp->nm_acregmin = NFS_MINATTRTIMO;
685 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
686 nmp->nm_acregmax = argp->acregmax;
687 else
688 nmp->nm_acregmax = NFS_MAXATTRTIMO;
689 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
690 nmp->nm_acdirmin = argp->acdirmin;
691 else
692 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
693 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
694 nmp->nm_acdirmax = argp->acdirmax;
695 else
696 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
697 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
698 nmp->nm_acdirmin = nmp->nm_acdirmax;
699 if (nmp->nm_acregmin > nmp->nm_acregmax)
700 nmp->nm_acregmin = nmp->nm_acregmax;
701
702 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
703 if (argp->maxgrouplist <= NFS_MAXGRPS)
704 nmp->nm_numgrps = argp->maxgrouplist;
705 else
706 nmp->nm_numgrps = NFS_MAXGRPS;
707 }
708 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
709 if (argp->readahead <= NFS_MAXRAHEAD)
710 nmp->nm_readahead = argp->readahead;
711 else
712 nmp->nm_readahead = NFS_MAXRAHEAD;
713 }
714 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
715 if (argp->wcommitsize < nmp->nm_wsize)
716 nmp->nm_wcommitsize = nmp->nm_wsize;
717 else
718 nmp->nm_wcommitsize = argp->wcommitsize;
719 }
720 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
721 if (argp->deadthresh <= NFS_MAXDEADTHRESH)
722 nmp->nm_deadthresh = argp->deadthresh;
723 else
724 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
725 }
726
727 adjsock |= ((nmp->nm_sotype != argp->sotype) ||
728 (nmp->nm_soproto != argp->proto));
729 nmp->nm_sotype = argp->sotype;
730 nmp->nm_soproto = argp->proto;
731
732 if (nmp->nm_client && adjsock) {
733 nfs_safedisconnect(nmp);
734 if (nmp->nm_sotype == SOCK_DGRAM)
735 while (nfs_connect(nmp)) {
736 printf("nfs_args: retrying connect\n");
737 (void) tsleep(&fake_wchan, PSOCK, "nfscon", hz);
738 }
739 }
740
741 if (hostname) {
742 strlcpy(nmp->nm_hostname, hostname,
743 sizeof(nmp->nm_hostname));
744 p = strchr(nmp->nm_hostname, ':');
745 if (p)
746 *p = '\0';
747 }
748
749 if (vfs_getopt(mp->mnt_optnew, "sec",
750 (void **) &secname, NULL) == 0) {
751 nmp->nm_secflavor = nfs_sec_name_to_num(secname);
752 } else {
753 nmp->nm_secflavor = AUTH_SYS;
754 }
755
756 if (vfs_getopt(mp->mnt_optnew, "principal",
757 (void **) &principal, NULL) == 0) {
758 strlcpy(nmp->nm_principal, principal,
759 sizeof(nmp->nm_principal));
760 } else {
761 snprintf(nmp->nm_principal, sizeof(nmp->nm_principal),
762 "nfs@%s", nmp->nm_hostname);
763 }
764 }
765
/*
 * NULL-terminated list of the option names that nfs_mount() hands to
 * vfs_filteropt().  Every value-carrying option that nfs_mount() parses
 * has to be listed here, otherwise the mount is refused before the option
 * is ever looked at; "readahead" is one of them.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
    "readahead", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
    "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin",
    "acdirmax", "deadthresh", "hostname", "timeout", "addr", "fh", "nfsv3",
    "sec", "maxgroups", "principal",
    NULL };
775
776 /*
777 * VFS Operations.
778 *
779 * mount system call
780 * It seems a bit dumb to copyinstr() the host and path here and then
781 * bcopy() them in mountnfs(), but I wanted to detect errors before
782 * doing the sockargs() call because sockargs() allocates an mbuf and
783 * an error after that means that I have to release the mbuf.
784 */
785 /* ARGSUSED */
786 static int
787 nfs_mount(struct mount *mp)
788 {
789 struct nfs_args args = {
790 .version = NFS_ARGSVERSION,
791 .addr = NULL,
792 .addrlen = sizeof (struct sockaddr_in),
793 .sotype = SOCK_STREAM,
794 .proto = 0,
795 .fh = NULL,
796 .fhsize = 0,
797 .flags = NFSMNT_RESVPORT,
798 .wsize = NFS_WSIZE,
799 .rsize = NFS_RSIZE,
800 .readdirsize = NFS_READDIRSIZE,
801 .timeo = 10,
802 .retrans = NFS_RETRANS,
803 .maxgrouplist = NFS_MAXGRPS,
804 .readahead = NFS_DEFRAHEAD,
805 .wcommitsize = 0, /* was: NQ_DEFLEASE */
806 .deadthresh = NFS_MAXDEADTHRESH, /* was: NQ_DEADTHRESH */
807 .hostname = NULL,
808 /* args version 4 */
809 .acregmin = NFS_MINATTRTIMO,
810 .acregmax = NFS_MAXATTRTIMO,
811 .acdirmin = NFS_MINDIRATTRTIMO,
812 .acdirmax = NFS_MAXDIRATTRTIMO,
813 };
814 int error, ret, has_nfs_args_opt;
815 int has_addr_opt, has_fh_opt, has_hostname_opt;
816 struct sockaddr *nam;
817 struct vnode *vp;
818 char hst[MNAMELEN];
819 size_t len;
820 u_char nfh[NFSX_V3FHMAX];
821 char *opt;
822
823 has_nfs_args_opt = 0;
824 has_addr_opt = 0;
825 has_fh_opt = 0;
826 has_hostname_opt = 0;
827
828 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
829 error = EINVAL;
830 goto out;
831 }
832
833 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
834 error = nfs_mountroot(mp);
835 goto out;
836 }
837
838 /*
839 * The old mount_nfs program passed the struct nfs_args
840 * from userspace to kernel. The new mount_nfs program
841 * passes string options via nmount() from userspace to kernel
842 * and we populate the struct nfs_args in the kernel.
843 */
844 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
845 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
846 sizeof args);
847 if (error)
848 goto out;
849
850 if (args.version != NFS_ARGSVERSION) {
851 error = EPROGMISMATCH;
852 goto out;
853 }
854 has_nfs_args_opt = 1;
855 }
856
857 if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
858 args.flags |= NFSMNT_DUMBTIMR;
859 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
860 args.flags |= NFSMNT_NOCONN;
861 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
862 args.flags |= NFSMNT_NOCONN;
863 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
864 args.flags |= NFSMNT_NOLOCKD;
865 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
866 args.flags &= ~NFSMNT_NOLOCKD;
867 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
868 args.flags |= NFSMNT_INT;
869 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
870 args.flags |= NFSMNT_RDIRPLUS;
871 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
872 args.flags |= NFSMNT_RESVPORT;
873 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
874 args.flags &= ~NFSMNT_RESVPORT;
875 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
876 args.flags |= NFSMNT_SOFT;
877 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
878 args.flags &= ~NFSMNT_SOFT;
879 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
880 args.sotype = SOCK_DGRAM;
881 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
882 args.sotype = SOCK_DGRAM;
883 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
884 args.sotype = SOCK_STREAM;
885 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
886 args.flags |= NFSMNT_NFSV3;
887 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
888 if (opt == NULL) {
889 vfs_mount_error(mp, "illegal readdirsize");
890 error = EINVAL;
891 goto out;
892 }
893 ret = sscanf(opt, "%d", &args.readdirsize);
894 if (ret != 1 || args.readdirsize <= 0) {
895 vfs_mount_error(mp, "illegal readdirsize: %s",
896 opt);
897 error = EINVAL;
898 goto out;
899 }
900 args.flags |= NFSMNT_READDIRSIZE;
901 }
902 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
903 if (opt == NULL) {
904 vfs_mount_error(mp, "illegal readahead");
905 error = EINVAL;
906 goto out;
907 }
908 ret = sscanf(opt, "%d", &args.readahead);
909 if (ret != 1 || args.readahead <= 0) {
910 vfs_mount_error(mp, "illegal readahead: %s",
911 opt);
912 error = EINVAL;
913 goto out;
914 }
915 args.flags |= NFSMNT_READAHEAD;
916 }
917 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
918 if (opt == NULL) {
919 vfs_mount_error(mp, "illegal wsize");
920 error = EINVAL;
921 goto out;
922 }
923 ret = sscanf(opt, "%d", &args.wsize);
924 if (ret != 1 || args.wsize <= 0) {
925 vfs_mount_error(mp, "illegal wsize: %s",
926 opt);
927 error = EINVAL;
928 goto out;
929 }
930 args.flags |= NFSMNT_WSIZE;
931 }
932 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
933 if (opt == NULL) {
934 vfs_mount_error(mp, "illegal rsize");
935 error = EINVAL;
936 goto out;
937 }
938 ret = sscanf(opt, "%d", &args.rsize);
939 if (ret != 1 || args.rsize <= 0) {
940 vfs_mount_error(mp, "illegal wsize: %s",
941 opt);
942 error = EINVAL;
943 goto out;
944 }
945 args.flags |= NFSMNT_RSIZE;
946 }
947 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
948 if (opt == NULL) {
949 vfs_mount_error(mp, "illegal retrans");
950 error = EINVAL;
951 goto out;
952 }
953 ret = sscanf(opt, "%d", &args.retrans);
954 if (ret != 1 || args.retrans <= 0) {
955 vfs_mount_error(mp, "illegal retrans: %s",
956 opt);
957 error = EINVAL;
958 goto out;
959 }
960 args.flags |= NFSMNT_RETRANS;
961 }
962 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
963 ret = sscanf(opt, "%d", &args.acregmin);
964 if (ret != 1 || args.acregmin < 0) {
965 vfs_mount_error(mp, "illegal acregmin: %s",
966 opt);
967 error = EINVAL;
968 goto out;
969 }
970 args.flags |= NFSMNT_ACREGMIN;
971 }
972 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
973 ret = sscanf(opt, "%d", &args.acregmax);
974 if (ret != 1 || args.acregmax < 0) {
975 vfs_mount_error(mp, "illegal acregmax: %s",
976 opt);
977 error = EINVAL;
978 goto out;
979 }
980 args.flags |= NFSMNT_ACREGMAX;
981 }
982 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
983 ret = sscanf(opt, "%d", &args.acdirmin);
984 if (ret != 1 || args.acdirmin < 0) {
985 vfs_mount_error(mp, "illegal acdirmin: %s",
986 opt);
987 error = EINVAL;
988 goto out;
989 }
990 args.flags |= NFSMNT_ACDIRMIN;
991 }
992 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
993 ret = sscanf(opt, "%d", &args.acdirmax);
994 if (ret != 1 || args.acdirmax < 0) {
995 vfs_mount_error(mp, "illegal acdirmax: %s",
996 opt);
997 error = EINVAL;
998 goto out;
999 }
1000 args.flags |= NFSMNT_ACDIRMAX;
1001 }
1002 if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) {
1003 ret = sscanf(opt, "%d", &args.deadthresh);
1004 if (ret != 1 || args.deadthresh <= 0) {
1005 vfs_mount_error(mp, "illegal deadthresh: %s",
1006 opt);
1007 error = EINVAL;
1008 goto out;
1009 }
1010 args.flags |= NFSMNT_DEADTHRESH;
1011 }
1012 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1013 ret = sscanf(opt, "%d", &args.timeo);
1014 if (ret != 1 || args.timeo <= 0) {
1015 vfs_mount_error(mp, "illegal timeout: %s",
1016 opt);
1017 error = EINVAL;
1018 goto out;
1019 }
1020 args.flags |= NFSMNT_TIMEO;
1021 }
1022 if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) {
1023 ret = sscanf(opt, "%d", &args.maxgrouplist);
1024 if (ret != 1 || args.timeo <= 0) {
1025 vfs_mount_error(mp, "illegal maxgroups: %s",
1026 opt);
1027 error = EINVAL;
1028 goto out;
1029 }
1030 args.flags |= NFSMNT_MAXGRPS;
1031 }
1032 if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1033 &args.addrlen) == 0) {
1034 has_addr_opt = 1;
1035 if (args.addrlen > SOCK_MAXADDRLEN) {
1036 error = ENAMETOOLONG;
1037 goto out;
1038 }
1039 nam = malloc(args.addrlen, M_SONAME,
1040 M_WAITOK);
1041 bcopy(args.addr, nam, args.addrlen);
1042 nam->sa_len = args.addrlen;
1043 }
1044 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1045 &args.fhsize) == 0) {
1046 has_fh_opt = 1;
1047 }
1048 if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
1049 NULL) == 0) {
1050 has_hostname_opt = 1;
1051 }
1052 if (args.hostname == NULL) {
1053 vfs_mount_error(mp, "Invalid hostname");
1054 error = EINVAL;
1055 goto out;
1056 }
1057
1058 if (mp->mnt_flag & MNT_UPDATE) {
1059 struct nfsmount *nmp = VFSTONFS(mp);
1060
1061 if (nmp == NULL) {
1062 error = EIO;
1063 goto out;
1064 }
1065 /*
1066 * When doing an update, we can't change from or to
1067 * v3, switch lockd strategies or change cookie translation
1068 */
1069 args.flags = (args.flags &
1070 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1071 (nmp->nm_flag &
1072 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1073 nfs_decode_args(mp, nmp, &args, NULL);
1074 goto out;
1075 }
1076
1077 /*
1078 * Make the nfs_ip_paranoia sysctl serve as the default connection
1079 * or no-connection mode for those protocols that support
1080 * no-connection mode (the flag will be cleared later for protocols
1081 * that do not support no-connection mode). This will allow a client
1082 * to receive replies from a different IP then the request was
1083 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
1084 * not 0.
1085 */
1086 if (nfs_ip_paranoia == 0)
1087 args.flags |= NFSMNT_NOCONN;
1088
1089 if (has_nfs_args_opt) {
1090 /*
1091 * In the 'nfs_args' case, the pointers in the args
1092 * structure are in userland - we copy them in here.
1093 */
1094 if (!has_fh_opt) {
1095 error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1096 args.fhsize);
1097 if (error) {
1098 goto out;
1099 }
1100 args.fh = nfh;
1101 }
1102 if (!has_hostname_opt) {
1103 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
1104 if (error) {
1105 goto out;
1106 }
1107 bzero(&hst[len], MNAMELEN - len);
1108 args.hostname = hst;
1109 }
1110 if (!has_addr_opt) {
1111 /* sockargs() call must be after above copyin() calls */
1112 error = getsockaddr(&nam, (caddr_t)args.addr,
1113 args.addrlen);
1114 if (error) {
1115 goto out;
1116 }
1117 }
1118 }
1119 error = mountnfs(&args, mp, nam, args.hostname, &vp,
1120 curthread->td_ucred);
1121 out:
1122 if (!error) {
1123 MNT_ILOCK(mp);
1124 mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
1125 MNT_IUNLOCK(mp);
1126 }
1127 return (error);
1128 }
1129
1130
1131 /*
1132 * VFS Operations.
1133 *
1134 * mount system call
1135 * It seems a bit dumb to copyinstr() the host and path here and then
1136 * bcopy() them in mountnfs(), but I wanted to detect errors before
1137 * doing the sockargs() call because sockargs() allocates an mbuf and
1138 * an error after that means that I have to release the mbuf.
1139 */
1140 /* ARGSUSED */
1141 static int
1142 nfs_cmount(struct mntarg *ma, void *data, int flags)
1143 {
1144 int error;
1145 struct nfs_args args;
1146
1147 error = copyin(data, &args, sizeof (struct nfs_args));
1148 if (error)
1149 return error;
1150
1151 ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1152
1153 error = kernel_mount(ma, flags);
1154 return (error);
1155 }
1156
1157 /*
1158 * Common code for mount and mountroot
1159 */
1160 static int
1161 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1162 char *hst, struct vnode **vpp, struct ucred *cred)
1163 {
1164 struct nfsmount *nmp;
1165 struct nfsnode *np;
1166 int error;
1167 struct vattr attrs;
1168
1169 if (mp->mnt_flag & MNT_UPDATE) {
1170 nmp = VFSTONFS(mp);
1171 printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1172 free(nam, M_SONAME);
1173 return (0);
1174 } else {
1175 nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
1176 bzero((caddr_t)nmp, sizeof (struct nfsmount));
1177 TAILQ_INIT(&nmp->nm_bufq);
1178 mp->mnt_data = nmp;
1179 }
1180 vfs_getnewfsid(mp);
1181 nmp->nm_mountp = mp;
1182 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
1183
1184 /*
1185 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1186 * high, depending on whether we end up with negative offsets in
1187 * the client or server somewhere. 2GB-1 may be safer.
1188 *
1189 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
1190 * that we can handle until we find out otherwise.
1191 * XXX Our "safe" limit on the client is what we can store in our
1192 * buffer cache using signed(!) block numbers.
1193 */
1194 if ((argp->flags & NFSMNT_NFSV3) == 0)
1195 nmp->nm_maxfilesize = 0xffffffffLL;
1196 else
1197 nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
1198
1199 nmp->nm_timeo = NFS_TIMEO;
1200 nmp->nm_retry = NFS_RETRANS;
1201 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
1202 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
1203 } else {
1204 nmp->nm_wsize = NFS_WSIZE;
1205 nmp->nm_rsize = NFS_RSIZE;
1206 }
1207 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1208 nmp->nm_readdirsize = NFS_READDIRSIZE;
1209 nmp->nm_numgrps = NFS_MAXGRPS;
1210 nmp->nm_readahead = NFS_DEFRAHEAD;
1211 nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
1212 nmp->nm_tprintf_delay = nfs_tprintf_delay;
1213 if (nmp->nm_tprintf_delay < 0)
1214 nmp->nm_tprintf_delay = 0;
1215 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1216 if (nmp->nm_tprintf_initial_delay < 0)
1217 nmp->nm_tprintf_initial_delay = 0;
1218 nmp->nm_fhsize = argp->fhsize;
1219 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1220 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1221 nmp->nm_nam = nam;
1222 /* Set up the sockets and per-host congestion */
1223 nmp->nm_sotype = argp->sotype;
1224 nmp->nm_soproto = argp->proto;
1225 nmp->nm_rpcops = &nfs_rpcops;
1226
1227 nfs_decode_args(mp, nmp, argp, hst);
1228
1229 /*
1230 * For Connection based sockets (TCP,...) defer the connect until
1231 * the first request, in case the server is not responding.
1232 */
1233 if (nmp->nm_sotype == SOCK_DGRAM &&
1234 (error = nfs_connect(nmp)))
1235 goto bad;
1236
1237 /*
1238 * This is silly, but it has to be set so that vinifod() works.
1239 * We do not want to do an nfs_statfs() here since we can get
1240 * stuck on a dead server and we are holding a lock on the mount
1241 * point.
1242 */
1243 mtx_lock(&nmp->nm_mtx);
1244 mp->mnt_stat.f_iosize = nfs_iosize(nmp);
1245 mtx_unlock(&nmp->nm_mtx);
1246 /*
1247 * A reference count is needed on the nfsnode representing the
1248 * remote root. If this object is not persistent, then backward
1249 * traversals of the mount point (i.e. "..") will not work if
1250 * the nfsnode gets flushed out of the cache. Ufs does not have
1251 * this problem, because one can identify root inodes by their
1252 * number == ROOTINO (2).
1253 */
1254 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
1255 if (error)
1256 goto bad;
1257 *vpp = NFSTOV(np);
1258
1259 /*
1260 * Get file attributes and transfer parameters for the
1261 * mountpoint. This has the side effect of filling in
1262 * (*vpp)->v_type with the correct value.
1263 */
1264 if (argp->flags & NFSMNT_NFSV3)
1265 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
1266 else
1267 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred);
1268
1269 /*
1270 * Lose the lock but keep the ref.
1271 */
1272 VOP_UNLOCK(*vpp, 0);
1273
1274 return (0);
1275 bad:
1276 nfs_disconnect(nmp);
1277 mtx_destroy(&nmp->nm_mtx);
1278 uma_zfree(nfsmount_zone, nmp);
1279 free(nam, M_SONAME);
1280 return (error);
1281 }
1282
1283 /*
1284 * unmount system call
1285 */
1286 static int
1287 nfs_unmount(struct mount *mp, int mntflags)
1288 {
1289 struct nfsmount *nmp;
1290 int error, flags = 0;
1291
1292 if (mntflags & MNT_FORCE)
1293 flags |= FORCECLOSE;
1294 nmp = VFSTONFS(mp);
1295 /*
1296 * Goes something like this..
1297 * - Call vflush() to clear out vnodes for this filesystem
1298 * - Close the socket
1299 * - Free up the data structures
1300 */
1301 /* In the forced case, cancel any outstanding requests. */
1302 if (flags & FORCECLOSE) {
1303 error = nfs_nmcancelreqs(nmp);
1304 if (error)
1305 goto out;
1306 }
1307 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1308 error = vflush(mp, 1, flags, curthread);
1309 if (error)
1310 goto out;
1311
1312 /*
1313 * We are now committed to the unmount.
1314 */
1315 nfs_disconnect(nmp);
1316 free(nmp->nm_nam, M_SONAME);
1317
1318 mtx_destroy(&nmp->nm_mtx);
1319 uma_zfree(nfsmount_zone, nmp);
1320 out:
1321 return (error);
1322 }
1323
1324 /*
1325 * Return root of a filesystem
1326 */
1327 static int
1328 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1329 {
1330 struct vnode *vp;
1331 struct nfsmount *nmp;
1332 struct nfsnode *np;
1333 int error;
1334
1335 nmp = VFSTONFS(mp);
1336 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1337 if (error)
1338 return error;
1339 vp = NFSTOV(np);
1340 /*
1341 * Get transfer parameters and attributes for root vnode once.
1342 */
1343 mtx_lock(&nmp->nm_mtx);
1344 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
1345 (nmp->nm_flag & NFSMNT_NFSV3)) {
1346 mtx_unlock(&nmp->nm_mtx);
1347 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1348 } else
1349 mtx_unlock(&nmp->nm_mtx);
1350 if (vp->v_type == VNON)
1351 vp->v_type = VDIR;
1352 vp->v_vflag |= VV_ROOT;
1353 *vpp = vp;
1354 return (0);
1355 }
1356
/*
 * Flush out the buffer cache
 *
 * Walks the vnodes of `mp' and calls VOP_FSYNC() on each one that is
 * not currently locked and has dirty buffers.  When `waitfor' is
 * MNT_LAZY every vnode is skipped, so nothing is flushed.
 *
 * Returns 0 if no VOP_FSYNC() call failed, otherwise the error from
 * the last one that did.
 */
/* ARGSUSED */
static int
nfs_sync(struct mount *mp, int waitfor)
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, allerror = 0;

	td = curthread;

	/*
	 * Force stale buffer cache information to be flushed.
	 */
	/* The mount interlock is held at the top of every iteration. */
	MNT_ILOCK(mp);
loop:
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/* Swap the mount interlock for the vnode interlock. */
		VI_LOCK(vp);
		MNT_IUNLOCK(mp);
		/* XXX Racy bv_cnt check. */
		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
		    waitfor == MNT_LAZY) {
			/* Nothing to do for this vnode; move on. */
			VI_UNLOCK(vp);
			MNT_ILOCK(mp);
			continue;
		}
		/*
		 * The vnode interlock is still held and is handed to
		 * vget() via LK_INTERLOCK.  If vget() fails, abort the
		 * current walk and start over from the top.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_ILOCK(mp);
			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
			goto loop;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		/* Remember the failure but keep syncing the other vnodes. */
		if (error)
			allerror = error;
		VOP_UNLOCK(vp, 0);
		vrele(vp);

		/* Retake the mount interlock before the next iteration. */
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (allerror);
}
1401
1402 static int
1403 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1404 {
1405 struct nfsmount *nmp = VFSTONFS(mp);
1406 struct vfsquery vq;
1407 int error;
1408
1409 bzero(&vq, sizeof(vq));
1410 switch (op) {
1411 #if 0
1412 case VFS_CTL_NOLOCKS:
1413 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1414 if (req->oldptr != NULL) {
1415 error = SYSCTL_OUT(req, &val, sizeof(val));
1416 if (error)
1417 return (error);
1418 }
1419 if (req->newptr != NULL) {
1420 error = SYSCTL_IN(req, &val, sizeof(val));
1421 if (error)
1422 return (error);
1423 if (val)
1424 nmp->nm_flag |= NFSMNT_NOLOCKS;
1425 else
1426 nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1427 }
1428 break;
1429 #endif
1430 case VFS_CTL_QUERY:
1431 mtx_lock(&nmp->nm_mtx);
1432 if (nmp->nm_state & NFSSTA_TIMEO)
1433 vq.vq_flags |= VQ_NOTRESP;
1434 mtx_unlock(&nmp->nm_mtx);
1435 #if 0
1436 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1437 (nmp->nm_state & NFSSTA_LOCKTIMEO))
1438 vq.vq_flags |= VQ_NOTRESPLOCK;
1439 #endif
1440 error = SYSCTL_OUT(req, &vq, sizeof(vq));
1441 break;
1442 case VFS_CTL_TIMEO:
1443 if (req->oldptr != NULL) {
1444 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1445 sizeof(nmp->nm_tprintf_initial_delay));
1446 if (error)
1447 return (error);
1448 }
1449 if (req->newptr != NULL) {
1450 error = vfs_suser(mp, req->td);
1451 if (error)
1452 return (error);
1453 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1454 sizeof(nmp->nm_tprintf_initial_delay));
1455 if (error)
1456 return (error);
1457 if (nmp->nm_tprintf_initial_delay < 0)
1458 nmp->nm_tprintf_initial_delay = 0;
1459 }
1460 break;
1461 default:
1462 return (ENOTSUP);
1463 }
1464 return (0);
1465 }
Cache object: 9a03fb1151164c453d9b5d9e77016f07
|