FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_mount.c
1 /*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * Copyright (c) 1999 Michael Smith
39 * All rights reserved.
40 * Copyright (c) 1999 Poul-Henning Kamp
41 * All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * $FreeBSD: releng/5.1/sys/kern/vfs_mount.c 113958 2003-04-24 08:16:06Z tjr $
65 */
66
67 #include <sys/param.h>
68 #include <sys/conf.h>
69 #include <sys/cons.h>
70 #include <sys/kernel.h>
71 #include <sys/linker.h>
72 #include <sys/mac.h>
73 #include <sys/malloc.h>
74 #include <sys/mount.h>
75 #include <sys/mutex.h>
76 #include <sys/namei.h>
77 #include <sys/proc.h>
78 #include <sys/filedesc.h>
79 #include <sys/reboot.h>
80 #include <sys/sysproto.h>
81 #include <sys/sx.h>
82 #include <sys/sysctl.h>
83 #include <sys/sysent.h>
84 #include <sys/systm.h>
85 #include <sys/vnode.h>
86
87 #include <geom/geom.h>
88
89 #include <machine/stdarg.h>
90
91 #include "opt_rootdevname.h"
92 #include "opt_ddb.h"
93 #include "opt_mac.h"
94
95 #ifdef DDB
96 #include <ddb/ddb.h>
97 #endif
98
/*
 * NOTE(review): ROOTNAME is not referenced in the portion of the file
 * reviewed here; presumably it names the root device for the mountroot
 * code further down -- confirm against its users.
 */
99 #define ROOTNAME "root_device"
100
/*
 * Forward declarations of file-local helpers.  checkdirs() and
 * vfs_nmount() are defined later in this file; the remaining three are
 * declared here for code beyond the mount/nmount entry points.
 */
101 static void checkdirs(struct vnode *olddp, struct vnode *newdp);
102 static int vfs_nmount(struct thread *td, int, struct uio *);
103 static int vfs_mountroot_try(char *mountfrom);
104 static int vfs_mountroot_ask(void);
105 static void gets(char *cp);
106
/*
 * Policy knob exported read-write as the sysctl vfs.usermount.  While it is
 * zero, vfs_nmount() and vfs_mount() require suser() to succeed before doing
 * anything else.
 */
107 static int usermount = 0; /* if 1, non-root can mount fs. */
108 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
109
/*
 * Malloc type used in this file for struct mount, the mount option lists
 * and the option name/value buffers.
 */
110 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
111
112 /* List of mounted filesystems. */
113 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
114
115 /* For any iteration/modification of mountlist */
116 struct mtx mountlist_mtx;
117
118 /* For any iteration/modification of mnt_vnodelist */
119 struct mtx mntvnode_mtx;
120
121 /*
122 * The vnode of the system's root (/ in the filesystem, without chroot
123 * active.)
124 */
/* checkdirs() re-points this at the new root when the old one is covered. */
125 struct vnode *rootvnode;
126
127 /*
128 * The root filesystem is detailed in the kernel environment variable
129 * vfs.root.mountfrom, which is expected to be in the general format
130 *
131 * <vfsname>:[<path>]
132 * vfsname := the name of a VFS known to the kernel and capable
133 * of being mounted as root
134 * path := disk device name or other data used by the filesystem
135 * to locate its physical store
136 */
137
138 /*
139 * The root specifiers we will try if RB_CDROM is specified.
140 */
/*
 * Each entry uses the "<vfsname>:<path>" format described above; the table
 * is terminated by a NULL entry.
 */
141 static char *cdrom_rootdevnames[] = {
142 "cd9660:cd0a",
143 "cd9660:acd0a",
144 "cd9660:wcd0a",
145 NULL
146 };
147
148 /* legacy find-root code */
149 char *rootdevnames[2] = {NULL, NULL};
150 static int setrootbyname(char *name);
/*
 * Current root device, NODEV until one has been chosen.  Its si_name is
 * what the kern.rootdev sysctl handler below reports.
 */
151 dev_t rootdev = NODEV;
152
153 /*
154 * Has to be dynamic as the value of rootdev can change; however, it can't
155 * change after the root is mounted, so a user process can't access this
156 * sysctl until after the value is unchangeable.
157 */
158 static int
159 sysctl_rootdev(SYSCTL_HANDLER_ARGS)
160 {
161 int error;
162
163 /* _RD prevents this from happening. */
164 KASSERT(req->newptr == NULL, ("Attempt to change root device name"));
165
166 if (rootdev != NODEV)
167 error = sysctl_handle_string(oidp, rootdev->si_name, 0, req);
168 else
169 error = sysctl_handle_string(oidp, "", 0, req);
170
171 return (error);
172 }
173
174 SYSCTL_PROC(_kern, OID_AUTO, rootdev, CTLTYPE_STRING | CTLFLAG_RD,
175 0, 0, sysctl_rootdev, "A", "Root file system device");
176
177 /* Remove one mount option. */
178 static void
179 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
180 {
181
182 TAILQ_REMOVE(opts, opt, link);
183 free(opt->name, M_MOUNT);
184 if (opt->value != NULL)
185 free(opt->value, M_MOUNT);
186 #ifdef INVARIANTS
187 else if (opt->len != 0)
188 panic("%s: mount option with NULL value but length != 0",
189 __func__);
190 #endif
191 free(opt, M_MOUNT);
192 }
193
194 /* Release all resources related to the mount options. */
195 static void
196 vfs_freeopts(struct vfsoptlist *opts)
197 {
198 struct vfsopt *opt;
199
200 while (!TAILQ_EMPTY(opts)) {
201 opt = TAILQ_FIRST(opts);
202 vfs_freeopt(opts, opt);
203 }
204 free(opts, M_MOUNT);
205 }
206
207 /*
208 * If a mount option is specified several times,
209 * (with or without the "no" prefix) only keep
210 * the last occurence of it.
211 */
212 static void
213 vfs_sanitizeopts(struct vfsoptlist *opts)
214 {
215 struct vfsopt *opt, *opt2, *tmp;
216 int noopt;
217
218 TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
219 if (strncmp(opt->name, "no", 2) == 0)
220 noopt = 1;
221 else
222 noopt = 0;
223 opt2 = TAILQ_PREV(opt, vfsoptlist, link);
224 while (opt2 != NULL) {
225 if (strcmp(opt2->name, opt->name) == 0 ||
226 (noopt && strcmp(opt->name + 2, opt2->name) == 0) ||
227 (!noopt && strncmp(opt2->name, "no", 2) == 0 &&
228 strcmp(opt2->name + 2, opt->name) == 0)) {
229 tmp = TAILQ_PREV(opt2, vfsoptlist, link);
230 vfs_freeopt(opts, opt2);
231 opt2 = tmp;
232 } else {
233 opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
234 }
235 }
236 }
237 }
238
239 /*
240 * Build a linked list of mount options from a struct uio.
241 */
242 static int
243 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
244 {
245 struct vfsoptlist *opts;
246 struct vfsopt *opt;
247 unsigned int i, iovcnt;
248 int error, namelen, optlen;
249
250 iovcnt = auio->uio_iovcnt;
251 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
252 TAILQ_INIT(opts);
253 for (i = 0; i < iovcnt; i += 2) {
254 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
255 namelen = auio->uio_iov[i].iov_len;
256 optlen = auio->uio_iov[i + 1].iov_len;
257 opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
258 opt->value = NULL;
259 if (auio->uio_segflg == UIO_SYSSPACE) {
260 bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
261 } else {
262 error = copyin(auio->uio_iov[i].iov_base, opt->name,
263 namelen);
264 if (error)
265 goto bad;
266 }
267 opt->len = optlen;
268 if (optlen != 0) {
269 opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
270 if (auio->uio_segflg == UIO_SYSSPACE) {
271 bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
272 optlen);
273 } else {
274 error = copyin(auio->uio_iov[i + 1].iov_base,
275 opt->value, optlen);
276 if (error)
277 goto bad;
278 }
279 }
280 TAILQ_INSERT_TAIL(opts, opt, link);
281 }
282 vfs_sanitizeopts(opts);
283 *options = opts;
284 return (0);
285 bad:
286 vfs_freeopts(opts);
287 return (error);
288 }
289
290 /*
291 * Merge the old mount options with the new ones passed
292 * in the MNT_UPDATE case.
293 */
294 static void
295 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
296 {
297 struct vfsopt *opt, *opt2, *new;
298
299 TAILQ_FOREACH(opt, opts, link) {
300 /*
301 * Check that this option hasn't been redefined
302 * nor cancelled with a "no" mount option.
303 */
304 opt2 = TAILQ_FIRST(toopts);
305 while (opt2 != NULL) {
306 if (strcmp(opt2->name, opt->name) == 0)
307 goto next;
308 if (strncmp(opt2->name, "no", 2) == 0 &&
309 strcmp(opt2->name + 2, opt->name) == 0) {
310 vfs_freeopt(toopts, opt2);
311 goto next;
312 }
313 opt2 = TAILQ_NEXT(opt2, link);
314 }
315 /* We want this option, duplicate it. */
316 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
317 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
318 strcpy(new->name, opt->name);
319 if (opt->len != 0) {
320 new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
321 bcopy(opt->value, new->value, opt->len);
322 } else {
323 new->value = NULL;
324 }
325 new->len = opt->len;
326 TAILQ_INSERT_TAIL(toopts, new, link);
327 next:
328 continue;
329 }
330 }
331
332 /*
333 * New mount API.
334 */
335 int
336 nmount(td, uap)
337 struct thread *td;
338 struct nmount_args /* {
339 struct iovec *iovp;
340 unsigned int iovcnt;
341 int flags;
342 } */ *uap;
343 {
344 struct uio auio;
345 struct iovec *iov, *needfree;
346 struct iovec aiov[UIO_SMALLIOV];
347 unsigned int i;
348 int error;
349 u_int iovlen, iovcnt;
350
351 iovcnt = uap->iovcnt;
352 iovlen = iovcnt * sizeof (struct iovec);
353 /*
354 * Check that we have an even number of iovec's
355 * and that we have at least two options.
356 */
357 if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
358 return (EINVAL);
359
360 if (iovcnt > UIO_SMALLIOV) {
361 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
362 needfree = iov;
363 } else {
364 iov = aiov;
365 needfree = NULL;
366 }
367 auio.uio_iov = iov;
368 auio.uio_iovcnt = iovcnt;
369 auio.uio_segflg = UIO_USERSPACE;
370 if ((error = copyin(uap->iovp, iov, iovlen)))
371 goto finish;
372
373 for (i = 0; i < iovcnt; i++) {
374 if (iov->iov_len > MMAXOPTIONLEN) {
375 error = EINVAL;
376 goto finish;
377 }
378 iov++;
379 }
380 error = vfs_nmount(td, uap->flags, &auio);
381 finish:
382 if (needfree != NULL)
383 free(needfree, M_TEMP);
384 return (error);
385 }
386
387 int
388 kernel_mount(iovp, iovcnt, flags)
389 struct iovec *iovp;
390 unsigned int iovcnt;
391 int flags;
392 {
393 struct uio auio;
394 int error;
395
396 /*
397 * Check that we have an even number of iovec's
398 * and that we have at least two options.
399 */
400 if ((iovcnt & 1) || (iovcnt < 4))
401 return (EINVAL);
402
403 auio.uio_iov = iovp;
404 auio.uio_iovcnt = iovcnt;
405 auio.uio_segflg = UIO_SYSSPACE;
406
407 error = vfs_nmount(curthread, flags, &auio);
408 return (error);
409 }
410
411 int
412 kernel_vmount(int flags, ...)
413 {
414 struct iovec *iovp;
415 struct uio auio;
416 va_list ap;
417 unsigned int iovcnt, iovlen, len;
418 const char *cp;
419 char *buf, *pos;
420 size_t n;
421 int error, i;
422
423 len = 0;
424 va_start(ap, flags);
425 for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
426 len += strlen(cp) + 1;
427 va_end(ap);
428
429 if (iovcnt < 4 || iovcnt & 1)
430 return (EINVAL);
431
432 iovlen = iovcnt * sizeof (struct iovec);
433 MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
434 MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
435 pos = buf;
436 va_start(ap, flags);
437 for (i = 0; i < iovcnt; i++) {
438 cp = va_arg(ap, const char *);
439 copystr(cp, pos, len - (pos - buf), &n);
440 iovp[i].iov_base = pos;
441 iovp[i].iov_len = n;
442 pos += n;
443 }
444 va_end(ap);
445
446 auio.uio_iov = iovp;
447 auio.uio_iovcnt = iovcnt;
448 auio.uio_segflg = UIO_SYSSPACE;
449
450 error = vfs_nmount(curthread, flags, &auio);
451 FREE(iovp, M_MOUNT);
452 FREE(buf, M_MOUNT);
453 return (error);
454 }
455
456 /*
457 * vfs_nmount(): actually attempt a filesystem mount.
458 */
459 static int
460 vfs_nmount(td, fsflags, fsoptions)
461 struct thread *td;
462 int fsflags; /* Flags common to all filesystems. */
463 struct uio *fsoptions; /* Options local to the filesystem. */
464 {
465 linker_file_t lf;
466 struct vnode *vp;
467 struct mount *mp;
468 struct vfsconf *vfsp;
469 struct vfsoptlist *optlist;
470 char *fstype, *fspath;
471 int error, flag = 0, kern_flag = 0;
472 int fstypelen, fspathlen;
473 struct vattr va;
474 struct nameidata nd;
475
476 error = vfs_buildopts(fsoptions, &optlist);
477 if (error)
478 return (error);
479
480 /*
481 * We need these two options before the others,
482 * and they are mandatory for any filesystem.
483 * Ensure they are NUL terminated as well.
484 */
485 fstypelen = 0;
486 error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
487 if (error || fstype[fstypelen - 1] != '\0') {
488 error = EINVAL;
489 goto bad;
490 }
491 fspathlen = 0;
492 error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
493 if (error || fspath[fspathlen - 1] != '\0') {
494 error = EINVAL;
495 goto bad;
496 }
497
498 /*
499 * Be ultra-paranoid about making sure the type and fspath
500 * variables will fit in our mp buffers, including the
501 * terminating NUL.
502 */
503 if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
504 error = ENAMETOOLONG;
505 goto bad;
506 }
507
508 if (usermount == 0) {
509 error = suser(td);
510 if (error)
511 goto bad;
512 }
513 /*
514 * Do not allow NFS export by non-root users.
515 */
516 if (fsflags & MNT_EXPORTED) {
517 error = suser(td);
518 if (error)
519 goto bad;
520 }
521 /*
522 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
523 */
524 if (suser(td))
525 fsflags |= MNT_NOSUID | MNT_NODEV;
526 /*
527 * Get vnode to be covered
528 */
529 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
530 if ((error = namei(&nd)) != 0)
531 goto bad;
532 NDFREE(&nd, NDF_ONLY_PNBUF);
533 vp = nd.ni_vp;
534 if (fsflags & MNT_UPDATE) {
535 if ((vp->v_vflag & VV_ROOT) == 0) {
536 vput(vp);
537 error = EINVAL;
538 goto bad;
539 }
540 mp = vp->v_mount;
541 flag = mp->mnt_flag;
542 kern_flag = mp->mnt_kern_flag;
543 /*
544 * We only allow the filesystem to be reloaded if it
545 * is currently mounted read-only.
546 */
547 if ((fsflags & MNT_RELOAD) &&
548 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
549 vput(vp);
550 error = EOPNOTSUPP; /* Needs translation */
551 goto bad;
552 }
553 /*
554 * Only root, or the user that did the original mount is
555 * permitted to update it.
556 */
557 if (mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) {
558 error = suser(td);
559 if (error) {
560 vput(vp);
561 goto bad;
562 }
563 }
564 if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
565 vput(vp);
566 error = EBUSY;
567 goto bad;
568 }
569 VI_LOCK(vp);
570 if ((vp->v_iflag & VI_MOUNT) != 0 ||
571 vp->v_mountedhere != NULL) {
572 VI_UNLOCK(vp);
573 vfs_unbusy(mp, td);
574 vput(vp);
575 error = EBUSY;
576 goto bad;
577 }
578 vp->v_iflag |= VI_MOUNT;
579 VI_UNLOCK(vp);
580 mp->mnt_flag |= fsflags &
581 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
582 VOP_UNLOCK(vp, 0, td);
583 mp->mnt_optnew = optlist;
584 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
585 goto update;
586 }
587 /*
588 * If the user is not root, ensure that they own the directory
589 * onto which we are attempting to mount.
590 */
591 error = VOP_GETATTR(vp, &va, td->td_ucred, td);
592 if (error) {
593 vput(vp);
594 goto bad;
595 }
596 if (va.va_uid != td->td_ucred->cr_uid) {
597 error = suser(td);
598 if (error) {
599 vput(vp);
600 goto bad;
601 }
602 }
603 if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
604 vput(vp);
605 goto bad;
606 }
607 if (vp->v_type != VDIR) {
608 vput(vp);
609 error = ENOTDIR;
610 goto bad;
611 }
612 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
613 if (!strcmp(vfsp->vfc_name, fstype))
614 break;
615 if (vfsp == NULL) {
616 /* Only load modules for root (very important!). */
617 error = suser(td);
618 if (error) {
619 vput(vp);
620 goto bad;
621 }
622 error = securelevel_gt(td->td_ucred, 0);
623 if (error) {
624 vput(vp);
625 goto bad;
626 }
627 error = linker_load_module(NULL, fstype, NULL, NULL, &lf);
628 if (error || lf == NULL) {
629 vput(vp);
630 if (lf == NULL)
631 error = ENODEV;
632 goto bad;
633 }
634 lf->userrefs++;
635 /* Look up again to see if the VFS was loaded. */
636 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
637 if (!strcmp(vfsp->vfc_name, fstype))
638 break;
639 if (vfsp == NULL) {
640 lf->userrefs--;
641 linker_file_unload(lf);
642 vput(vp);
643 error = ENODEV;
644 goto bad;
645 }
646 }
647 VI_LOCK(vp);
648 if ((vp->v_iflag & VI_MOUNT) != 0 ||
649 vp->v_mountedhere != NULL) {
650 VI_UNLOCK(vp);
651 vput(vp);
652 error = EBUSY;
653 goto bad;
654 }
655 vp->v_iflag |= VI_MOUNT;
656 VI_UNLOCK(vp);
657
658 /*
659 * Allocate and initialize the filesystem.
660 */
661 mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
662 TAILQ_INIT(&mp->mnt_nvnodelist);
663 TAILQ_INIT(&mp->mnt_reservedvnlist);
664 mp->mnt_nvnodelistsize = 0;
665 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
666 (void)vfs_busy(mp, LK_NOWAIT, 0, td);
667 mp->mnt_op = vfsp->vfc_vfsops;
668 mp->mnt_vfc = vfsp;
669 vfsp->vfc_refcount++;
670 mp->mnt_stat.f_type = vfsp->vfc_typenum;
671 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
672 strlcpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
673 mp->mnt_vnodecovered = vp;
674 mp->mnt_cred = crdup(td->td_ucred);
675 mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
676 strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
677 mp->mnt_iosize_max = DFLTPHYS;
678 #ifdef MAC
679 mac_init_mount(mp);
680 mac_create_mount(td->td_ucred, mp);
681 #endif
682 VOP_UNLOCK(vp, 0, td);
683 mp->mnt_optnew = optlist; /* XXXMAC: should this be above? */
684
685 update:
686 /*
687 * Check if the fs implements the new VFS_NMOUNT()
688 * function, since the new system call was used.
689 */
690 if (mp->mnt_op->vfs_mount != NULL) {
691 printf("%s doesn't support the new mount syscall\n",
692 mp->mnt_vfc->vfc_name);
693 VI_LOCK(vp);
694 vp->v_iflag &= ~VI_MOUNT;
695 VI_UNLOCK(vp);
696 if (mp->mnt_flag & MNT_UPDATE)
697 vfs_unbusy(mp, td);
698 else {
699 mp->mnt_vfc->vfc_refcount--;
700 vfs_unbusy(mp, td);
701 #ifdef MAC
702 mac_destroy_mount(mp);
703 #endif
704 crfree(mp->mnt_cred);
705 free(mp, M_MOUNT);
706 }
707 vrele(vp);
708 error = EOPNOTSUPP;
709 goto bad;
710 }
711
712 /*
713 * Set the mount level flags.
714 */
715 if (fsflags & MNT_RDONLY)
716 mp->mnt_flag |= MNT_RDONLY;
717 else if (mp->mnt_flag & MNT_RDONLY)
718 mp->mnt_kern_flag |= MNTK_WANTRDWR;
719 mp->mnt_flag &=~ MNT_UPDATEMASK;
720 mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
721 /*
722 * Mount the filesystem.
723 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
724 * get. No freeing of cn_pnbuf.
725 */
726 error = VFS_NMOUNT(mp, &nd, td);
727 if (!error) {
728 if (mp->mnt_opt != NULL)
729 vfs_freeopts(mp->mnt_opt);
730 mp->mnt_opt = mp->mnt_optnew;
731 }
732 /*
733 * Prevent external consumers of mount
734 * options to read mnt_optnew.
735 */
736 mp->mnt_optnew = NULL;
737 if (mp->mnt_flag & MNT_UPDATE) {
738 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
739 mp->mnt_flag &= ~MNT_RDONLY;
740 mp->mnt_flag &=~
741 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
742 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
743 if (error) {
744 mp->mnt_flag = flag;
745 mp->mnt_kern_flag = kern_flag;
746 }
747 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
748 if (mp->mnt_syncer == NULL)
749 error = vfs_allocate_syncvnode(mp);
750 } else {
751 if (mp->mnt_syncer != NULL)
752 vrele(mp->mnt_syncer);
753 mp->mnt_syncer = NULL;
754 }
755 vfs_unbusy(mp, td);
756 VI_LOCK(vp);
757 vp->v_iflag &= ~VI_MOUNT;
758 VI_UNLOCK(vp);
759 vrele(vp);
760 return (error);
761 }
762 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
763 /*
764 * Put the new filesystem on the mount list after root.
765 */
766 cache_purge(vp);
767 if (!error) {
768 struct vnode *newdp;
769
770 VI_LOCK(vp);
771 vp->v_iflag &= ~VI_MOUNT;
772 VI_UNLOCK(vp);
773 vp->v_mountedhere = mp;
774 mtx_lock(&mountlist_mtx);
775 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
776 mtx_unlock(&mountlist_mtx);
777 if (VFS_ROOT(mp, &newdp))
778 panic("mount: lost mount");
779 checkdirs(vp, newdp);
780 vput(newdp);
781 VOP_UNLOCK(vp, 0, td);
782 if ((mp->mnt_flag & MNT_RDONLY) == 0)
783 error = vfs_allocate_syncvnode(mp);
784 vfs_unbusy(mp, td);
785 if ((error = VFS_START(mp, 0, td)) != 0) {
786 vrele(vp);
787 goto bad;
788 }
789 } else {
790 VI_LOCK(vp);
791 vp->v_iflag &= ~VI_MOUNT;
792 VI_UNLOCK(vp);
793 mp->mnt_vfc->vfc_refcount--;
794 vfs_unbusy(mp, td);
795 #ifdef MAC
796 mac_destroy_mount(mp);
797 #endif
798 crfree(mp->mnt_cred);
799 free(mp, M_MOUNT);
800 vput(vp);
801 goto bad;
802 }
803 return (0);
804 bad:
805 vfs_freeopts(optlist);
806 return (error);
807 }
808
809 /*
810 * Old mount API.
811 */
812 #ifndef _SYS_SYSPROTO_H_
813 struct mount_args {
814 char *type;
815 char *path;
816 int flags;
817 caddr_t data;
818 };
819 #endif
820 /* ARGSUSED */
821 int
822 mount(td, uap)
823 struct thread *td;
824 struct mount_args /* {
825 char *type;
826 char *path;
827 int flags;
828 caddr_t data;
829 } */ *uap;
830 {
831 char *fstype;
832 char *fspath;
833 int error;
834
835 fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
836 fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
837
838 /*
839 * vfs_mount() actually takes a kernel string for `type' and
840 * `path' now, so extract them.
841 */
842 error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
843 if (error == 0)
844 error = copyinstr(uap->path, fspath, MNAMELEN, NULL);
845 if (error == 0)
846 error = vfs_mount(td, fstype, fspath, uap->flags, uap->data);
847 free(fstype, M_TEMP);
848 free(fspath, M_TEMP);
849 return (error);
850 }
851
852 /*
853 * vfs_mount(): actually attempt a filesystem mount.
854 *
855 * This routine is designed to be a "generic" entry point for routines
856 * that wish to mount a filesystem. All parameters except `fsdata' are
857 * pointers into kernel space. `fsdata' is currently still a pointer
858 * into userspace.
859 */
860 int
861 vfs_mount(td, fstype, fspath, fsflags, fsdata)
862 struct thread *td;
863 const char *fstype;
864 char *fspath;
865 int fsflags;
866 void *fsdata;
867 {
868 linker_file_t lf;
869 struct vnode *vp;
870 struct mount *mp;
871 struct vfsconf *vfsp;
872 int error, flag = 0, kern_flag = 0;
873 struct vattr va;
874 struct nameidata nd;
875
876 /*
877 * Be ultra-paranoid about making sure the type and fspath
878 * variables will fit in our mp buffers, including the
879 * terminating NUL.
880 */
881 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
882 return (ENAMETOOLONG);
883
884 if (usermount == 0) {
885 error = suser(td);
886 if (error)
887 return (error);
888 }
889 /*
890 * Do not allow NFS export by non-root users.
891 */
892 if (fsflags & MNT_EXPORTED) {
893 error = suser(td);
894 if (error)
895 return (error);
896 }
897 /*
898 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
899 */
900 if (suser(td))
901 fsflags |= MNT_NOSUID | MNT_NODEV;
902 /*
903 * Get vnode to be covered
904 */
905 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
906 if ((error = namei(&nd)) != 0)
907 return (error);
908 NDFREE(&nd, NDF_ONLY_PNBUF);
909 vp = nd.ni_vp;
910 if (fsflags & MNT_UPDATE) {
911 if ((vp->v_vflag & VV_ROOT) == 0) {
912 vput(vp);
913 return (EINVAL);
914 }
915 mp = vp->v_mount;
916 flag = mp->mnt_flag;
917 kern_flag = mp->mnt_kern_flag;
918 /*
919 * We only allow the filesystem to be reloaded if it
920 * is currently mounted read-only.
921 */
922 if ((fsflags & MNT_RELOAD) &&
923 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
924 vput(vp);
925 return (EOPNOTSUPP); /* Needs translation */
926 }
927 /*
928 * Only root, or the user that did the original mount is
929 * permitted to update it.
930 */
931 if (mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) {
932 error = suser(td);
933 if (error) {
934 vput(vp);
935 return (error);
936 }
937 }
938 if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
939 vput(vp);
940 return (EBUSY);
941 }
942 VI_LOCK(vp);
943 if ((vp->v_iflag & VI_MOUNT) != 0 ||
944 vp->v_mountedhere != NULL) {
945 VI_UNLOCK(vp);
946 vfs_unbusy(mp, td);
947 vput(vp);
948 return (EBUSY);
949 }
950 vp->v_iflag |= VI_MOUNT;
951 VI_UNLOCK(vp);
952 mp->mnt_flag |= fsflags &
953 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
954 VOP_UNLOCK(vp, 0, td);
955 goto update;
956 }
957 /*
958 * If the user is not root, ensure that they own the directory
959 * onto which we are attempting to mount.
960 */
961 error = VOP_GETATTR(vp, &va, td->td_ucred, td);
962 if (error) {
963 vput(vp);
964 return (error);
965 }
966 if (va.va_uid != td->td_ucred->cr_uid) {
967 error = suser(td);
968 if (error) {
969 vput(vp);
970 return (error);
971 }
972 }
973 if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
974 vput(vp);
975 return (error);
976 }
977 if (vp->v_type != VDIR) {
978 vput(vp);
979 return (ENOTDIR);
980 }
981 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
982 if (!strcmp(vfsp->vfc_name, fstype))
983 break;
984 if (vfsp == NULL) {
985 /* Only load modules for root (very important!). */
986 error = suser(td);
987 if (error) {
988 vput(vp);
989 return (error);
990 }
991 error = securelevel_gt(td->td_ucred, 0);
992 if (error) {
993 vput(vp);
994 return (error);
995 }
996 error = linker_load_module(NULL, fstype, NULL, NULL, &lf);
997 if (error || lf == NULL) {
998 vput(vp);
999 if (lf == NULL)
1000 error = ENODEV;
1001 return (error);
1002 }
1003 lf->userrefs++;
1004 /* Look up again to see if the VFS was loaded. */
1005 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1006 if (!strcmp(vfsp->vfc_name, fstype))
1007 break;
1008 if (vfsp == NULL) {
1009 lf->userrefs--;
1010 linker_file_unload(lf);
1011 vput(vp);
1012 return (ENODEV);
1013 }
1014 }
1015 VI_LOCK(vp);
1016 if ((vp->v_iflag & VI_MOUNT) != 0 ||
1017 vp->v_mountedhere != NULL) {
1018 VI_UNLOCK(vp);
1019 vput(vp);
1020 return (EBUSY);
1021 }
1022 vp->v_iflag |= VI_MOUNT;
1023 VI_UNLOCK(vp);
1024
1025 /*
1026 * Allocate and initialize the filesystem.
1027 */
1028 mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
1029 TAILQ_INIT(&mp->mnt_nvnodelist);
1030 TAILQ_INIT(&mp->mnt_reservedvnlist);
1031 mp->mnt_nvnodelistsize = 0;
1032 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
1033 (void)vfs_busy(mp, LK_NOWAIT, 0, td);
1034 mp->mnt_op = vfsp->vfc_vfsops;
1035 mp->mnt_vfc = vfsp;
1036 vfsp->vfc_refcount++;
1037 mp->mnt_stat.f_type = vfsp->vfc_typenum;
1038 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
1039 strlcpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
1040 mp->mnt_vnodecovered = vp;
1041 mp->mnt_cred = crdup(td->td_ucred);
1042 mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
1043 strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
1044 mp->mnt_iosize_max = DFLTPHYS;
1045 #ifdef MAC
1046 mac_init_mount(mp);
1047 mac_create_mount(td->td_ucred, mp);
1048 #endif
1049 VOP_UNLOCK(vp, 0, td);
1050 update:
1051 /*
1052 * Check if the fs implements the old VFS_MOUNT()
1053 * function, since the old system call was used.
1054 */
1055 if (mp->mnt_op->vfs_mount == NULL) {
1056 printf("%s doesn't support the old mount syscall\n",
1057 mp->mnt_vfc->vfc_name);
1058 VI_LOCK(vp);
1059 vp->v_iflag &= ~VI_MOUNT;
1060 VI_UNLOCK(vp);
1061 if (mp->mnt_flag & MNT_UPDATE)
1062 vfs_unbusy(mp, td);
1063 else {
1064 mp->mnt_vfc->vfc_refcount--;
1065 vfs_unbusy(mp, td);
1066 #ifdef MAC
1067 mac_destroy_mount(mp);
1068 #endif
1069 crfree(mp->mnt_cred);
1070 free(mp, M_MOUNT);
1071 }
1072 vrele(vp);
1073 return (EOPNOTSUPP);
1074 }
1075
1076 /*
1077 * Set the mount level flags.
1078 */
1079 if (fsflags & MNT_RDONLY)
1080 mp->mnt_flag |= MNT_RDONLY;
1081 else if (mp->mnt_flag & MNT_RDONLY)
1082 mp->mnt_kern_flag |= MNTK_WANTRDWR;
1083 mp->mnt_flag &=~ MNT_UPDATEMASK;
1084 mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
1085 /*
1086 * Mount the filesystem.
1087 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
1088 * get. No freeing of cn_pnbuf.
1089 */
1090 error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
1091 if (mp->mnt_flag & MNT_UPDATE) {
1092 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
1093 mp->mnt_flag &= ~MNT_RDONLY;
1094 mp->mnt_flag &=~
1095 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
1096 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
1097 if (error) {
1098 mp->mnt_flag = flag;
1099 mp->mnt_kern_flag = kern_flag;
1100 }
1101 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1102 if (mp->mnt_syncer == NULL)
1103 error = vfs_allocate_syncvnode(mp);
1104 } else {
1105 if (mp->mnt_syncer != NULL)
1106 vrele(mp->mnt_syncer);
1107 mp->mnt_syncer = NULL;
1108 }
1109 vfs_unbusy(mp, td);
1110 VI_LOCK(vp);
1111 vp->v_iflag &= ~VI_MOUNT;
1112 VI_UNLOCK(vp);
1113 vrele(vp);
1114 return (error);
1115 }
1116 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1117 /*
1118 * Put the new filesystem on the mount list after root.
1119 */
1120 cache_purge(vp);
1121 if (!error) {
1122 struct vnode *newdp;
1123
1124 VI_LOCK(vp);
1125 vp->v_iflag &= ~VI_MOUNT;
1126 VI_UNLOCK(vp);
1127 vp->v_mountedhere = mp;
1128 mtx_lock(&mountlist_mtx);
1129 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1130 mtx_unlock(&mountlist_mtx);
1131 if (VFS_ROOT(mp, &newdp))
1132 panic("mount: lost mount");
1133 checkdirs(vp, newdp);
1134 vput(newdp);
1135 VOP_UNLOCK(vp, 0, td);
1136 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1137 error = vfs_allocate_syncvnode(mp);
1138 vfs_unbusy(mp, td);
1139 if ((error = VFS_START(mp, 0, td)) != 0)
1140 vrele(vp);
1141 } else {
1142 VI_LOCK(vp);
1143 vp->v_iflag &= ~VI_MOUNT;
1144 VI_UNLOCK(vp);
1145 mp->mnt_vfc->vfc_refcount--;
1146 vfs_unbusy(mp, td);
1147 #ifdef MAC
1148 mac_destroy_mount(mp);
1149 #endif
1150 crfree(mp->mnt_cred);
1151 free(mp, M_MOUNT);
1152 vput(vp);
1153 }
1154 return (error);
1155 }
1156
1157 /*
1158 * Scan all active processes to see if any of them have a current
1159 * or root directory of `olddp'. If so, replace them with the new
1160 * mount point.
1161 */
1162 static void
1163 checkdirs(olddp, newdp)
1164 struct vnode *olddp, *newdp;
1165 {
1166 struct filedesc *fdp;
1167 struct proc *p;
1168 int nrele;
1169
1170 if (vrefcnt(olddp) == 1)
1171 return;
1172 sx_slock(&allproc_lock);
1173 LIST_FOREACH(p, &allproc, p_list) {
1174 mtx_lock(&fdesc_mtx);
1175 fdp = p->p_fd;
1176 if (fdp == NULL) {
1177 mtx_unlock(&fdesc_mtx);
1178 continue;
1179 }
1180 nrele = 0;
1181 FILEDESC_LOCK(fdp);
1182 if (fdp->fd_cdir == olddp) {
1183 VREF(newdp);
1184 fdp->fd_cdir = newdp;
1185 nrele++;
1186 }
1187 if (fdp->fd_rdir == olddp) {
1188 VREF(newdp);
1189 fdp->fd_rdir = newdp;
1190 nrele++;
1191 }
1192 FILEDESC_UNLOCK(fdp);
1193 mtx_unlock(&fdesc_mtx);
1194 while (nrele--)
1195 vrele(olddp);
1196 }
1197 sx_sunlock(&allproc_lock);
1198 if (rootvnode == olddp) {
1199 vrele(rootvnode);
1200 VREF(newdp);
1201 rootvnode = newdp;
1202 }
1203 }
1204
1205 /*
1206 * Unmount a filesystem.
1207 *
1208 * Note: unmount takes a path to the vnode mounted on as argument,
1209 * not special file (as before).
1210 */
1211 #ifndef _SYS_SYSPROTO_H_
1212 struct unmount_args {
1213 char *path;
1214 int flags;
1215 };
1216 #endif
1217 /* ARGSUSED */
1218 int
1219 unmount(td, uap)
1220 struct thread *td;
1221 register struct unmount_args /* {
1222 char *path;
1223 int flags;
1224 } */ *uap;
1225 {
1226 register struct vnode *vp;
1227 struct mount *mp;
1228 int error;
1229 struct nameidata nd;
1230
1231 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
1232 if ((error = namei(&nd)) != 0)
1233 return (error);
1234 vp = nd.ni_vp;
1235 NDFREE(&nd, NDF_ONLY_PNBUF);
1236 mp = vp->v_mount;
1237
1238 /*
1239 * Only root, or the user that did the original mount is
1240 * permitted to unmount this filesystem.
1241 */
1242 if (mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) {
1243 error = suser(td);
1244 if (error) {
1245 vput(vp);
1246 return (error);
1247 }
1248 }
1249
1250 /*
1251 * Don't allow unmounting the root filesystem.
1252 */
1253 if (mp->mnt_flag & MNT_ROOTFS) {
1254 vput(vp);
1255 return (EINVAL);
1256 }
1257
1258 /*
1259 * Must be the root of the filesystem
1260 */
1261 if ((vp->v_vflag & VV_ROOT) == 0) {
1262 vput(vp);
1263 return (EINVAL);
1264 }
1265 vput(vp);
1266 return (dounmount(mp, uap->flags, td));
1267 }
1268
1269 /*
1270 * Do the actual filesystem unmount.
1271 */
1272 int
1273 dounmount(mp, flags, td)
1274 struct mount *mp;
1275 int flags;
1276 struct thread *td;
1277 {
1278 struct vnode *coveredvp, *fsrootvp;
1279 int error;
1280 int async_flag;
1281
1282 mtx_lock(&mountlist_mtx);
1283 if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1284 mtx_unlock(&mountlist_mtx);
1285 return (EBUSY);
1286 }
1287 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1288 /* Allow filesystems to detect that a forced unmount is in progress. */
1289 if (flags & MNT_FORCE)
1290 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
1291 error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
1292 ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
1293 if (error) {
1294 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1295 if (mp->mnt_kern_flag & MNTK_MWAIT)
1296 wakeup(mp);
1297 return (error);
1298 }
1299 vn_start_write(NULL, &mp, V_WAIT);
1300
1301 if (mp->mnt_flag & MNT_EXPUBLIC)
1302 vfs_setpublicfs(NULL, NULL, NULL);
1303
1304 vfs_msync(mp, MNT_WAIT);
1305 async_flag = mp->mnt_flag & MNT_ASYNC;
1306 mp->mnt_flag &=~ MNT_ASYNC;
1307 cache_purgevfs(mp); /* remove cache entries for this file sys */
1308 if (mp->mnt_syncer != NULL)
1309 vrele(mp->mnt_syncer);
1310 /* Move process cdir/rdir refs on fs root to underlying vnode. */
1311 if (VFS_ROOT(mp, &fsrootvp) == 0) {
1312 if (mp->mnt_vnodecovered != NULL)
1313 checkdirs(fsrootvp, mp->mnt_vnodecovered);
1314 if (fsrootvp == rootvnode) {
1315 vrele(rootvnode);
1316 rootvnode = NULL;
1317 }
1318 vput(fsrootvp);
1319 }
1320 if (((mp->mnt_flag & MNT_RDONLY) ||
1321 (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
1322 (flags & MNT_FORCE)) {
1323 error = VFS_UNMOUNT(mp, flags, td);
1324 }
1325 vn_finished_write(mp);
1326 if (error) {
1327 /* Undo cdir/rdir and rootvnode changes made above. */
1328 if (VFS_ROOT(mp, &fsrootvp) == 0) {
1329 if (mp->mnt_vnodecovered != NULL)
1330 checkdirs(mp->mnt_vnodecovered, fsrootvp);
1331 if (rootvnode == NULL) {
1332 rootvnode = fsrootvp;
1333 vref(rootvnode);
1334 }
1335 vput(fsrootvp);
1336 }
1337 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
1338 (void) vfs_allocate_syncvnode(mp);
1339 mtx_lock(&mountlist_mtx);
1340 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1341 mp->mnt_flag |= async_flag;
1342 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
1343 &mountlist_mtx, td);
1344 if (mp->mnt_kern_flag & MNTK_MWAIT)
1345 wakeup(mp);
1346 return (error);
1347 }
1348 crfree(mp->mnt_cred);
1349 mtx_lock(&mountlist_mtx);
1350 TAILQ_REMOVE(&mountlist, mp, mnt_list);
1351 if ((coveredvp = mp->mnt_vnodecovered) != NULL)
1352 coveredvp->v_mountedhere = NULL;
1353 mp->mnt_vfc->vfc_refcount--;
1354 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
1355 panic("unmount: dangling vnode");
1356 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
1357 lockdestroy(&mp->mnt_lock);
1358 if (coveredvp != NULL)
1359 vrele(coveredvp);
1360 if (mp->mnt_kern_flag & MNTK_MWAIT)
1361 wakeup(mp);
1362 #ifdef MAC
1363 mac_destroy_mount(mp);
1364 #endif
1365 if (mp->mnt_op->vfs_mount == NULL)
1366 vfs_freeopts(mp->mnt_opt);
1367 free(mp, M_MOUNT);
1368 return (0);
1369 }
1370
1371 /*
1372 * Lookup a filesystem type, and if found allocate and initialize
1373 * a mount structure for it.
1374 *
1375 * Devname is usually updated by mount(8) after booting.
1376 */
1377 int
1378 vfs_rootmountalloc(fstypename, devname, mpp)
1379 char *fstypename;
1380 char *devname;
1381 struct mount **mpp;
1382 {
1383 struct thread *td = curthread; /* XXX */
1384 struct vfsconf *vfsp;
1385 struct mount *mp;
1386
1387 if (fstypename == NULL)
1388 return (ENODEV);
1389 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1390 if (!strcmp(vfsp->vfc_name, fstypename))
1391 break;
1392 if (vfsp == NULL)
1393 return (ENODEV);
1394 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
1395 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
1396 (void)vfs_busy(mp, LK_NOWAIT, 0, td);
1397 TAILQ_INIT(&mp->mnt_nvnodelist);
1398 TAILQ_INIT(&mp->mnt_reservedvnlist);
1399 mp->mnt_nvnodelistsize = 0;
1400 mp->mnt_vfc = vfsp;
1401 mp->mnt_op = vfsp->vfc_vfsops;
1402 mp->mnt_flag = MNT_RDONLY;
1403 mp->mnt_vnodecovered = NULLVP;
1404 mp->mnt_cred = crdup(td->td_ucred);
1405 vfsp->vfc_refcount++;
1406 mp->mnt_iosize_max = DFLTPHYS;
1407 mp->mnt_stat.f_type = vfsp->vfc_typenum;
1408 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
1409 strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
1410 mp->mnt_stat.f_mntonname[0] = '/';
1411 mp->mnt_stat.f_mntonname[1] = 0;
1412 strlcpy(mp->mnt_stat.f_mntfromname, devname, MNAMELEN);
1413 #ifdef MAC
1414 mac_init_mount(mp);
1415 mac_create_mount(td->td_ucred, mp);
1416 #endif
1417 *mpp = mp;
1418 return (0);
1419 }
1420
1421 /*
1422 * Find and mount the root filesystem
1423 */
1424 void
1425 vfs_mountroot(void)
1426 {
1427 char *cp;
1428 int i, error;
1429
1430 g_waitidle();
1431
1432 /*
1433 * The root filesystem information is compiled in, and we are
1434 * booted with instructions to use it.
1435 */
1436 #ifdef ROOTDEVNAME
1437 if ((boothowto & RB_DFLTROOT) &&
1438 !vfs_mountroot_try(ROOTDEVNAME))
1439 return;
1440 #endif
1441 /*
1442 * We are booted with instructions to prompt for the root filesystem,
1443 * or to use the compiled-in default when it doesn't exist.
1444 */
1445 if (boothowto & (RB_DFLTROOT | RB_ASKNAME)) {
1446 if (!vfs_mountroot_ask())
1447 return;
1448 }
1449
1450 /*
1451 * We've been given the generic "use CDROM as root" flag. This is
1452 * necessary because one media may be used in many different
1453 * devices, so we need to search for them.
1454 */
1455 if (boothowto & RB_CDROM) {
1456 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1457 if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1458 return;
1459 }
1460 }
1461
1462 /*
1463 * Try to use the value read by the loader from /etc/fstab, or
1464 * supplied via some other means. This is the preferred
1465 * mechanism.
1466 */
1467 if ((cp = getenv("vfs.root.mountfrom")) != NULL) {
1468 error = vfs_mountroot_try(cp);
1469 freeenv(cp);
1470 if (!error)
1471 return;
1472 }
1473
1474 /*
1475 * Try values that may have been computed by the machine-dependant
1476 * legacy code.
1477 */
1478 if (!vfs_mountroot_try(rootdevnames[0]))
1479 return;
1480 if (!vfs_mountroot_try(rootdevnames[1]))
1481 return;
1482
1483 /*
1484 * If we have a compiled-in default, and haven't already tried it, try
1485 * it now.
1486 */
1487 #ifdef ROOTDEVNAME
1488 if (!(boothowto & RB_DFLTROOT))
1489 if (!vfs_mountroot_try(ROOTDEVNAME))
1490 return;
1491 #endif
1492
1493 /*
1494 * Everything so far has failed, prompt on the console if we haven't
1495 * already tried that.
1496 */
1497 if (!(boothowto & (RB_DFLTROOT | RB_ASKNAME)) && !vfs_mountroot_ask())
1498 return;
1499 panic("Root mount failed, startup aborted.");
1500 }
1501
1502 /*
1503 * Mount (mountfrom) as the root filesystem.
1504 */
1505 static int
1506 vfs_mountroot_try(char *mountfrom)
1507 {
1508 struct mount *mp;
1509 char *vfsname, *path;
1510 const char *devname;
1511 int error;
1512 char patt[32];
1513 int s;
1514
1515 vfsname = NULL;
1516 path = NULL;
1517 mp = NULL;
1518 error = EINVAL;
1519
1520 if (mountfrom == NULL)
1521 return(error); /* don't complain */
1522
1523 s = splcam(); /* Overkill, but annoying without it */
1524 printf("Mounting root from %s\n", mountfrom);
1525 splx(s);
1526
1527 /* parse vfs name and path */
1528 vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1529 path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1530 vfsname[0] = path[0] = 0;
1531 sprintf(patt, "%%%d[a-z0-9]:%%%zds", MFSNAMELEN, MNAMELEN);
1532 if (sscanf(mountfrom, patt, vfsname, path) < 1)
1533 goto done;
1534
1535 /* allocate a root mount */
1536 error = vfs_rootmountalloc(vfsname, path[0] != 0 ? path : ROOTNAME,
1537 &mp);
1538 if (error != 0) {
1539 printf("Can't allocate root mount for filesystem '%s': %d\n",
1540 vfsname, error);
1541 goto done;
1542 }
1543 mp->mnt_flag |= MNT_ROOTFS;
1544
1545 /* do our best to set rootdev */
1546 if ((path[0] != 0) && setrootbyname(path))
1547 printf("setrootbyname failed\n");
1548
1549 /* If the root device is a type "memory disk", mount RW */
1550 if (rootdev != NODEV && devsw(rootdev) != NULL) {
1551 devname = devtoname(rootdev);
1552 if (devname[0] == 'm' && devname[1] == 'd')
1553 mp->mnt_flag &= ~MNT_RDONLY;
1554 }
1555
1556 /*
1557 * Set the mount path to be something useful, because the
1558 * filesystem code isn't responsible now for initialising
1559 * f_mntonname unless they want to override the default
1560 * (which is `path'.)
1561 */
1562 strlcpy(mp->mnt_stat.f_mntonname, "/", MNAMELEN);
1563
1564 error = VFS_MOUNT(mp, NULL, NULL, NULL, curthread);
1565
1566 done:
1567 if (vfsname != NULL)
1568 free(vfsname, M_MOUNT);
1569 if (path != NULL)
1570 free(path, M_MOUNT);
1571 if (error != 0) {
1572 if (mp != NULL) {
1573 vfs_unbusy(mp, curthread);
1574 #ifdef MAC
1575 mac_destroy_mount(mp);
1576 #endif
1577 crfree(mp->mnt_cred);
1578 free(mp, M_MOUNT);
1579 }
1580 printf("Root mount failed: %d\n", error);
1581 } else {
1582
1583 /* register with list of mounted filesystems */
1584 mtx_lock(&mountlist_mtx);
1585 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1586 mtx_unlock(&mountlist_mtx);
1587
1588 /* sanity check system clock against root fs timestamp */
1589 inittodr(mp->mnt_time);
1590 vfs_unbusy(mp, curthread);
1591 error = VFS_START(mp, 0, curthread);
1592 }
1593 return(error);
1594 }
1595
1596 /*
1597 * Spin prompting on the console for a suitable root filesystem
1598 */
1599 static int
1600 vfs_mountroot_ask(void)
1601 {
1602 char name[128];
1603 int i;
1604 dev_t dev;
1605
1606 for(;;) {
1607 printf("\nManual root filesystem specification:\n");
1608 printf(" <fstype>:<device> Mount <device> using filesystem <fstype>\n");
1609 #if defined(__i386__) || defined(__ia64__)
1610 printf(" eg. ufs:da0s1a\n");
1611 #else
1612 printf(" eg. ufs:/dev/da0a\n");
1613 #endif
1614 printf(" ? List valid disk boot devices\n");
1615 printf(" <empty line> Abort manual input\n");
1616 printf("\nmountroot> ");
1617 gets(name);
1618 if (name[0] == 0)
1619 return(1);
1620 if (name[0] == '?') {
1621 if (!g_dev_print()) {
1622 printf("Possibly valid devices for 'ufs' root:\n");
1623 for (i = 0; i < NUMCDEVSW; i++) {
1624 dev = makedev(i, 0);
1625 if (devsw(dev) != NULL)
1626 printf(" \"%s\"", devsw(dev)->d_name);
1627 }
1628 }
1629 printf("\n");
1630 continue;
1631 }
1632 if (!vfs_mountroot_try(name))
1633 return(0);
1634 }
1635 }
1636
/*
 * Local helper function for vfs_mountroot_ask.
 *
 * Reads one line from the console with cngetc(), echoing each
 * character, and stores it NUL-terminated in `cp'.  Newline or
 * carriage return ends the line.  Backspace, DEL and '#' erase the
 * previous character; '@' and ^U discard the whole line.
 *
 * Every character is masked to 7 bits before it is examined, so the
 * value can never be -1; the unreachable `case -1' label is gone.
 *
 * NOTE(review): the interface carries no buffer size, so input longer
 * than the caller's buffer is not bounded here.
 */
static void
gets(char *cp)
{
	char *lp;
	int c;

	lp = cp;
	for (;;) {
		printf("%c", c = cngetc() & 0177);
		switch (c) {
		case '\n':
		case '\r':
			*lp = '\0';
			return;
		case '\b':
		case '\177':
			if (lp > cp) {
				printf(" \b");
				lp--;
			}
			continue;
		case '#':
			/*
			 * Test before stepping back, so that the pointer
			 * never moves below the start of the buffer.
			 */
			if (lp > cp)
				lp--;
			continue;
		case '@':
		case 'u' & 037:
			lp = cp;
			printf("%c", '\n');
			continue;
		default:
			*lp++ = c;
		}
	}
}
1677
1678 /*
1679 * Convert a given name to the dev_t of the disk-like device
1680 * it refers to.
1681 */
1682 dev_t
1683 getdiskbyname(char *name) {
1684 char *cp;
1685 dev_t dev;
1686
1687 cp = name;
1688 if (!bcmp(cp, "/dev/", 5))
1689 cp += 5;
1690
1691 dev = NODEV;
1692 EVENTHANDLER_INVOKE(dev_clone, cp, strlen(cp), &dev);
1693 return (dev);
1694 }
1695
1696 /*
1697 * Set rootdev to match (name), given that we expect it to
1698 * refer to a disk-like device.
1699 */
1700 static int
1701 setrootbyname(char *name)
1702 {
1703 dev_t diskdev;
1704
1705 diskdev = getdiskbyname(name);
1706 if (diskdev != NODEV) {
1707 rootdev = diskdev;
1708 return (0);
1709 }
1710
1711 return (1);
1712 }
1713
/*
 * DDB "show disk" command: print the dev_t that getdiskbyname()
 * returns for the name given as the command modifier.
 */
#ifdef DDB
DB_SHOW_COMMAND(disk, db_getdiskbyname)
{
	dev_t found;

	/* The device name is taken from the modifier; it is required. */
	if (modif[0] == '\0') {
		db_error("usage: show disk/devicename");
		return;
	}

	found = getdiskbyname(modif);
	if (found == NODEV) {
		db_printf("No disk device matched.\n");
		return;
	}
	db_printf("dev_t = %p\n", found);
}
#endif
1731
1732 /*
1733 * Get a mount option by its name.
1734 *
1735 * Return 0 if the option was found, ENOENT otherwise.
1736 * If len is non-NULL it will be filled with the length
1737 * of the option. If buf is non-NULL, it will be filled
1738 * with the address of the option.
1739 */
1740 int
1741 vfs_getopt(opts, name, buf, len)
1742 struct vfsoptlist *opts;
1743 const char *name;
1744 void **buf;
1745 int *len;
1746 {
1747 struct vfsopt *opt;
1748
1749 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1750
1751 TAILQ_FOREACH(opt, opts, link) {
1752 if (strcmp(name, opt->name) == 0) {
1753 if (len != NULL)
1754 *len = opt->len;
1755 if (buf != NULL)
1756 *buf = opt->value;
1757 return (0);
1758 }
1759 }
1760 return (ENOENT);
1761 }
1762
1763 /*
1764 * Find and copy a mount option.
1765 *
1766 * The size of the buffer has to be specified
1767 * in len, if it is not the same length as the
1768 * mount option, EINVAL is returned.
1769 * Returns ENOENT if the option is not found.
1770 */
1771 int
1772 vfs_copyopt(opts, name, dest, len)
1773 struct vfsoptlist *opts;
1774 const char *name;
1775 void *dest;
1776 int len;
1777 {
1778 struct vfsopt *opt;
1779
1780 KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
1781
1782 TAILQ_FOREACH(opt, opts, link) {
1783 if (strcmp(name, opt->name) == 0) {
1784 if (len != opt->len)
1785 return (EINVAL);
1786 bcopy(opt->value, dest, opt->len);
1787 return (0);
1788 }
1789 }
1790 return (ENOENT);
1791 }
Cache object: 0c95bd32065333f5339058e7fcd2c546
|