/* FreeBSD/Linux Kernel Cross Reference: sys/kern/vfs_subr.c */
1 /* $NetBSD: vfs_subr.c,v 1.218.2.5 2005/12/29 01:37:32 riz Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * Copyright (c) 1989, 1993
42 * The Regents of the University of California. All rights reserved.
43 * (c) UNIX System Laboratories, Inc.
44 * All or some portions of this file are derived from material licensed
45 * to the University of California by American Telephone and Telegraph
46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47 * the permission of UNIX System Laboratories, Inc.
48 *
49 * Redistribution and use in source and binary forms, with or without
50 * modification, are permitted provided that the following conditions
51 * are met:
52 * 1. Redistributions of source code must retain the above copyright
53 * notice, this list of conditions and the following disclaimer.
54 * 2. Redistributions in binary form must reproduce the above copyright
55 * notice, this list of conditions and the following disclaimer in the
56 * documentation and/or other materials provided with the distribution.
57 * 3. Neither the name of the University nor the names of its contributors
58 * may be used to endorse or promote products derived from this software
59 * without specific prior written permission.
60 *
61 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
62 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
64 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
71 * SUCH DAMAGE.
72 *
73 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
74 */
75
76 /*
77 * External virtual filesystem routines
78 */
79
80 #include <sys/cdefs.h>
81 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.218.2.5 2005/12/29 01:37:32 riz Exp $");
82
83 #include "opt_inet.h"
84 #include "opt_ddb.h"
85 #include "opt_compat_netbsd.h"
86 #include "opt_compat_43.h"
87
88 #include <sys/param.h>
89 #include <sys/systm.h>
90 #include <sys/proc.h>
91 #include <sys/kernel.h>
92 #include <sys/mount.h>
93 #include <sys/time.h>
94 #include <sys/event.h>
95 #include <sys/fcntl.h>
96 #include <sys/vnode.h>
97 #include <sys/stat.h>
98 #include <sys/namei.h>
99 #include <sys/ucred.h>
100 #include <sys/buf.h>
101 #include <sys/errno.h>
102 #include <sys/malloc.h>
103 #include <sys/domain.h>
104 #include <sys/mbuf.h>
105 #include <sys/sa.h>
106 #include <sys/syscallargs.h>
107 #include <sys/device.h>
108 #include <sys/dirent.h>
109 #include <sys/filedesc.h>
110
111 #include <miscfs/specfs/specdev.h>
112 #include <miscfs/genfs/genfs.h>
113 #include <miscfs/syncfs/syncfs.h>
114
115 #include <netinet/in.h>
116
117 #include <uvm/uvm.h>
118 #include <uvm/uvm_ddb.h>
119
120 #include <netinet/in.h>
121
122 #include <sys/sysctl.h>
123
/* Map S_IFMT inode-type bits (shifted right 12) to vnode types. */
const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
/* Inverse map: vnode type to S_IFMT inode-type bits. */
const int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 * bufremvn() poisons the link with NOLIST so membership can be tested later.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list =	vnode free list (in vnode.h) */
/* Vnodes with no references and no cached buffers: preferred for recycling. */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
/* Unreferenced vnodes that still have buffers (holdcnt > 0). */
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;			/* publicly exported FS */

/* Locks protecting, respectively: the mount list, fsid generation,
 * per-mount vnode lists, the two free lists above, and the special
 * device hash chains. */
struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/* XXX - gross; single global lock to protect v_numoutput */
struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

struct pool vnode_pool;				/* memory pool for vnodes */

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * Local declarations.
 */
void insmntque(struct vnode *, struct mount *);
int getdevvp(dev_t, struct vnode **, enum vtype);
void vgoneall(struct vnode *);

void vclean(struct vnode *, int, struct proc *);

static int vfs_hang_addrlist(struct mount *, struct netexport *,
    struct export_args *);
static int vfs_free_netcred(struct radix_node *, void *);
static void vfs_free_addrlist(struct netexport *);
static struct vnode *getcleanvnode(struct proc *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif
195
196 /*
197 * Initialize the vnode management data structures.
198 */
199 void
200 vntblinit()
201 {
202
203 pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
204 &pool_allocator_nointr);
205
206 /*
207 * Initialize the filesystem syncer.
208 */
209 vn_initialize_syncerd();
210 }
211
/*
 * Reclaim vnodes until the global count drops to "target".
 * Returns 0 on success, or EBUSY if no more clean vnodes can be
 * obtained from the free lists.
 */
int
vfs_drainvnodes(long target, struct proc *p)
{

	simple_lock(&vnode_free_list_slock);
	while (numvnodes > target) {
		struct vnode *vp;

		/*
		 * getcleanvnode() is entered with vnode_free_list_slock
		 * held and releases it in every case, so the lock must be
		 * re-taken below; on failure we may return immediately.
		 */
		vp = getcleanvnode(p);
		if (vp == NULL)
			return EBUSY; /* give up */
		pool_put(&vnode_pool, vp);
		simple_lock(&vnode_free_list_slock);
		numvnodes--;
	}
	simple_unlock(&vnode_free_list_slock);

	return 0;
}
231
/*
 * grab a vnode from freelist and clean it.
 *
 * Must be entered with vnode_free_list_slock held; the lock is
 * released before return in all cases.  Returns NULLVP if no
 * reclaimable vnode is found on either free list.
 */
struct vnode *
getcleanvnode(p)
	struct proc *p;
{
	struct vnode *vp;
	struct mount *mp;
	struct freelst *listhd;

	LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));
	/* Prefer the free list; fall back to held (buffer-caching) vnodes. */
	if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
		vp = TAILQ_FIRST(listhd = &vnode_hold_list);
	for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
		/* Skip vnodes whose interlock is contended. */
		if (!simple_lock_try(&vp->v_interlock))
			continue;
		/*
		 * as our lwp might hold the underlying vnode locked,
		 * don't try to reclaim the VLAYER vnode if it's locked.
		 */
		if ((vp->v_flag & VXLOCK) == 0 &&
		    ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
			/* Found a candidate if the FS permits writes now. */
			if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
				break;
		}
		mp = NULL;
		simple_unlock(&vp->v_interlock);
	}

	if (vp == NULLVP) {
		simple_unlock(&vnode_free_list_slock);
		return NULLVP;
	}

	/* Vnodes on the free lists must have no active references. */
	if (vp->v_usecount)
		panic("free vnode isn't, vp %p", vp);
	TAILQ_REMOVE(listhd, vp, v_freelist);
	/* see comment on why 0xdeadb is set at end of vgone (below) */
	vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
	simple_unlock(&vnode_free_list_slock);
	vp->v_lease = NULL;

	/*
	 * Reclaim the vnode's old identity.  NOTE(review): vgonel() is
	 * entered with v_interlock held and is presumed to release it —
	 * confirm against its definition.
	 */
	if (vp->v_type != VBAD)
		vgonel(vp, p);
	else
		simple_unlock(&vp->v_interlock);
	vn_finished_write(mp, 0);
#ifdef DIAGNOSTIC
	if (vp->v_data || vp->v_uobj.uo_npages ||
	    TAILQ_FIRST(&vp->v_uobj.memq))
		panic("cleaned vnode isn't, vp %p", vp);
	if (vp->v_numoutput)
		panic("clean vnode has pending I/O's, vp %p", vp);
#endif
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	return vp;
}
291
/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 *
 * flags may include LK_NOWAIT (fail with ENOENT rather than wait for an
 * in-progress unmount) and LK_RECURSEFAIL (fail with EDEADLK when the
 * caller is itself the unmounting process).  Returns 0 with a shared
 * busy lock held, or an errno.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_iflag & IMNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		/* Drop the caller's interlock while we sleep. */
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		/* Sample IMNT_GONE before the struct can be recycled. */
		gone = mp->mnt_iflag & IMNT_GONE;

		/* Last waiter out tells dounmount() it may free mp. */
		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	/* A shared lock can only fail if something is badly wrong. */
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}
342
343 /*
344 * Free a busy filesystem.
345 */
346 void
347 vfs_unbusy(mp)
348 struct mount *mp;
349 {
350
351 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
352 }
353
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 *
 * Returns 0 with *mpp set to a new, busied, read-only mount, or
 * ENODEV if no filesystem named fstypename is registered.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	/* M_WAITOK: allocation sleeps rather than failing. */
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	/* Busy the brand-new mount; cannot fail, no unmount in progress. */
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	/*
	 * NOTE(review): strncpy does not NUL-terminate when vfs_name is
	 * exactly MFSNAMELEN long; termination here relies on the memset
	 * above and names being shorter than MFSNAMELEN.
	 */
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}
390
391 /*
392 * Lookup a mount point by filesystem identifier.
393 */
394 struct mount *
395 vfs_getvfs(fsid)
396 fsid_t *fsid;
397 {
398 struct mount *mp;
399
400 simple_lock(&mountlist_slock);
401 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
402 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
403 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
404 simple_unlock(&mountlist_slock);
405 return (mp);
406 }
407 }
408 simple_unlock(&mountlist_slock);
409 return ((struct mount *)0);
410 }
411
/*
 * Get a new unique fsid
 *
 * val[0] is built from the filesystem type and a small rolling mount
 * id; val[1] is the filesystem type itself.  Uniqueness is ensured by
 * probing the mount list via vfs_getvfs().
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;	/* rolling per-boot mount counter */
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsid.val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	/* Skip mount id 0 so val[0] is never just the bare type. */
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		/* Bump the candidate until no existing mount matches. */
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}
440
/*
 * Make a 'unique' number from a mount type name.
 *
 * Simple left-shift/XOR hash over the bytes of the name; the empty
 * string hashes to 0.
 */
long
makefstype(const char *type)
{
	const char *cp;
	long hash;

	hash = 0;
	for (cp = type; *cp != '\0'; cp++) {
		hash <<= 2;
		hash ^= *cp;
	}
	return hash;
}
456
457
458 /*
459 * Set vnode attributes to VNOVAL
460 */
461 void
462 vattr_null(vap)
463 struct vattr *vap;
464 {
465
466 vap->va_type = VNON;
467
468 /*
469 * Assign individually so that it is safe even if size and
470 * sign of each member are varied.
471 */
472 vap->va_mode = VNOVAL;
473 vap->va_nlink = VNOVAL;
474 vap->va_uid = VNOVAL;
475 vap->va_gid = VNOVAL;
476 vap->va_fsid = VNOVAL;
477 vap->va_fileid = VNOVAL;
478 vap->va_size = VNOVAL;
479 vap->va_blocksize = VNOVAL;
480 vap->va_atime.tv_sec =
481 vap->va_mtime.tv_sec =
482 vap->va_ctime.tv_sec =
483 vap->va_birthtime.tv_sec = VNOVAL;
484 vap->va_atime.tv_nsec =
485 vap->va_mtime.tv_nsec =
486 vap->va_ctime.tv_nsec =
487 vap->va_birthtime.tv_nsec = VNOVAL;
488 vap->va_gen = VNOVAL;
489 vap->va_flags = VNOVAL;
490 vap->va_rdev = VNOVAL;
491 vap->va_bytes = VNOVAL;
492 vap->va_vaflags = 0;
493 }
494
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);	/* ops vector for dead vnodes */
long numvnodes;				/* current number of allocated vnodes */
500
/*
 * Return the next vnode from the free list.
 *
 * Either allocates a fresh vnode from the pool or recycles one from
 * the free lists, initializes it with the given tag, mount point and
 * ops vector, and returns it in *vpp with a use count of 1.  Returns
 * 0 on success or an errno (e.g. ENFILE when the table is full).
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)(void *);
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

 try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		/* Fresh vnode: count it and set up its UVM object. */
		numvnodes++;
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		uobj = &vp->v_uobj;
		uobj->pgops = &uvm_vnodeops;
		uobj->uo_npages = 0;
		TAILQ_INIT(&uobj->memq);
	} else {
		/* Recycle: getcleanvnode() drops the free-list lock. */
		vp = getcleanvnode(p);
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				/* Pool was exhausted; wait and retry. */
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		vp->v_flag = 0;
		vp->v_socket = NULL;
#ifdef VERIFIED_EXEC
		vp->fp_status = FINGERPRINT_INVALID;
#endif
	}
	/* Common (re)initialization for both fresh and recycled vnodes. */
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uobj.vmobjlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}
624
/*
 * This is really just the reverse of getnewvnode(). Needed for
 * VFS_VGET functions who may need to push back a vnode in case
 * of a locking race.
 *
 * The vnode must have the single reference getnewvnode() gave it;
 * it is detached from its mount, marked VBAD and put back at the
 * head of the appropriate free list.
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	/* Vnodes with cached buffers go on the hold list instead. */
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}
654
655 /*
656 * Move a vnode from one mount queue to another.
657 */
658 void
659 insmntque(vp, mp)
660 struct vnode *vp;
661 struct mount *mp;
662 {
663
664 #ifdef DIAGNOSTIC
665 if ((mp != NULL) &&
666 (mp->mnt_iflag & IMNT_UNMOUNT) &&
667 !(mp->mnt_flag & MNT_SOFTDEP) &&
668 vp->v_tag != VT_VFS) {
669 panic("insmntque into dying filesystem");
670 }
671 #endif
672
673 simple_lock(&mntvnode_slock);
674 /*
675 * Delete from old mount point vnode list, if on one.
676 */
677 if (vp->v_mount != NULL)
678 LIST_REMOVE(vp, v_mntvnodes);
679 /*
680 * Insert into list of vnodes for the new mount point, if available.
681 */
682 if ((vp->v_mount = mp) != NULL)
683 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
684 simple_unlock(&mntvnode_slock);
685 }
686
/*
 * Update outstanding I/O count and do wakeup if requested.
 *
 * Called from biodone() when a buffer's write completes; wakes any
 * thread sleeping in VBWAIT for the vnode's output to drain.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		/* XXX global lock hack
		 * can't use v_interlock here since this is called
		 * in interrupt context from biodone().
		 */
		simple_lock(&global_v_numoutput_slock);
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
		simple_unlock(&global_v_numoutput_slock);
	}
}
711
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * With V_SAVE, dirty data is written out first (and must all make it
 * to disk); without it, dirty buffers are simply discarded.  Returns
 * 0 or an errno from the page flush, fsync, or interrupted sleep.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	/*
	 * NOTE(review): v_interlock is passed locked to VOP_PUTPAGES,
	 * which is expected to release it, so the early return below
	 * does not leak the lock — confirm against the VOP contract.
	 */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	/* Discard the clean buffers, waiting for busy ones to settle. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			/* PNORELOCK: ltsleep drops b_interlock for us. */
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			/* The list may have changed while we slept. */
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	/* Now the dirty buffers. */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			simple_unlock(&bp->b_interlock);
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}
813
/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * Buffers with logical block numbers below lbn are left untouched.
 * Returns 0, or an errno from the page flush or an interrupted sleep.
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	voff_t off;

	/* Free the UVM pages from the truncation point onward first. */
	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	s = splbio();

restart:
	/* Invalidate clean buffers at or beyond lbn. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			/* PNORELOCK: ltsleep drops b_interlock for us. */
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			/* The list may have changed while we slept. */
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	/* Same for the dirty buffers. */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	splx(s);

	return (0);
}
883
/*
 * Write out all dirty pages and buffers of a vnode.  If sync is set,
 * wait for the writes to complete and re-check until the dirty list
 * is empty; otherwise the writes are started asynchronously.
 */
void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	int s;

	/* Flush the page cache first; errors are deliberately ignored. */
	simple_lock(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		/* Skip buffers already being written by someone else. */
		if ((bp->b_flags & B_BUSY)) {
			simple_unlock(&bp->b_interlock);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	/* Synchronous case: wait for all outstanding writes to drain. */
	simple_lock(&global_v_numoutput_slock);
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
		    &global_v_numoutput_slock);
	}
	simple_unlock(&global_v_numoutput_slock);
	splx(s);
	/* New dirty buffers may have appeared meanwhile; go again. */
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
937
/*
 * Associate a buffer with a vnode.
 *
 * Takes a hold reference on the vnode and places the buffer on the
 * vnode's clean list; for device vnodes the buffer's b_dev is set
 * from the vnode's rdev.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}
963
/*
 * Disassociate a buffer from a vnode.
 *
 * Drops the hold reference taken by bgetvp() and, when this was the
 * vnode's last reason to be on the syncer worklist, removes it from
 * that list as well.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/* No pages, no dirty buffers: nothing left for the syncer. */
	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}
995
/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		/* Nothing dirty left: drop the vnode from the syncer. */
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			/* Pick a writeback delay based on the vnode type. */
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				/* Mounted block devices hold metadata. */
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}
1052
1053 /*
1054 * Create a vnode for a block device.
1055 * Used for root filesystem and swap areas.
1056 * Also used for memory file system special devices.
1057 */
1058 int
1059 bdevvp(dev, vpp)
1060 dev_t dev;
1061 struct vnode **vpp;
1062 {
1063
1064 return (getdevvp(dev, vpp, VBLK));
1065 }
1066
1067 /*
1068 * Create a vnode for a character device.
1069 * Used for kernfs and some console handling.
1070 */
1071 int
1072 cdevvp(dev, vpp)
1073 dev_t dev;
1074 struct vnode **vpp;
1075 {
1076
1077 return (getdevvp(dev, vpp, VCHR));
1078 }
1079
/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 *
 * NODEV yields *vpp == NULLVP with success.  If an aliased vnode for
 * the same device already exists, that vnode is returned instead of
 * the freshly allocated one.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	/* Prefer an existing alias for this device, if one is found. */
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
1113
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 *
 * Returns NULLVP when nvp itself should be used (its specinfo has
 * been set up here), or the reused alias vnode otherwise.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	/* Only device vnodes can alias. */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		simple_unlock(&spechash_slock);
		if (vp->v_usecount == 0) {
			/* NOTE(review): vgonel() presumably releases
			 * v_interlock — confirm against its definition. */
			vgonel(vp, p);
			goto loop;
		}
		/*
		 * What we're interested to know here is if someone else has
		 * removed this vnode from the device hash list while we were
		 * waiting.  This can only happen if vclean() did it, and
		 * this requires the vnode to be locked.  Therefore, we use
		 * LK_SLEEPFAIL and retry.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL))
			goto loop;
		simple_lock(&spechash_slock);
		break;
	}
	/* No reusable alias: give nvp its own specinfo and hash it. */
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			/* Out of memory: wait for pages and retry. */
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		simple_lock_init(&nvp->v_spec_cow_slock);
		SLIST_INIT(&nvp->v_spec_cow_head);
		nvp->v_spec_cow_req = 0;
		nvp->v_spec_cow_count = 0;

		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/* Reuse the existing alias: strip its old identity and adopt
	 * nvp's ops vector and tag. */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
1203
1204 /*
1205 * Grab a particular vnode from the free list, increment its
1206 * reference count and lock it. If the vnode lock bit is set the
1207 * vnode is being eliminated in vgone. In that case, we can not
1208 * grab the vnode, so the process is awakened when the transition is
1209 * completed, and an error returned to indicate that the vnode is no
1210 * longer usable (possibly having been changed to a new file system type).
1211 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		/* Sleep until vclean() wakes VXWANT sleepers, then fail. */
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	/*
	 * First reference: take the vnode off whichever free list
	 * (free or hold) it currently sits on.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active.  We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
1286
1287 /*
1288 * vput(), just unlock and vrele()
1289 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		/* Still referenced: just drop our lock and reference. */
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	/* Last reference gone: pages no longer count as executable. */
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	simple_unlock(&vp->v_interlock);
	/* Caller held the vnode lock; VOP_INACTIVE() releases it. */
	VOP_INACTIVE(vp, p);
}
1330
1331 /*
1332 * Vnode release.
1333 * If count drops to zero, call inactive routine and return to freelist.
1334 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		/* Other references remain; nothing more to do. */
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	/* Last reference gone: pages no longer count as executable. */
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	/*
	 * Unlike vput(), the vnode is unlocked here, so we must take
	 * the vnode lock (consuming the interlock) before inactivating.
	 * If the lock attempt fails, VOP_INACTIVE is skipped.
	 */
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}
1374
1375 #ifdef DIAGNOSTIC
1376 /*
1377 * Page or buffer structure gets a reference.
1378 */
void
vholdl(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
}
1406
1407 /*
1408 * Page or buffer structure frees a reference.
1409 */
void
holdrelel(vp)
	struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrelel: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
}
1441
1442 /*
1443 * Vnode reference.
1444 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	/*
	 * vref() may only bump an already-referenced vnode; taking
	 * the first reference must go through vget() so the vnode
	 * is removed from the free list.
	 */
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
1462 #endif /* DIAGNOSTIC */
1463
1464 /*
1465 * Remove any vnodes in the vnode table belonging to mount point mp.
1466 *
1467 * If FORCECLOSE is not specified, there should not be any active ones,
1468 * return error if any are found (nb: this is a user error, not a
1469 * system error). If FORCECLOSE is specified, detach any active vnodes
1470 * that are found.
1471 *
1472 * If WRITECLOSE is set, only flush out regular file vnodes open for
1473 * writing.
1474 *
1475 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1476 */
#ifdef DEBUG
/* Debug knob: when nonzero, vflush() prints each busy vnode it skips. */
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif
1481
int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		/*
		 * The list may have changed while mntvnode_slock was
		 * dropped (vgonel below); if this vnode was moved to
		 * another mount, rescan from the start.
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 * (vgonel() consumes the interlock.)
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				/* Keep the device alive as an anonymous one. */
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
1559
1560 /*
1561 * Disassociate the underlying file system from a vnode.
1562 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	struct mount *mp;
	int active;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */

	if ((active = vp->v_usecount) != 0) {
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 * If this is a special device, also remove it from the
	 * special device alias list, if it is on one.
	 */
	if (flags & DOCLOSE) {
		int error;
		struct vnode *vq, *vx;

		vn_start_write(vp, &mp, V_WAIT | V_LOWER);
		error = vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
		vn_finished_write(mp, V_LOWER);
		/* If saving dirty buffers failed, discard them instead. */
		if (error)
			error = vinvalbuf(vp, 0, NOCRED, p, 0, 0);
		KASSERT(error == 0);
		KASSERT((vp->v_flag & VONWORKLST) == 0);

		if (active)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);

		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
		    vp->v_specinfo != 0) {
			simple_lock(&spechash_slock);
			if (vp->v_hashchain != NULL) {
				/* Unlink vp from its device hash chain. */
				if (*vp->v_hashchain == vp) {
					*vp->v_hashchain = vp->v_specnext;
				} else {
					for (vq = *vp->v_hashchain; vq;
					     vq = vq->v_specnext) {
						if (vq->v_specnext != vp)
							continue;
						vq->v_specnext = vp->v_specnext;
						break;
					}
					if (vq == NULL)
						panic("missing bdev");
				}
				if (vp->v_flag & VALIASED) {
					/*
					 * If only one other alias remains,
					 * it is no longer aliased either.
					 */
					vx = NULL;
					for (vq = *vp->v_hashchain; vq;
					     vq = vq->v_specnext) {
						if (vq->v_rdev != vp->v_rdev ||
						    vq->v_type != vp->v_type)
							continue;
						if (vx)
							break;
						vx = vq;
					}
					if (vx == NULL)
						panic("missing alias");
					if (vq == NULL)
						vx->v_flag &= ~VALIASED;
					vp->v_flag &= ~VALIASED;
				}
			}
			simple_unlock(&spechash_slock);
			FREE(vp->v_specinfo, M_VNODE);
			vp->v_specinfo = NULL;
		}
	}
	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	KASSERT(vp->v_uobj.uo_npages == 0);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	VN_KNOTE(vp, NOTE_REVOKE);	/* FreeBSD has this in vn_pollgone() */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}
1741
1742 /*
1743 * Recycle an unused vnode to the front of the free list.
1744 * Release the passed interlock if the vnode will be recycled.
1745 */
1746 int
1747 vrecycle(vp, inter_lkp, p)
1748 struct vnode *vp;
1749 struct simplelock *inter_lkp;
1750 struct proc *p;
1751 {
1752
1753 simple_lock(&vp->v_interlock);
1754 if (vp->v_usecount == 0) {
1755 if (inter_lkp)
1756 simple_unlock(inter_lkp);
1757 vgonel(vp, p);
1758 return (1);
1759 }
1760 simple_unlock(&vp->v_interlock);
1761 return (0);
1762 }
1763
1764 /*
1765 * Eliminate all activity associated with a vnode
1766 * in preparation for reuse.
1767 */
1768 void
1769 vgone(vp)
1770 struct vnode *vp;
1771 {
1772 struct proc *p = curproc; /* XXX */
1773
1774 simple_lock(&vp->v_interlock);
1775 vgonel(vp, p);
1776 }
1777
1778 /*
1779 * vgone, with the vp interlock held.
1780 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */

	vclean(vp, DOCLOSE, p);
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */

	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);

	/*
	 * The test of the back pointer and the reference count of
	 * zero is because it will be removed from the free list by
	 * getcleanvnode, but will not have its reference count
	 * incremented until after calling vgone.  If the reference
	 * count were incremented first, vgone would (incorrectly)
	 * try to close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */

	vp->v_type = VBAD;
	if (vp->v_usecount == 0) {
		boolean_t dofree;

		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		/*
		 * if it isn't on the freelist, we're called by getcleanvnode
		 * and vnode is being re-used.  otherwise, we'll free it.
		 */
		dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
		if (dofree) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			numvnodes--;
		}
		simple_unlock(&vnode_free_list_slock);
		if (dofree)
			pool_put(&vnode_pool, vp);
	}
}
1847
1848 /*
1849 * Lookup a vnode by device number.
1850 */
1851 int
1852 vfinddev(dev, type, vpp)
1853 dev_t dev;
1854 enum vtype type;
1855 struct vnode **vpp;
1856 {
1857 struct vnode *vp;
1858 int rc = 0;
1859
1860 simple_lock(&spechash_slock);
1861 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1862 if (dev != vp->v_rdev || type != vp->v_type)
1863 continue;
1864 *vpp = vp;
1865 rc = 1;
1866 break;
1867 }
1868 simple_unlock(&spechash_slock);
1869 return (rc);
1870 }
1871
1872 /*
1873 * Revoke all the vnodes corresponding to the specified minor number
1874 * range (endpoints inclusive) of the specified major.
1875 */
1876 void
1877 vdevgone(maj, minl, minh, type)
1878 int maj, minl, minh;
1879 enum vtype type;
1880 {
1881 struct vnode *vp;
1882 int mn;
1883
1884 for (mn = minl; mn <= minh; mn++)
1885 if (vfinddev(makedev(maj, mn), type, &vp))
1886 VOP_REVOKE(vp, REVOKEALL);
1887 }
1888
1889 /*
1890 * Calculate the total number of references to a special device.
1891 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	/* No aliases: the vnode's own count is the answer. */
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 * Restart the scan afterwards since the chain changed.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
1921
1922 /*
1923 * Print out a description of a vnode.
1924 */
/* Printable names for vnode types; must stay in enum vtype order. */
const char * const vnode_types[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD"
};
1936
void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
	    vp->v_tag, vnode_types[vp->v_type],
	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
	/*
	 * Build a "|FLAG|FLAG" string; &buf[1] below skips the
	 * leading '|' when any flag was appended.
	 */
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strlcat(buf, "|VROOT", sizeof(buf));
	if (vp->v_flag & VTEXT)
		strlcat(buf, "|VTEXT", sizeof(buf));
	if (vp->v_flag & VEXECMAP)
		strlcat(buf, "|VEXECMAP", sizeof(buf));
	if (vp->v_flag & VSYSTEM)
		strlcat(buf, "|VSYSTEM", sizeof(buf));
	if (vp->v_flag & VXLOCK)
		strlcat(buf, "|VXLOCK", sizeof(buf));
	if (vp->v_flag & VXWANT)
		strlcat(buf, "|VXWANT", sizeof(buf));
	if (vp->v_flag & VBWAIT)
		strlcat(buf, "|VBWAIT", sizeof(buf));
	if (vp->v_flag & VALIASED)
		strlcat(buf, "|VALIASED", sizeof(buf));
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		/* Let the filesystem print its private data. */
		VOP_PRINT(vp);
	}
}
1975
1976 #ifdef DEBUG
1977 /*
1978 * List all of the locked vnodes in the system.
1979 * Called when debugging the kernel.
1980 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	     mp = nmp) {
		/* Skip mounts we cannot busy without sleeping. */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		/* Re-take the list lock before advancing and unbusying. */
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
2005 #endif
2006
2007 /*
2008 * sysctl helper routine for vfs.generic.conf lookups.
2009 */
2010 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
static int
sysctl_vfs_generic_conf(SYSCTLFN_ARGS)
{
	struct vfsconf vfc;
	extern const char * const mountcompatnames[];
	extern int nmountcompatnames;
	struct sysctlnode node;
	struct vfsops *vfsp;
	u_int vfsnum;

	if (namelen != 1)
		return (ENOTDIR);
	/* name[0] is the compat filesystem type number to look up. */
	vfsnum = name[0];
	if (vfsnum >= nmountcompatnames ||
	    mountcompatnames[vfsnum] == NULL)
		return (EOPNOTSUPP);
	vfsp = vfs_getopsbyname(mountcompatnames[vfsnum]);
	if (vfsp == NULL)
		return (EOPNOTSUPP);

	/* Build a compat struct vfsconf from the modern vfsops. */
	vfc.vfc_vfsops = vfsp;
	strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
	vfc.vfc_typenum = vfsnum;
	vfc.vfc_refcount = vfsp->vfs_refcount;
	vfc.vfc_flags = 0;
	vfc.vfc_mountroot = vfsp->vfs_mountroot;
	vfc.vfc_next = NULL;

	node = *rnode;
	node.sysctl_data = &vfc;
	return (sysctl_lookup(SYSCTLFN_CALL(&node)));
}
2043 #endif
2044
2045 /*
2046 * Top level filesystem related information gathering.
2047 */
SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	extern int nmountcompatnames;
#endif

	/* Root of the vfs subtree. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "vfs", NULL,
		       NULL, 0, NULL, 0,
		       CTL_VFS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "generic",
		       SYSCTL_DESCR("Non-specific vfs related information"),
		       NULL, 0, NULL, 0,
		       CTL_VFS, VFS_GENERIC, CTL_EOL);

#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "maxtypenum",
		       SYSCTL_DESCR("Highest valid filesystem type number"),
		       NULL, nmountcompatnames, NULL, 0,
		       CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM, CTL_EOL);
#endif
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "usermount",
		       SYSCTL_DESCR("Whether unprivileged users may mount "
				    "filesystems"),
		       NULL, 0, &dovfsusermount, 0,
		       CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "conf",
		       SYSCTL_DESCR("Filesystem configuration information"),
		       sysctl_vfs_generic_conf, 0, NULL,
		       sizeof(struct vfsconf),
		       CTL_VFS, VFS_GENERIC, VFS_CONF, CTL_EOL);
#endif
}
2091
2092
int kinfo_vdebug = 1;		/* log when a vnode changes mounts mid-dump */
int kinfo_vgetfailed;		/* NOTE(review): not referenced in this file */
#define KINFO_VNODESLOP	10	/* slop for vnodes created during the dump */
2096 /*
2097 * Dump vnode list (via sysctl).
2098 * Copyout address of vnode followed by vnode.
2099 */
2100 /* ARGSUSED */
int
sysctl_kern_vnode(SYSCTLFN_ARGS)
{
	char *where = oldp;
	size_t *sizep = oldlenp;
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

	if (namelen != 0)
		return (EOPNOTSUPP);
	if (newp != NULL)
		return (EPERM);

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	/* Size query: estimate with slop for concurrently-created vnodes. */
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	     mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				/* Rewind output and rescan this mount. */
				bp = savebp;
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			/* Drop the list lock around copyout (may fault). */
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}
2173
2174 /*
2175 * Check to see if a filesystem is mounted on a block device.
2176 */
2177 int
2178 vfs_mountedon(vp)
2179 struct vnode *vp;
2180 {
2181 struct vnode *vq;
2182 int error = 0;
2183
2184 if (vp->v_specmountpoint != NULL)
2185 return (EBUSY);
2186 if (vp->v_flag & VALIASED) {
2187 simple_lock(&spechash_slock);
2188 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2189 if (vq->v_rdev != vp->v_rdev ||
2190 vq->v_type != vp->v_type)
2191 continue;
2192 if (vq->v_specmountpoint != NULL) {
2193 error = EBUSY;
2194 break;
2195 }
2196 }
2197 simple_unlock(&spechash_slock);
2198 }
2199 return (error);
2200 }
2201
/*
 * Sanity-check a sockaddr supplied for export lists: the length must
 * match the family's structure, the port must be zero, and (for INET)
 * the pad bytes must be clear.  Returns 0 if acceptable, -1 otherwise.
 */
static int
sacheck(struct sockaddr *sa)
{
	switch (sa->sa_family) {
#ifdef INET
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
		const char *cp;
		size_t n;

		if (sin->sin_len != sizeof(*sin) || sin->sin_port != 0)
			return -1;
		cp = (const char *)sin->sin_zero;
		for (n = 0; n < sizeof(sin->sin_zero); n++) {
			if (cp[n] != '\0')
				return -1;
		}
		return 0;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;

		if (sin6->sin6_len != sizeof(*sin6) || sin6->sin6_port != 0)
			return -1;
		return 0;
	}
#endif
	default:
		/* Unknown address family: reject. */
		return -1;
	}
}
2237
2238 /*
2239 * Build hash lists of net addresses and hang them off the mount point.
2240 * Called by ufs_mount() to set up the lists of export addresses.
2241 */
2242 static int
2243 vfs_hang_addrlist(mp, nep, argp)
2244 struct mount *mp;
2245 struct netexport *nep;
2246 struct export_args *argp;
2247 {
2248 struct netcred *np, *enp;
2249 struct radix_node_head *rnh;
2250 int i;
2251 struct sockaddr *saddr, *smask = 0;
2252 struct domain *dom;
2253 int error;
2254
2255 if (argp->ex_addrlen == 0) {
2256 if (mp->mnt_flag & MNT_DEFEXPORTED)
2257 return (EPERM);
2258 np = &nep->ne_defexported;
2259 np->netc_exflags = argp->ex_flags;
2260 crcvt(&np->netc_anon, &argp->ex_anon);
2261 np->netc_anon.cr_ref = 1;
2262 mp->mnt_flag |= MNT_DEFEXPORTED;
2263 return (0);
2264 }
2265
2266 if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN)
2267 return (EINVAL);
2268
2269 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2270 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
2271 memset((caddr_t)np, 0, i);
2272 saddr = (struct sockaddr *)(np + 1);
2273 error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
2274 if (error)
2275 goto out;
2276 if (saddr->sa_len > argp->ex_addrlen)
2277 saddr->sa_len = argp->ex_addrlen;
2278 if (sacheck(saddr) == -1)
2279 return EINVAL;
2280 if (argp->ex_masklen) {
2281 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2282 error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
2283 if (error)
2284 goto out;
2285 if (smask->sa_len > argp->ex_masklen)
2286 smask->sa_len = argp->ex_masklen;
2287 if (smask->sa_family != saddr->sa_family)
2288 return EINVAL;
2289 if (sacheck(smask) == -1)
2290 return EINVAL;
2291 }
2292 i = saddr->sa_family;
2293 if ((rnh = nep->ne_rtable[i]) == 0) {
2294 /*
2295 * Seems silly to initialize every AF when most are not
2296 * used, do so on demand here
2297 */
2298 for (dom = domains; dom; dom = dom->dom_next)
2299 if (dom->dom_family == i && dom->dom_rtattach) {
2300 dom->dom_rtattach((void **)&nep->ne_rtable[i],
2301 dom->dom_rtoffset);
2302 break;
2303 }
2304 if ((rnh = nep->ne_rtable[i]) == 0) {
2305 error = ENOBUFS;
2306 goto out;
2307 }
2308 }
2309
2310 enp = (struct netcred *)(*rnh->rnh_addaddr)(saddr, smask, rnh,
2311 np->netc_rnodes);
2312 if (enp != np) {
2313 if (enp == NULL) {
2314 enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
2315 smask, rnh);
2316 if (enp == NULL) {
2317 error = EPERM;
2318 goto out;
2319 }
2320 } else
2321 enp->netc_refcnt++;
2322
2323 goto check;
2324 } else
2325 enp->netc_refcnt = 1;
2326
2327 np->netc_exflags = argp->ex_flags;
2328 crcvt(&np->netc_anon, &argp->ex_anon);
2329 np->netc_anon.cr_ref = 1;
2330 return 0;
2331 check:
2332 if (enp->netc_exflags != argp->ex_flags ||
2333 crcmp(&enp->netc_anon, &argp->ex_anon) != 0)
2334 error = EPERM;
2335 else
2336 error = 0;
2337 out:
2338 free(np, M_NETADDR);
2339 return error;
2340 }
2341
/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;
	struct netcred *np = (struct netcred *)(void *)rn;

	/*
	 * rnh_walktree() callback used by vfs_free_addrlist(): detach the
	 * node from the radix tree first, then drop one reference.  The
	 * netcred itself is freed only when the last reference goes away.
	 */
	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	if (--(np->netc_refcnt) <= 0)
		free(np, M_NETADDR);
	return (0);
}
2356
2357 /*
2358 * Free the net address hash lists that are hanging off the mount points.
2359 */
2360 static void
2361 vfs_free_addrlist(nep)
2362 struct netexport *nep;
2363 {
2364 int i;
2365 struct radix_node_head *rnh;
2366
2367 for (i = 0; i <= AF_MAX; i++)
2368 if ((rnh = nep->ne_rtable[i]) != NULL) {
2369 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
2370 free((caddr_t)rnh, M_RTABLE);
2371 nep->ne_rtable[i] = 0;
2372 }
2373 }
2374
2375 int
2376 vfs_export(mp, nep, argp)
2377 struct mount *mp;
2378 struct netexport *nep;
2379 struct export_args *argp;
2380 {
2381 int error;
2382
2383 if (argp->ex_flags & MNT_DELEXPORT) {
2384 if (mp->mnt_flag & MNT_EXPUBLIC) {
2385 vfs_setpublicfs(NULL, NULL, NULL);
2386 mp->mnt_flag &= ~MNT_EXPUBLIC;
2387 }
2388 vfs_free_addrlist(nep);
2389 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2390 }
2391 if (argp->ex_flags & MNT_EXPORTED) {
2392 if (argp->ex_flags & MNT_EXPUBLIC) {
2393 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2394 return (error);
2395 mp->mnt_flag |= MNT_EXPUBLIC;
2396 }
2397 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
2398 return (error);
2399 mp->mnt_flag |= MNT_EXPORTED;
2400 }
2401 return (0);
2402 }
2403
2404 /*
2405 * Set the publicly exported filesystem (WebNFS). Currently, only
2406 * one public filesystem is possible in the spec (RFC 2054 and 2055)
2407 */
2408 int
2409 vfs_setpublicfs(mp, nep, argp)
2410 struct mount *mp;
2411 struct netexport *nep;
2412 struct export_args *argp;
2413 {
2414 int error;
2415 struct vnode *rvp;
2416 char *cp;
2417
2418 /*
2419 * mp == NULL -> invalidate the current info, the FS is
2420 * no longer exported. May be called from either vfs_export
2421 * or unmount, so check if it hasn't already been done.
2422 */
2423 if (mp == NULL) {
2424 if (nfs_pub.np_valid) {
2425 nfs_pub.np_valid = 0;
2426 if (nfs_pub.np_index != NULL) {
2427 FREE(nfs_pub.np_index, M_TEMP);
2428 nfs_pub.np_index = NULL;
2429 }
2430 }
2431 return (0);
2432 }
2433
2434 /*
2435 * Only one allowed at a time.
2436 */
2437 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2438 return (EBUSY);
2439
2440 /*
2441 * Get real filehandle for root of exported FS.
2442 */
2443 memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
2444 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2445
2446 if ((error = VFS_ROOT(mp, &rvp)))
2447 return (error);
2448
2449 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2450 return (error);
2451
2452 vput(rvp);
2453
2454 /*
2455 * If an indexfile was specified, pull it in.
2456 */
2457 if (argp->ex_indexfile != NULL) {
2458 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2459 M_WAITOK);
2460 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2461 MAXNAMLEN, (size_t *)0);
2462 if (!error) {
2463 /*
2464 * Check for illegal filenames.
2465 */
2466 for (cp = nfs_pub.np_index; *cp; cp++) {
2467 if (*cp == '/') {
2468 error = EINVAL;
2469 break;
2470 }
2471 }
2472 }
2473 if (error) {
2474 FREE(nfs_pub.np_index, M_TEMP);
2475 return (error);
2476 }
2477 }
2478
2479 nfs_pub.np_mount = mp;
2480 nfs_pub.np_valid = 1;
2481 return (0);
2482 }
2483
2484 struct netcred *
2485 vfs_export_lookup(mp, nep, nam)
2486 struct mount *mp;
2487 struct netexport *nep;
2488 struct mbuf *nam;
2489 {
2490 struct netcred *np;
2491 struct radix_node_head *rnh;
2492 struct sockaddr *saddr;
2493
2494 np = NULL;
2495 if (mp->mnt_flag & MNT_EXPORTED) {
2496 /*
2497 * Lookup in the export list first.
2498 */
2499 if (nam != NULL) {
2500 saddr = mtod(nam, struct sockaddr *);
2501 rnh = nep->ne_rtable[saddr->sa_family];
2502 if (rnh != NULL) {
2503 np = (struct netcred *)
2504 (*rnh->rnh_matchaddr)((caddr_t)saddr,
2505 rnh);
2506 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2507 np = NULL;
2508 }
2509 }
2510 /*
2511 * If no address match, use the default if it exists.
2512 */
2513 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2514 np = &nep->ne_defexported;
2515 }
2516 return (np);
2517 }
2518
2519 /*
2520 * Do the usual access checking.
2521 * file_mode, uid and gid are from the vnode in question,
2522 * while acc_mode and cred are from the VOP_ACCESS parameter list
2523 */
2524 int
2525 vaccess(type, file_mode, uid, gid, acc_mode, cred)
2526 enum vtype type;
2527 mode_t file_mode;
2528 uid_t uid;
2529 gid_t gid;
2530 mode_t acc_mode;
2531 struct ucred *cred;
2532 {
2533 mode_t mask;
2534
2535 /*
2536 * Super-user always gets read/write access, but execute access depends
2537 * on at least one execute bit being set.
2538 */
2539 if (cred->cr_uid == 0) {
2540 if ((acc_mode & VEXEC) && type != VDIR &&
2541 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2542 return (EACCES);
2543 return (0);
2544 }
2545
2546 mask = 0;
2547
2548 /* Otherwise, check the owner. */
2549 if (cred->cr_uid == uid) {
2550 if (acc_mode & VEXEC)
2551 mask |= S_IXUSR;
2552 if (acc_mode & VREAD)
2553 mask |= S_IRUSR;
2554 if (acc_mode & VWRITE)
2555 mask |= S_IWUSR;
2556 return ((file_mode & mask) == mask ? 0 : EACCES);
2557 }
2558
2559 /* Otherwise, check the groups. */
2560 if (cred->cr_gid == gid || groupmember(gid, cred)) {
2561 if (acc_mode & VEXEC)
2562 mask |= S_IXGRP;
2563 if (acc_mode & VREAD)
2564 mask |= S_IRGRP;
2565 if (acc_mode & VWRITE)
2566 mask |= S_IWGRP;
2567 return ((file_mode & mask) == mask ? 0 : EACCES);
2568 }
2569
2570 /* Otherwise, check everyone else. */
2571 if (acc_mode & VEXEC)
2572 mask |= S_IXOTH;
2573 if (acc_mode & VREAD)
2574 mask |= S_IROTH;
2575 if (acc_mode & VWRITE)
2576 mask |= S_IWOTH;
2577 return ((file_mode & mask) == mask ? 0 : EACCES);
2578 }
2579
2580 /*
2581 * Unmount all file systems.
2582 * We traverse the list in reverse order under the assumption that doing so
2583 * will avoid needing to worry about dependencies.
2584 */
2585 void
2586 vfs_unmountall(p)
2587 struct proc *p;
2588 {
2589 struct mount *mp, *nmp;
2590 int allerror, error;
2591
2592 for (allerror = 0,
2593 mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2594 nmp = mp->mnt_list.cqe_prev;
2595 #ifdef DEBUG
2596 printf("unmounting %s (%s)...\n",
2597 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
2598 #endif
2599 /*
2600 * XXX Freeze syncer. Must do this before locking the
2601 * mount point. See dounmount() for details.
2602 */
2603 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
2604 if (vfs_busy(mp, 0, 0)) {
2605 lockmgr(&syncer_lock, LK_RELEASE, NULL);
2606 continue;
2607 }
2608 if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
2609 printf("unmount of %s failed with error %d\n",
2610 mp->mnt_stat.f_mntonname, error);
2611 allerror = 1;
2612 }
2613 }
2614 if (allerror)
2615 printf("WARNING: some file systems would not unmount\n");
2616 }
2617
extern struct simplelock bqueue_slock; /* XXX */

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	struct lwp *l = curlwp;
	struct proc *p;

	/* XXX we're certainly not running in proc0's context! */
	if (l == NULL || (p = l->l_proc) == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* remove user process from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	/* Kick off an asynchronous sync of all dirty buffers. */
	sys_sync(l, NULL, NULL);

	/* Wait for sync to finish. */
	if (buf_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		printf("giving up\n");
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(p);
}
2668
2669 /*
2670 * Mount the root file system. If the operator didn't specify a
2671 * file system to use, try all possible file systems until one
2672 * succeeds.
2673 */
2674 int
2675 vfs_mountroot()
2676 {
2677 struct vfsops *v;
2678
2679 if (root_device == NULL)
2680 panic("vfs_mountroot: root device unknown");
2681
2682 switch (root_device->dv_class) {
2683 case DV_IFNET:
2684 if (rootdev != NODEV)
2685 panic("vfs_mountroot: rootdev set for DV_IFNET "
2686 "(0x%08x -> %d,%d)", rootdev,
2687 major(rootdev), minor(rootdev));
2688 break;
2689
2690 case DV_DISK:
2691 if (rootdev == NODEV)
2692 panic("vfs_mountroot: rootdev not set for DV_DISK");
2693 break;
2694
2695 default:
2696 printf("%s: inappropriate for root file system\n",
2697 root_device->dv_xname);
2698 return (ENODEV);
2699 }
2700
2701 /*
2702 * If user specified a file system, use it.
2703 */
2704 if (mountroot != NULL)
2705 return ((*mountroot)());
2706
2707 /*
2708 * Try each file system currently configured into the kernel.
2709 */
2710 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2711 if (v->vfs_mountroot == NULL)
2712 continue;
2713 #ifdef DEBUG
2714 aprint_normal("mountroot: trying %s...\n", v->vfs_name);
2715 #endif
2716 if ((*v->vfs_mountroot)() == 0) {
2717 aprint_normal("root file system type: %s\n",
2718 v->vfs_name);
2719 break;
2720 }
2721 }
2722
2723 if (v == NULL) {
2724 printf("no file system for %s", root_device->dv_xname);
2725 if (root_device->dv_class == DV_DISK)
2726 printf(" (dev 0x%x)", rootdev);
2727 printf("\n");
2728 return (EFTYPE);
2729 }
2730 return (0);
2731 }
2732
2733 /*
2734 * Given a file system name, look up the vfsops for that
2735 * file system, or return NULL if file system isn't present
2736 * in the kernel.
2737 */
2738 struct vfsops *
2739 vfs_getopsbyname(name)
2740 const char *name;
2741 {
2742 struct vfsops *v;
2743
2744 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2745 if (strcmp(v->vfs_name, name) == 0)
2746 break;
2747 }
2748
2749 return (v);
2750 }
2751
2752 /*
2753 * Establish a file system and initialize it.
2754 */
2755 int
2756 vfs_attach(vfs)
2757 struct vfsops *vfs;
2758 {
2759 struct vfsops *v;
2760 int error = 0;
2761
2762
2763 /*
2764 * Make sure this file system doesn't already exist.
2765 */
2766 LIST_FOREACH(v, &vfs_list, vfs_list) {
2767 if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
2768 error = EEXIST;
2769 goto out;
2770 }
2771 }
2772
2773 /*
2774 * Initialize the vnode operations for this file system.
2775 */
2776 vfs_opv_init(vfs->vfs_opv_descs);
2777
2778 /*
2779 * Now initialize the file system itself.
2780 */
2781 (*vfs->vfs_init)();
2782
2783 /*
2784 * ...and link it into the kernel's list.
2785 */
2786 LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
2787
2788 /*
2789 * Sanity: make sure the reference count is 0.
2790 */
2791 vfs->vfs_refcount = 0;
2792
2793 out:
2794 return (error);
2795 }
2796
2797 /*
2798 * Remove a file system from the kernel.
2799 */
2800 int
2801 vfs_detach(vfs)
2802 struct vfsops *vfs;
2803 {
2804 struct vfsops *v;
2805
2806 /*
2807 * Make sure no one is using the filesystem.
2808 */
2809 if (vfs->vfs_refcount != 0)
2810 return (EBUSY);
2811
2812 /*
2813 * ...and remove it from the kernel's list.
2814 */
2815 LIST_FOREACH(v, &vfs_list, vfs_list) {
2816 if (v == vfs) {
2817 LIST_REMOVE(v, vfs_list);
2818 break;
2819 }
2820 }
2821
2822 if (v == NULL)
2823 return (ESRCH);
2824
2825 /*
2826 * Now run the file system-specific cleanups.
2827 */
2828 (*vfs->vfs_done)();
2829
2830 /*
2831 * Free the vnode operations vector.
2832 */
2833 vfs_opv_free(vfs->vfs_opv_descs);
2834 return (0);
2835 }
2836
2837 void
2838 vfs_reinit(void)
2839 {
2840 struct vfsops *vfs;
2841
2842 LIST_FOREACH(vfs, &vfs_list, vfs_list) {
2843 if (vfs->vfs_reinit) {
2844 (*vfs->vfs_reinit)();
2845 }
2846 }
2847 }
2848
2849 /*
2850 * Request a filesystem to suspend write operations.
2851 */
2852 int
2853 vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
2854 {
2855 struct proc *p = curproc; /* XXX */
2856 int error;
2857
2858 while ((mp->mnt_iflag & IMNT_SUSPEND)) {
2859 if (slptimeo < 0)
2860 return EWOULDBLOCK;
2861 error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
2862 if (error)
2863 return error;
2864 }
2865 mp->mnt_iflag |= IMNT_SUSPEND;
2866
2867 if (mp->mnt_writeopcountupper > 0)
2868 tsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt", 0);
2869
2870 error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
2871 if (error) {
2872 vfs_write_resume(mp);
2873 return error;
2874 }
2875 mp->mnt_iflag |= IMNT_SUSPENDLOW;
2876
2877 if (mp->mnt_writeopcountlower > 0)
2878 tsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt", 0);
2879 mp->mnt_iflag |= IMNT_SUSPENDED;
2880
2881 return 0;
2882 }
2883
2884 /*
2885 * Request a filesystem to resume write operations.
2886 */
2887 void
2888 vfs_write_resume(struct mount *mp)
2889 {
2890
2891 if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
2892 return;
2893 mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
2894 wakeup(&mp->mnt_flag);
2895 }
2896
2897 void
2898 copy_statfs_info(struct statfs *sbp, const struct mount *mp)
2899 {
2900 const struct statfs *mbp;
2901
2902 if (sbp == (mbp = &mp->mnt_stat))
2903 return;
2904
2905 sbp->f_oflags = mbp->f_oflags;
2906 sbp->f_type = mbp->f_type;
2907 (void)memcpy(&sbp->f_fsid, &mbp->f_fsid, sizeof(sbp->f_fsid));
2908 sbp->f_owner = mbp->f_owner;
2909 sbp->f_flags = mbp->f_flags;
2910 sbp->f_syncwrites = mbp->f_syncwrites;
2911 sbp->f_asyncwrites = mbp->f_asyncwrites;
2912 sbp->f_spare[0] = mbp->f_spare[0];
2913 (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
2914 sizeof(sbp->f_fstypename));
2915 (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
2916 sizeof(sbp->f_mntonname));
2917 (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
2918 sizeof(sbp->f_mntfromname));
2919 }
2920
/*
 * Fill in the f_fstypename, f_mntonname and f_mntfromname fields of
 * mp's statfs cache.  `onp' / `fromp' may each be NULL (leave field
 * alone) and live in user or kernel space per ukon / ukfrom
 * (UIO_SYSSPACE => kernel).  If the caller is chrooted, the path from
 * the real root to the chroot jail is prepended to the mount point
 * name so statfs output is meaningful system-wide.
 */
int
set_statfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    struct mount *mp, struct proc *p)
{
	int error;
	size_t size;
	struct statfs *sfs = &mp->mnt_stat;
	/* copystr for kernel-space sources, copyinstr for user space */
	int (*fun)(const void *, void *, size_t, size_t *);

	(void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));

	if (onp) {
		struct cwdinfo *cwdi = p->p_cwdi;
		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
		if (cwdi->cwdi_rdir != NULL) {
			size_t len;
			char *bp;
			char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);

			if (!path) /* XXX can't happen with M_WAITOK */
				return ENOMEM;

			/* Build the chroot prefix backwards from the end. */
			bp = path + MAXPATHLEN;
			*--bp = '\0';
			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
			    path, MAXPATHLEN / 2, 0, p);
			if (error) {
				free(path, M_TEMP);
				return error;
			}

			len = strlen(bp);
			if (len > sizeof(sfs->f_mntonname) - 1)
				len = sizeof(sfs->f_mntonname) - 1;
			/*
			 * strncpy may leave no NUL here; the memset
			 * below zero-fills from `size' onward.
			 */
			(void)strncpy(sfs->f_mntonname, bp, len);
			free(path, M_TEMP);

			if (len < sizeof(sfs->f_mntonname) - 1) {
				/* Append onp after the chroot prefix. */
				error = (*fun)(onp, &sfs->f_mntonname[len],
				    sizeof(sfs->f_mntonname) - len - 1, &size);
				if (error)
					return error;
				size += len;
			} else {
				/* Prefix alone filled the field. */
				size = len;
			}
		} else {
			error = (*fun)(onp, &sfs->f_mntonname,
			    sizeof(sfs->f_mntonname) - 1, &size);
			if (error)
				return error;
		}
		/* Zero the tail so the field is always NUL-terminated. */
		(void)memset(sfs->f_mntonname + size, 0,
		    sizeof(sfs->f_mntonname) - size);
	}

	if (fromp) {
		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
		error = (*fun)(fromp, sfs->f_mntfromname,
		    sizeof(sfs->f_mntfromname) - 1, &size);
		if (error)
			return error;
		(void)memset(sfs->f_mntfromname + size, 0,
		    sizeof(sfs->f_mntfromname) - size);
	}
	return 0;
}
2989
#ifdef DDB
/*
 * bitmask_snprintf(9) format string for decoding struct buf b_flags;
 * the leading \2 selects bit-position ("new style") decoding.
 */
const char buf_flagbits[] =
	"\2\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
	"\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
	"\32XXX\33VFLUSH";
2996
/*
 * ddb helper: dump the interesting fields of a struct buf through the
 * supplied printf-like function.  `full' is currently unused; kept
 * for symmetry with the other ddb print routines.
 */
void
vfs_buf_print(bp, full, pr)
	struct buf *bp;
	int full;
	void (*pr)(const char *, ...);
{
	char buf[1024];

	(*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);

	/* Decode b_flags symbolically via the buf_flagbits format. */
	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
	(*pr)("  error %d flags 0x%s\n", bp->b_error, buf);

	(*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)("  data %p saveaddr %p dep %p\n",
	    bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
	(*pr)("  iodone %p\n", bp->b_iodone);
}
3017
3018
/* bitmask_snprintf(9) format string for decoding vnode v_flag bits. */
const char vnode_flagbits[] =
	"\2\1ROOT\2TEXT\3SYSTEM\4ISTTY\5EXECMAP"
	"\11XLOCK\12XWANT\13BWAIT\14ALIASED"
	"\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";

/*
 * Printable names for vnode v_tag values, indexed by tag; must stay
 * in the same order as the VT_* enumeration used elsewhere in the
 * kernel (note: review assumption — confirm against the enum in the
 * vnode header before reordering or appending).
 */
const char * const vnode_tags[] = {
	"VT_NON",
	"VT_UFS",
	"VT_NFS",
	"VT_MFS",
	"VT_MSDOSFS",
	"VT_LFS",
	"VT_LOFS",
	"VT_FDESC",
	"VT_PORTAL",
	"VT_NULL",
	"VT_UMAP",
	"VT_KERNFS",
	"VT_PROCFS",
	"VT_AFS",
	"VT_ISOFS",
	"VT_UNION",
	"VT_ADOSFS",
	"VT_EXT2FS",
	"VT_CODA",
	"VT_FILECORE",
	"VT_NTFS",
	"VT_VFS",
	"VT_OVERLAY",
	"VT_SMBFS"
};
3050
/*
 * ddb helper: dump a struct vnode.  If `full' is set, every buf on
 * the vnode's clean and dirty lists is dumped as well.
 */
void
vfs_vnode_print(vp, full, pr)
	struct vnode *vp;
	int full;
	void (*pr)(const char *, ...);
{
	char buf[256];
	const char *vtype, *vtag;

	/* Dump the embedded uvm_object (page/reference state) first. */
	uvm_object_printit(&vp->v_uobj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
	(*pr)("\nVNODE flags %s\n", buf);
	(*pr)("mp %p numoutput %d size 0x%llx\n",
	    vp->v_mount, vp->v_numoutput, vp->v_size);

	(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	/*
	 * Range-check both lookups so a corrupted vnode prints
	 * "UNKNOWN" instead of indexing off the end of the tables.
	 * (vnode_types[] is defined elsewhere in this file.)
	 */
	vtype = (vp->v_type >= 0 &&
	    vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
	    vnode_types[vp->v_type] : "UNKNOWN";
	vtag = (vp->v_tag >= 0 &&
	    vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
	    vnode_tags[vp->v_tag] : "UNKNOWN";

	(*pr)("type %s(%d) tag %s(%d) mount %p typedata %p\n",
	    vtype, vp->v_type, vtag, vp->v_tag,
	    vp->v_mount, vp->v_mountedhere);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
3097
3098 void
3099 vfs_mount_print(mp, full, pr)
3100 struct mount *mp;
3101 int full;
3102 void (*pr)(const char *, ...);
3103 {
3104 char sbuf[256];
3105
3106 (*pr)("vnodecovered = %p syncer = %p data = %p\n",
3107 mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
3108
3109 (*pr)("fs_bshift %d dev_bshift = %d maxsymlinklen = %d\n",
3110 mp->mnt_fs_bshift,mp->mnt_dev_bshift,mp->mnt_maxsymlinklen);
3111
3112 bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
3113 (*pr)("flag = %s\n", sbuf);
3114
3115 bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
3116 (*pr)("iflag = %s\n", sbuf);
3117
3118 /* XXX use lockmgr_printinfo */
3119 if (mp->mnt_lock.lk_sharecount)
3120 (*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
3121 mp->mnt_lock.lk_sharecount);
3122 else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
3123 (*pr)(" lock type %s: EXCL (count %d) by ",
3124 mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
3125 if (mp->mnt_lock.lk_flags & LK_SPIN)
3126 (*pr)("processor %lu", mp->mnt_lock.lk_cpu);
3127 else
3128 (*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
3129 mp->mnt_lock.lk_locklwp);
3130 } else
3131 (*pr)(" not locked");
3132 if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0)
3133 (*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);
3134
3135 (*pr)("\n");
3136
3137 if (mp->mnt_unmounter) {
3138 (*pr)("unmounter pid = %d ",mp->mnt_unmounter->p_pid);
3139 }
3140 (*pr)("wcnt = %d, writeopcountupper = %d, writeopcountupper = %d\n",
3141 mp->mnt_wcnt,mp->mnt_writeopcountupper,mp->mnt_writeopcountlower);
3142
3143 (*pr)("statfs cache:\n");
3144 (*pr)("\ttype = %d\n",mp->mnt_stat.f_type);
3145 (*pr)("\toflags = 0x%04x\n",mp->mnt_stat.f_oflags);
3146 (*pr)("\tbsize = %d\n",mp->mnt_stat.f_bsize);
3147 (*pr)("\tiosize = %d\n",mp->mnt_stat.f_iosize);
3148 (*pr)("\tblocks = %d\n",mp->mnt_stat.f_blocks);
3149 (*pr)("\tbfree = %d\n",mp->mnt_stat.f_bfree);
3150 (*pr)("\tbavail = %d\n",mp->mnt_stat.f_bavail);
3151 (*pr)("\tfiles = %d\n",mp->mnt_stat.f_files);
3152 (*pr)("\tffree = %d\n",mp->mnt_stat.f_ffree);
3153 (*pr)("\tf_fsid = { 0x%"PRIx32", 0x%"PRIx32" }\n",
3154 mp->mnt_stat.f_fsid.val[0],mp->mnt_stat.f_fsid.val[1]);
3155 (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
3156 bitmask_snprintf(mp->mnt_stat.f_flags, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
3157 (*pr)("\tflags = %s\n",sbuf);
3158 (*pr)("\tsyncwrites = %d\n",mp->mnt_stat.f_syncwrites);
3159 (*pr)("\tasyncwrites = %d\n",mp->mnt_stat.f_asyncwrites);
3160 (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
3161 (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
3162 (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
3163
3164 {
3165 int cnt = 0;
3166 struct vnode *vp;
3167 (*pr)("locked vnodes =");
3168 /* XXX would take mountlist lock, except ddb may not have context */
3169 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3170 if (VOP_ISLOCKED(vp)) {
3171 if ((++cnt % 6) == 0) {
3172 (*pr)(" %p,\n\t", vp);
3173 } else {
3174 (*pr)(" %p,", vp);
3175 }
3176 }
3177 }
3178 (*pr)("\n");
3179 }
3180
3181 if (full) {
3182 int cnt = 0;
3183 struct vnode *vp;
3184 (*pr)("all vnodes =");
3185 /* XXX would take mountlist lock, except ddb may not have context */
3186 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3187 if (!LIST_NEXT(vp, v_mntvnodes)) {
3188 (*pr)(" %p", vp);
3189 } else if ((++cnt % 6) == 0) {
3190 (*pr)(" %p,\n\t", vp);
3191 } else {
3192 (*pr)(" %p,", vp);
3193 }
3194 }
3195 (*pr)("\n", vp);
3196 }
3197 }
3198
3199 #endif
Cache object: 156f814c972efc8a5a706f3172e6c08c
|