FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_subr.c
1 /* $NetBSD: vfs_subr.c,v 1.276.2.3 2007/08/01 14:45:47 liamjfoy Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 * This code is derived from software contributed to The NetBSD Foundation
11 * by Charles M. Hannum.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the NetBSD
24 * Foundation, Inc. and its contributors.
25 * 4. Neither the name of The NetBSD Foundation nor the names of its
26 * contributors may be used to endorse or promote products derived
27 * from this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
30 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
31 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
32 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
33 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42 /*
43 * Copyright (c) 1989, 1993
44 * The Regents of the University of California. All rights reserved.
45 * (c) UNIX System Laboratories, Inc.
46 * All or some portions of this file are derived from material licensed
47 * to the University of California by American Telephone and Telegraph
48 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
49 * the permission of UNIX System Laboratories, Inc.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions
53 * are met:
54 * 1. Redistributions of source code must retain the above copyright
55 * notice, this list of conditions and the following disclaimer.
56 * 2. Redistributions in binary form must reproduce the above copyright
57 * notice, this list of conditions and the following disclaimer in the
58 * documentation and/or other materials provided with the distribution.
59 * 3. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
76 */
77
78 /*
79 * External virtual filesystem routines
80 */
81
82 #include <sys/cdefs.h>
83 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.276.2.3 2007/08/01 14:45:47 liamjfoy Exp $");
84
85 #include "opt_inet.h"
86 #include "opt_ddb.h"
87 #include "opt_compat_netbsd.h"
88 #include "opt_compat_43.h"
89
90 #include <sys/param.h>
91 #include <sys/systm.h>
92 #include <sys/proc.h>
93 #include <sys/kernel.h>
94 #include <sys/mount.h>
95 #include <sys/fcntl.h>
96 #include <sys/vnode.h>
97 #include <sys/stat.h>
98 #include <sys/namei.h>
99 #include <sys/ucred.h>
100 #include <sys/buf.h>
101 #include <sys/errno.h>
102 #include <sys/malloc.h>
103 #include <sys/domain.h>
104 #include <sys/mbuf.h>
105 #include <sys/sa.h>
106 #include <sys/syscallargs.h>
107 #include <sys/device.h>
108 #include <sys/filedesc.h>
109 #include <sys/kauth.h>
110
111 #include <miscfs/specfs/specdev.h>
112 #include <miscfs/genfs/genfs.h>
113 #include <miscfs/syncfs/syncfs.h>
114
115 #include <uvm/uvm.h>
116 #include <uvm/uvm_readahead.h>
117 #include <uvm/uvm_ddb.h>
118
119 #include <sys/sysctl.h>
120
/*
 * Translation tables between the S_IFMT file-type bits in a mode_t and
 * the vnode type (enum vtype).  iftovt_tab is indexed by
 * (mode & S_IFMT) >> 12; vttoif_tab is indexed by enum vtype.
 */
const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */
extern int vfs_magiclinks;	/* 1 => expand "magic" symlinks */

/*
 * Insq/Remq for the vnode usage lists.  bufremvn() marks the buffer as
 * off-list by setting le_next to NOLIST, which brelvp()/reassignbuf()
 * test before removing.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list =	vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
/* Vnodes on this list are free but still referenced by buffers (holdcnt). */
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

/* Interlocks for the global lists above and for the specdev hash. */
struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/* XXX - gross; single global lock to protect v_numoutput */
struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * Local declarations.
 */

static specificdata_domain_t mount_specificdata_domain;

static void insmntque(struct vnode *, struct mount *);
static int getdevvp(dev_t, struct vnode **, enum vtype);
static void vclean(struct vnode *, int, struct lwp *);
static struct vnode *getcleanvnode(struct lwp *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif
/*
 * Initialize the vnode management data structures.
 * Called once during kernel startup, before any vnodes are created.
 */
void
vntblinit(void)
{

	mount_specificdata_domain = specificdata_domain_create();

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}
203
/*
 * Shrink the vnode pool down to "target" vnodes by reclaiming clean
 * vnodes from the free lists and returning them to vnode_pool.
 *
 * Returns 0 on success, or EBUSY if no reclaimable vnode could be
 * found before reaching the target.
 */
int
vfs_drainvnodes(long target, struct lwp *l)
{

	simple_lock(&vnode_free_list_slock);
	while (numvnodes > target) {
		struct vnode *vp;

		/*
		 * getcleanvnode() is entered with vnode_free_list_slock
		 * held and always releases it before returning, whether
		 * or not it found a vnode — so the bare return below
		 * does not leak the lock.
		 */
		vp = getcleanvnode(l);
		if (vp == NULL)
			return EBUSY; /* give up */
		pool_put(&vnode_pool, vp);
		simple_lock(&vnode_free_list_slock);
		numvnodes--;
	}
	simple_unlock(&vnode_free_list_slock);

	return 0;
}
223
/*
 * grab a vnode from freelist and clean it.
 *
 * Must be called with vnode_free_list_slock held; that lock is always
 * released before returning.  Scans vnode_free_list first, then
 * vnode_hold_list, looking for a vnode whose interlock can be acquired
 * without blocking and which is safe to reclaim.  Returns the reclaimed
 * vnode, or NULLVP if none was available.
 */
struct vnode *
getcleanvnode(struct lwp *l)
{
	struct vnode *vp;
	struct mount *mp;
	struct freelst *listhd;

	LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));

	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * Trylock only: we already hold the free-list lock, so
		 * blocking on a vnode interlock here could deadlock.
		 */
		if (!simple_lock_try(&vp->v_interlock))
			continue;
		/*
		 * as our lwp might hold the underlying vnode locked,
		 * don't try to reclaim the VLAYER vnode if it's locked.
		 */
		if ((vp->v_flag & VXLOCK) == 0 &&
		    ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
			/* Must not reclaim while a suspension is in progress. */
			if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
				break;
		}
		mp = NULL;
		simple_unlock(&vp->v_interlock);
	}

	if (vp == NULLVP) {
		/* Nothing on the free list; fall back to the hold list. */
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		simple_unlock(&vnode_free_list_slock);
		return NULLVP;
	}

	if (vp->v_usecount)
		panic("free vnode isn't, vp %p", vp);
	TAILQ_REMOVE(listhd, vp, v_freelist);
	/* see comment on why 0xdeadb is set at end of vgone (below) */
	vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
	simple_unlock(&vnode_free_list_slock);
	vp->v_lease = NULL;

	/*
	 * vgonel() consumes the interlock we acquired above; for an
	 * already-dead (VBAD) vnode just drop the interlock.
	 */
	if (vp->v_type != VBAD)
		vgonel(vp, l);
	else
		simple_unlock(&vp->v_interlock);
	vn_finished_write(mp, 0);
#ifdef DIAGNOSTIC
	if (vp->v_data || vp->v_uobj.uo_npages ||
	    TAILQ_FIRST(&vp->v_uobj.memq))
		panic("cleaned vnode isn't, vp %p", vp);
	if (vp->v_numoutput)
		panic("clean vnode has pending I/O's, vp %p", vp);
#endif
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	return vp;
}
287
/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 *
 * flags:
 *	LK_NOWAIT	fail with ENOENT instead of sleeping if an
 *			unmount is in progress.
 *	LK_RECURSEFAIL	fail with EDEADLK if the caller itself is the
 *			lwp performing the unmount.
 * interlkp, if non-NULL, is held on entry and is dropped while
 * sleeping, then reacquired (and passed through to lockmgr with
 * LK_INTERLOCK on the success path).
 */
int
vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp)
{
	int lkflags;

	while (mp->mnt_iflag & IMNT_UNMOUNT) {
		int gone, n;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curlwp)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		simple_lock(&mp->mnt_slock);
		mp->mnt_wcnt++;
		ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
		/*
		 * The last waiter to wake up notifies dounmount() (which
		 * waits on mnt_wcnt before freeing the mount) that it may
		 * proceed.  Sample IMNT_GONE before that wakeup, since the
		 * mount may be freed afterwards.
		 */
		n = --mp->mnt_wcnt;
		simple_unlock(&mp->mnt_slock);
		gone = mp->mnt_iflag & IMNT_GONE;

		if (n == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}
334
/*
 * Free a busy filesystem.
 * Releases the shared mnt_lock reference taken by vfs_busy().
 */
void
vfs_unbusy(struct mount *mp)
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}
344
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 *
 * Returns 0 and a new, busied, read-only mount in *mpp on success;
 * ENODEV if no filesystem named fstypename is registered.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
    struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	simple_lock_init(&mp->mnt_slock);
	/* Busy the new mount immediately so no one can unmount it under us. */
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	TAILQ_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	mp->mnt_leaf = mp;
	vfsp->vfs_refcount++;
	/*
	 * NOTE(review): strncpy does not NUL-terminate if vfs_name is
	 * exactly MFSNAMELEN bytes; this relies on the memset above and
	 * on registered names being shorter than MFSNAMELEN — confirm.
	 */
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	mount_initspecific(mp);
	*mpp = mp;
	return (0);
}
382
383 /*
384 * Lookup a mount point by filesystem identifier.
385 */
386 struct mount *
387 vfs_getvfs(fsid_t *fsid)
388 {
389 struct mount *mp;
390
391 simple_lock(&mountlist_slock);
392 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
393 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
394 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
395 simple_unlock(&mountlist_slock);
396 return (mp);
397 }
398 }
399 simple_unlock(&mountlist_slock);
400 return ((struct mount *)0);
401 }
402
/*
 * Get a new unique fsid
 *
 * The fsid is built from the filesystem type hash (makefstype) and a
 * small per-type instance counter; collisions with already-mounted
 * filesystems are resolved by probing with vfs_getvfs().
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;	/* per-boot instance counter */
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		/* Probe until we find an fsid no current mount is using. */
		while (vfs_getvfs(&tfsid)) {
			tfsid.__fsid_val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	simple_unlock(&mntid_slock);
}
432
/*
 * Make a 'unique' number from a mount type name.
 *
 * Simple shift-xor hash over the bytes of the name; the empty string
 * hashes to 0.  Not collision-free, merely "unique enough" for fsids.
 */
long
makefstype(const char *type)
{
	long hash;
	const char *p;

	hash = 0;
	for (p = type; *p != '\0'; p++)
		hash = (hash << 2) ^ *p;
	return hash;
}
447
448
449 /*
450 * Set vnode attributes to VNOVAL
451 */
452 void
453 vattr_null(struct vattr *vap)
454 {
455
456 vap->va_type = VNON;
457
458 /*
459 * Assign individually so that it is safe even if size and
460 * sign of each member are varied.
461 */
462 vap->va_mode = VNOVAL;
463 vap->va_nlink = VNOVAL;
464 vap->va_uid = VNOVAL;
465 vap->va_gid = VNOVAL;
466 vap->va_fsid = VNOVAL;
467 vap->va_fileid = VNOVAL;
468 vap->va_size = VNOVAL;
469 vap->va_blocksize = VNOVAL;
470 vap->va_atime.tv_sec =
471 vap->va_mtime.tv_sec =
472 vap->va_ctime.tv_sec =
473 vap->va_birthtime.tv_sec = VNOVAL;
474 vap->va_atime.tv_nsec =
475 vap->va_mtime.tv_nsec =
476 vap->va_ctime.tv_nsec =
477 vap->va_birthtime.tv_nsec = VNOVAL;
478 vap->va_gen = VNOVAL;
479 vap->va_flags = VNOVAL;
480 vap->va_rdev = VNOVAL;
481 vap->va_bytes = VNOVAL;
482 vap->va_vaflags = 0;
483 }
484
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);	/* ops vector for dead vnodes */
long numvnodes;				/* current number of allocated vnodes */
/*
 * Return the next vnode from the free list.
 *
 * Allocates a fresh vnode from the pool, or recycles one from the free
 * lists, initializes it with the given tag and ops vector, and inserts
 * it on mp's vnode list.  Returns 0 with the new vnode in *vpp, or an
 * error (from vfs_busy, or ENFILE when the table is full).
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    struct vnode **vpp)
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct lwp *l = curlwp;				/* XXX */
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one.  The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list.  Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.  If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list.  The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size.  We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		numvnodes++;
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1);
		/*
		 * done by memset() above.
		 *	LIST_INIT(&vp->v_nclist);
		 *	LIST_INIT(&vp->v_dnclist);
		 */
	} else {
		/* getcleanvnode() releases vnode_free_list_slock. */
		vp = getcleanvnode(l);
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				/* Pool allocation failed; wait and retry. */
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		vp->v_usecount = 1;
		vp->v_flag = 0;
		vp->v_socket = NULL;
	}
	/* Common (re)initialization of the vnode. */
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_data = 0;
	simple_lock_init(&vp->v_interlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}
610
/*
 * This is really just the reverse of getnewvnode().  Needed for
 * VFS_VGET functions who may need to push back a vnode in case
 * of a locking race.
 *
 * The vnode must have exactly one reference (the one getnewvnode()
 * gave out); it is marked VBAD and put back at the head of the
 * appropriate free list.
 */
void
ungetnewvnode(struct vnode *vp)
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}
639
/*
 * Move a vnode from one mount queue to another.
 * Passing mp == NULL just removes the vnode from its current queue.
 */
static void
insmntque(struct vnode *vp, struct mount *mp)
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_iflag & IMNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL) {
		if (TAILQ_EMPTY(&mp->mnt_vnodelist)) {
			TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
		} else {
			TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
		}
	}
	simple_unlock(&mntvnode_slock);
}
674
/*
 * Update outstanding I/O count and do wakeup if requested.
 * Decrements v_numoutput for the buffer's vnode and wakes any thread
 * sleeping in VBWAIT once the count reaches zero.
 */
void
vwakeup(struct buf *bp)
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		/* XXX global lock hack
		 * can't use v_interlock here since this is called
		 * in interrupt context from biodone().
		 */
		simple_lock(&global_v_numoutput_slock);
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
		simple_unlock(&global_v_numoutput_slock);
	}
}
698
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * With V_SAVE, dirty data is written out (VOP_PUTPAGES with PGO_CLEANIT
 * plus VOP_FSYNC) before the buffers are invalidated; otherwise the
 * buffers are simply discarded.  Returns 0 or an error from the flush
 * or from an interrupted sleep.
 */
int
vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
    int slpflag, int slptimeo)
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	/*
	 * NOTE(review): v_interlock is passed to VOP_PUTPAGES, which is
	 * presumed to release it — confirm against the genfs_putpages
	 * contract; the early return below does not unlock it.
	 */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, l);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	/* First pass: discard everything on the clean-buffer list. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			/* Buffer in flight; wait for it, then rescan. */
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	/* Second pass: the dirty-buffer list. */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			simple_unlock(&bp->b_interlock);
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}
796
/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * Pages past the truncation point are freed via VOP_PUTPAGES, then any
 * buffers with a logical block number >= lbn are invalidated on both
 * the clean and dirty lists.  Returns 0 or an error from the page
 * flush or an interrupted sleep.
 */
int
vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo)
{
	struct buf *bp, *nbp;
	int s, error;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		/* Only blocks at or beyond the truncation point. */
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	splx(s);

	return (0);
}
863
/*
 * Write out all dirty buffers (and pages) associated with a vnode.
 * If "sync" is set, wait for the writes to complete and re-check that
 * no buffers were dirtied in the meantime.
 */
void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	int s;

	simple_lock(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if ((bp->b_flags & B_BUSY)) {
			simple_unlock(&bp->b_interlock);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	/* Synchronous case: wait for every outstanding write on the vnode. */
	simple_lock(&global_v_numoutput_slock);
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
		    &global_v_numoutput_slock);
	}
	simple_unlock(&global_v_numoutput_slock);
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		/* Something re-dirtied a buffer while we slept; flush again. */
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
915
/*
 * Associate a buffer with a vnode.
 * Takes a hold reference on the vnode and puts the buffer on the
 * vnode's clean-buffer list.
 */
void
bgetvp(struct vnode *vp, struct buf *bp)
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}
939
/*
 * Disassociate a buffer from a vnode.
 * Removes the buffer from the vnode's buffer list, takes the vnode off
 * the syncer worklist when it no longer has pages or dirty buffers,
 * and drops the hold reference taken by bgetvp().
 */
void
brelvp(struct buf *bp)
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VWRITEMAPDIRTY;
		vn_syncer_remove_from_worklist(vp);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}
970
/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(struct buf *bp, struct vnode *newvp)
{
	struct buflists *listheadp;
	int delayx;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		/* Last dirty state gone: take the vnode off the syncer list. */
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VWRITEMAPDIRTY;
			vn_syncer_remove_from_worklist(newvp);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			/*
			 * Pick the syncer delay by vnode type: directories
			 * and device metadata are pushed sooner than
			 * ordinary file data.
			 */
			switch (newvp->v_type) {
			case VDIR:
				delayx = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delayx = metadelay;
					break;
				}
				/* fall through */
			default:
				delayx = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delayx);
		}
	}
	bufinsvn(bp, listheadp);
}
1025
/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev_t dev, struct vnode **vpp)
{

	return (getdevvp(dev, vpp, VBLK));
}
1037
/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev_t dev, struct vnode **vpp)
{

	return (getdevvp(dev, vpp, VCHR));
}
1048
/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 *
 * Returns 0 with the (possibly aliased) device vnode in *vpp; a NODEV
 * argument yields *vpp == NULLVP with success.
 */
static int
getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	/* If the device already has a vnode, use the existing alias. */
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
1079
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
 *
 * Returns NULLVP (after attaching nvp to the spechash) when no usable
 * alias exists, or the reinitialized alias vnode otherwise.
 */
struct vnode *
checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
{
	struct lwp *l = curlwp;			/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	/* Only device vnodes can be aliased. */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		simple_unlock(&spechash_slock);
		if (vp->v_usecount == 0) {
			vgonel(vp, l);
			goto loop;
		}
		/*
		 * What we're interested to know here is if someone else has
		 * removed this vnode from the device hash list while we were
		 * waiting.  This can only happen if vclean() did it, and
		 * this requires the vnode to be locked.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
			goto loop;
		if (vp->v_specinfo == NULL) {
			vput(vp);
			goto loop;
		}
		simple_lock(&spechash_slock);
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		/* No usable alias: attach specinfo to nvp and hash it. */
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		simple_lock_init(&nvp->v_spec_cow_slock);
		SLIST_INIT(&nvp->v_spec_cow_head);
		nvp->v_spec_cow_req = 0;
		nvp->v_spec_cow_count = 0;

		*vpp = nvp;
		if (vp != NULLVP) {
			/* A char-device twin exists; mark both as aliased. */
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * Reuse the existing block-device alias: strip its old identity
	 * with vclean() and hand it the caller's ops vector and tag.
	 */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, l);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
1169
1170 /*
1171  * Grab a particular vnode from the free list, increment its
1172  * reference count and lock it. If the vnode lock bit is set the
1173  * vnode is being eliminated in vgone. In that case, we can not
1174  * grab the vnode, so the process is awakened when the transition is
1175  * completed, and an error returned to indicate that the vnode is no
1176  * longer usable (possibly having been changed to a new file system type).
1177  *
1178  * flags: LK_INTERLOCK if the caller already holds v_interlock,
1179  * LK_NOWAIT to fail with EBUSY instead of sleeping, plus an
1180  * optional LK_TYPE_MASK lock request passed on to vn_lock().
1181  */
1178 int
1179 vget(struct vnode *vp, int flags)
1180 {
1181 int error;
1182
1183 /*
1184 * If the vnode is in the process of being cleaned out for
1185 * another use, we wait for the cleaning to finish and then
1186 * return failure. Cleaning is determined by checking that
1187 * the VXLOCK flag is set.
1188 */
1189
1190 if ((flags & LK_INTERLOCK) == 0)
1191 simple_lock(&vp->v_interlock);
1192 if ((vp->v_flag & (VXLOCK | VFREEING)) != 0) {
1193 if (flags & LK_NOWAIT) {
1194 simple_unlock(&vp->v_interlock);
1195 return EBUSY;
1196 }
1197 vp->v_flag |= VXWANT;
1198 ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
1199 return (ENOENT);
1200 }
	/*
	 * First reference: the vnode is on one of the free lists
	 * (hold list if pages/buffers still reference it); take it off.
	 */
1201 if (vp->v_usecount == 0) {
1202 simple_lock(&vnode_free_list_slock);
1203 if (vp->v_holdcnt > 0)
1204 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
1205 else
1206 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1207 simple_unlock(&vnode_free_list_slock);
1208 }
1209 vp->v_usecount++;
1210 #ifdef DIAGNOSTIC
1211 if (vp->v_usecount == 0) {
1212 vprint("vget", vp);
1213 panic("vget: usecount overflow, vp %p", vp);
1214 }
1215 #endif
	/*
	 * If a lock type was requested, vn_lock() consumes the interlock;
	 * on lock failure the reference taken above is dropped again.
	 */
1216 if (flags & LK_TYPE_MASK) {
1217 if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
1218 vrele(vp);
1219 }
1220 return (error);
1221 }
1222 simple_unlock(&vp->v_interlock);
1223 return (0);
1224 }
1225
1226 /*
1227  * vput(), just unlock and vrele()
1228  *
1229  * Caller holds the vnode locked; the lock is always released here.
1230  * On the last reference the vnode goes to the tail of the
1231  * appropriate LRU list and VOP_INACTIVE() is called (which also
1232  * unlocks the vnode).
1233  */
1229 void
1230 vput(struct vnode *vp)
1231 {
1232 struct lwp *l = curlwp; /* XXX */
1233
1234 #ifdef DIAGNOSTIC
1235 if (vp == NULL)
1236 panic("vput: null vp");
1237 #endif
1238 simple_lock(&vp->v_interlock);
1239 vp->v_usecount--;
1240 if (vp->v_usecount > 0) {
1241 simple_unlock(&vp->v_interlock);
1242 VOP_UNLOCK(vp, 0);
1243 return;
1244 }
1245 #ifdef DIAGNOSTIC
1246 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1247 vprint("vput: bad ref count", vp);
1248 panic("vput: ref cnt");
1249 }
1250 #endif
1251 /*
1252 * Insert at tail of LRU list.
1253 */
1254 simple_lock(&vnode_free_list_slock);
1255 if (vp->v_holdcnt > 0)
1256 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1257 else
1258 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1259 simple_unlock(&vnode_free_list_slock);
	/* executable mappings are gone; move the page accounting back */
1260 if (vp->v_flag & VEXECMAP) {
1261 uvmexp.execpages -= vp->v_uobj.uo_npages;
1262 uvmexp.filepages += vp->v_uobj.uo_npages;
1263 }
1264 vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP|VMAPPED);
1265 simple_unlock(&vp->v_interlock);
1266 VOP_INACTIVE(vp, l);
1267 }
1268
1269 /*
1270  * Vnode release.
1271  * If count drops to zero, call inactive routine and return to freelist.
1272  *
1273  * Unlike vput(), the vnode is not locked on entry; on the last
1274  * reference it must be locked before VOP_INACTIVE() may run.
1275  */
1273 void
1274 vrele(struct vnode *vp)
1275 {
1276 struct lwp *l = curlwp; /* XXX */
1277
1278 #ifdef DIAGNOSTIC
1279 if (vp == NULL)
1280 panic("vrele: null vp");
1281 #endif
1282 simple_lock(&vp->v_interlock);
1283 vp->v_usecount--;
1284 if (vp->v_usecount > 0) {
1285 simple_unlock(&vp->v_interlock);
1286 return;
1287 }
1288 #ifdef DIAGNOSTIC
1289 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1290 vprint("vrele: bad ref count", vp);
1291 panic("vrele: ref cnt vp %p", vp);
1292 }
1293 #endif
1294 /*
1295 * Insert at tail of LRU list.
1296 */
1297 simple_lock(&vnode_free_list_slock);
1298 if (vp->v_holdcnt > 0)
1299 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1300 else
1301 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1302 simple_unlock(&vnode_free_list_slock);
1303 if (vp->v_flag & VEXECMAP) {
1304 uvmexp.execpages -= vp->v_uobj.uo_npages;
1305 uvmexp.filepages += vp->v_uobj.uo_npages;
1306 }
1307 vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP|VMAPPED);
	/*
	 * vn_lock consumes the interlock; if the lock cannot be obtained
	 * the vnode is being cleaned and VOP_INACTIVE is skipped.
	 */
1308 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
1309 VOP_INACTIVE(vp, l);
1310 }
1311
1312 /*
1313  * Page or buffer structure gets a reference.
1314  * Called with v_interlock held.
1315  */
1316 void
1317 vholdl(struct vnode *vp)
1318 {
1319
1320 /*
1321 * If it is on the freelist and the hold count is currently
1322 * zero, move it to the hold list. The test of the back
1323 * pointer and the use reference count of zero is because
1324 * it will be removed from a free list by getnewvnode,
1325 * but will not have its reference count incremented until
1326 * after calling vgone. If the reference count were
1327 * incremented first, vgone would (incorrectly) try to
1328 * close the previous instance of the underlying object.
1329 * So, the back pointer is explicitly set to `0xdeadb' in
1330 * getnewvnode after removing it from a freelist to ensure
1331 * that we do not try to move it here.
1332 */
1333 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1334 vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1335 simple_lock(&vnode_free_list_slock);
1336 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1337 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1338 simple_unlock(&vnode_free_list_slock);
1339 }
	/* v_holdcnt counts page/buffer references, distinct from v_usecount */
1340 vp->v_holdcnt++;
1341 }
1342
1343 /*
1344  * Page or buffer structure frees a reference.
1345  * Called with v_interlock held.  Panics if the hold count would
1346  * go negative.
1347  */
1347 void
1348 holdrelel(struct vnode *vp)
1349 {
1350
1351 if (vp->v_holdcnt <= 0)
1352 panic("holdrelel: holdcnt vp %p", vp);
1353 vp->v_holdcnt--;
1354
1355 /*
1356 * If it is on the holdlist and the hold count drops to
1357 * zero, move it to the free list. The test of the back
1358 * pointer and the use reference count of zero is because
1359 * it will be removed from a free list by getnewvnode,
1360 * but will not have its reference count incremented until
1361 * after calling vgone. If the reference count were
1362 * incremented first, vgone would (incorrectly) try to
1363 * close the previous instance of the underlying object.
1364 * So, the back pointer is explicitly set to `0xdeadb' in
1365 * getnewvnode after removing it from a freelist to ensure
1366 * that we do not try to move it here.
1367 */
1368
1369 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1370 vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1371 simple_lock(&vnode_free_list_slock);
1372 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
1373 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1374 simple_unlock(&vnode_free_list_slock);
1375 }
1376 }
1377
1378 /*
1379  * Vnode reference.
1380  *
1381  * Only valid on a vnode that is already referenced; a vnode with
1382  * v_usecount == 0 must be obtained via vget() instead, hence the
1383  * panic below.
1384  */
1381 void
1382 vref(struct vnode *vp)
1383 {
1384
1385 simple_lock(&vp->v_interlock);
1386 if (vp->v_usecount <= 0)
1387 panic("vref used where vget required, vp %p", vp);
1388 vp->v_usecount++;
1389 #ifdef DIAGNOSTIC
1390 if (vp->v_usecount == 0) {
1391 vprint("vref", vp);
1392 panic("vref: usecount overflow, vp %p", vp);
1393 }
1394 #endif
1395 simple_unlock(&vp->v_interlock);
1396 }
1397
1398 /*
1399  * Remove any vnodes in the vnode table belonging to mount point mp.
1400  *
1401  * If FORCECLOSE is not specified, there should not be any active ones,
1402  * return error if any are found (nb: this is a user error, not a
1403  * system error). If FORCECLOSE is specified, detach any active vnodes
1404  * that are found.
1405  *
1406  * If WRITECLOSE is set, only flush out regular file vnodes open for
1407  * writing.
1408  *
1409  * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1410  *
1411  * Returns 0, or EBUSY if vnodes remained busy (each busy vnode is
1412  * counted but left alone).
1413  */
1411 #ifdef DEBUG
1412 int busyprt = 0; /* print out busy vnodes */
1413 struct ctldebug debug1 = { "busyprt", &busyprt };
1414 #endif
1415
1416 int
1417 vflush(struct mount *mp, struct vnode *skipvp, int flags)
1418 {
1419 struct lwp *l = curlwp; /* XXX */
1420 struct vnode *vp, *nvp;
1421 int busy = 0;
1422
1423 simple_lock(&mntvnode_slock);
1424 loop:
1425 /*
1426 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1427 * and vclean() are called
1428 */
1429 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
	/* vnode migrated to another mount while we slept: restart scan */
1430 if (vp->v_mount != mp)
1431 goto loop;
1432 nvp = TAILQ_NEXT(vp, v_mntvnodes);
1433 /*
1434 * Skip over a selected vnode.
1435 */
1436 if (vp == skipvp)
1437 continue;
1438 simple_lock(&vp->v_interlock);
1439 /*
1440 * Skip over a vnodes marked VSYSTEM.
1441 */
1442 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1443 simple_unlock(&vp->v_interlock);
1444 continue;
1445 }
1446 /*
1447 * If WRITECLOSE is set, only flush out regular file
1448 * vnodes open for writing.
1449 */
1450 if ((flags & WRITECLOSE) &&
1451 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1452 simple_unlock(&vp->v_interlock);
1453 continue;
1454 }
1455 /*
1456 * With v_usecount == 0, all we need to do is clear
1457 * out the vnode data structures and we are done.
1458 */
1459 if (vp->v_usecount == 0) {
1460 simple_unlock(&mntvnode_slock);
1461 vgonel(vp, l);
1462 simple_lock(&mntvnode_slock);
1463 continue;
1464 }
1465 /*
1466 * If FORCECLOSE is set, forcibly close the vnode.
1467 * For block or character devices, revert to an
1468 * anonymous device. For all other files, just kill them.
1469 */
1470 if (flags & FORCECLOSE) {
1471 simple_unlock(&mntvnode_slock);
1472 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1473 vgonel(vp, l);
1474 } else {
1475 vclean(vp, 0, l);
	/* keep the device vnode alive, detached from any mount */
1476 vp->v_op = spec_vnodeop_p;
1477 insmntque(vp, (struct mount *)0);
1478 }
1479 simple_lock(&mntvnode_slock);
1480 continue;
1481 }
1482 #ifdef DEBUG
1483 if (busyprt)
1484 vprint("vflush: busy vnode", vp);
1485 #endif
1486 simple_unlock(&vp->v_interlock);
1487 busy++;
1488 }
1489 simple_unlock(&mntvnode_slock);
1490 if (busy)
1491 return (EBUSY);
1492 return (0);
1493 }
1494
1495 /*
1496  * Disassociate the underlying file system from a vnode.
1497  *
1498  * Called with v_interlock held; the interlock is released in the
1499  * process.  flags may include DOCLOSE to flush buffers, close the
1500  * file and unhook a special device from the alias hash.  On return
1501  * the vnode is a dead vnode (dead_vnodeop_p, VT_NON).
1502  */
1498 static void
1499 vclean(struct vnode *vp, int flags, struct lwp *l)
1500 {
1501 struct mount *mp;
1502 int active;
1503
1504 LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
1505
1506 /*
1507 * Check to see if the vnode is in use.
1508 * If so we have to reference it before we clean it out
1509 * so that its count cannot fall to zero and generate a
1510 * race against ourselves to recycle it.
1511 */
1512
1513 if ((active = vp->v_usecount) != 0) {
1514 vp->v_usecount++;
1515 #ifdef DIAGNOSTIC
1516 if (vp->v_usecount == 0) {
1517 vprint("vclean", vp);
1518 panic("vclean: usecount overflow");
1519 }
1520 #endif
1521 }
1522
1523 /*
1524 * Prevent the vnode from being recycled or
1525 * brought into use while we clean it out.
1526 */
1527 if (vp->v_flag & VXLOCK)
1528 panic("vclean: deadlock, vp %p", vp);
1529 vp->v_flag |= VXLOCK;
1530 if (vp->v_flag & VEXECMAP) {
1531 uvmexp.execpages -= vp->v_uobj.uo_npages;
1532 uvmexp.filepages += vp->v_uobj.uo_npages;
1533 }
1534 vp->v_flag &= ~(VTEXT|VEXECMAP);
1535
1536 /*
1537 * Even if the count is zero, the VOP_INACTIVE routine may still
1538 * have the object locked while it cleans it out. For
1539 * active vnodes, it ensures that no other activity can
1540 * occur while the underlying object is being cleaned out.
1541 *
1542 * We drain the lock to make sure we are the last one trying to
1543 * get it and immediately resurrect the lock. Future accesses
1544 * for locking this _vnode_ will be protected by VXLOCK. However,
1545 * upper layers might be using the _lock_ in case the file system
1546 * exported it and might access it while the vnode lingers in
1547 * deadfs.
1548 */
1549 VOP_LOCK(vp, LK_DRAIN | LK_RESURRECT | LK_INTERLOCK);
1550
1551 /*
1552 * Clean out any cached data associated with the vnode.
1553 * If special device, remove it from special device alias list.
1554 * if it is on one.
1555 */
1556 if (flags & DOCLOSE) {
1557 int error;
1558 struct vnode *vq, *vx;
1559
1560 vn_start_write(vp, &mp, V_WAIT | V_LOWER);
1561 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1562 vn_finished_write(mp, V_LOWER);
	/* if saving dirty buffers failed, discard them instead */
1563 if (error)
1564 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1565 KASSERT(error == 0);
1566 KASSERT((vp->v_flag & VONWORKLST) == 0);
1567
1568 if (active)
1569 VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
1570
	/*
	 * Special device: unlink from the device hash chain and
	 * recompute the VALIASED state of the surviving alias(es).
	 */
1571 if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
1572 vp->v_specinfo != 0) {
1573 simple_lock(&spechash_slock);
1574 if (vp->v_hashchain != NULL) {
1575 if (*vp->v_hashchain == vp) {
1576 *vp->v_hashchain = vp->v_specnext;
1577 } else {
1578 for (vq = *vp->v_hashchain; vq;
1579 vq = vq->v_specnext) {
1580 if (vq->v_specnext != vp)
1581 continue;
1582 vq->v_specnext = vp->v_specnext;
1583 break;
1584 }
1585 if (vq == NULL)
1586 panic("missing bdev");
1587 }
1588 if (vp->v_flag & VALIASED) {
	/* clear VALIASED on a now-unique remaining alias */
1589 vx = NULL;
1590 for (vq = *vp->v_hashchain; vq;
1591 vq = vq->v_specnext) {
1592 if (vq->v_rdev != vp->v_rdev ||
1593 vq->v_type != vp->v_type)
1594 continue;
1595 if (vx)
1596 break;
1597 vx = vq;
1598 }
1599 if (vx == NULL)
1600 panic("missing alias");
1601 if (vq == NULL)
1602 vx->v_flag &= ~VALIASED;
1603 vp->v_flag &= ~VALIASED;
1604 }
1605 }
1606 simple_unlock(&spechash_slock);
1607 FREE(vp->v_specinfo, M_VNODE);
1608 vp->v_specinfo = NULL;
1609 }
1610 }
1611 LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));
1612
1613 /*
1614 * If purging an active vnode, it must be closed and
1615 * deactivated before being reclaimed. Note that the
1616 * VOP_INACTIVE will unlock the vnode.
1617 */
1618 if (active) {
1619 VOP_INACTIVE(vp, l);
1620 } else {
1621 /*
1622 * Any other processes trying to obtain this lock must first
1623 * wait for VXLOCK to clear, then call the new lock operation.
1624 */
1625 VOP_UNLOCK(vp, 0);
1626 }
1627 /*
1628 * Reclaim the vnode.
1629 */
1630 if (VOP_RECLAIM(vp, l))
1631 panic("vclean: cannot reclaim, vp %p", vp);
1632 if (active) {
1633 /*
1634 * Inline copy of vrele() since VOP_INACTIVE
1635 * has already been called.
1636 */
1637 simple_lock(&vp->v_interlock);
1638 if (--vp->v_usecount <= 0) {
1639 #ifdef DIAGNOSTIC
1640 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1641 vprint("vclean: bad ref count", vp);
1642 panic("vclean: ref cnt");
1643 }
1644 #endif
1645 /*
1646 * Insert at tail of LRU list.
1647 */
1648
1649 simple_unlock(&vp->v_interlock);
1650 simple_lock(&vnode_free_list_slock);
1651 #ifdef DIAGNOSTIC
1652 if (vp->v_holdcnt > 0)
1653 panic("vclean: not clean, vp %p", vp);
1654 #endif
1655 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1656 simple_unlock(&vnode_free_list_slock);
1657 } else
1658 simple_unlock(&vp->v_interlock);
1659 }
1660
1661 KASSERT(vp->v_uobj.uo_npages == 0);
1662 if (vp->v_type == VREG && vp->v_ractx != NULL) {
1663 uvm_ra_freectx(vp->v_ractx);
1664 vp->v_ractx = NULL;
1665 }
1666 cache_purge(vp);
1667
1668 /*
1669 * Done with purge, notify sleepers of the grim news.
1670 */
1671 vp->v_op = dead_vnodeop_p;
1672 vp->v_tag = VT_NON;
1673 vp->v_vnlock = NULL;
1674 simple_lock(&vp->v_interlock);
1675 VN_KNOTE(vp, NOTE_REVOKE); /* FreeBSD has this in vn_pollgone() */
1676 vp->v_flag &= ~(VXLOCK|VLOCKSWORK);
	/* wake anyone who slept in vget()/vgonel() waiting for VXLOCK */
1677 if (vp->v_flag & VXWANT) {
1678 vp->v_flag &= ~VXWANT;
1679 simple_unlock(&vp->v_interlock);
1680 wakeup((caddr_t)vp);
1681 } else
1682 simple_unlock(&vp->v_interlock);
1683 }
1684
1685 /*
1686  * Recycle an unused vnode to the front of the free list.
1687  * Release the passed interlock if the vnode will be recycled.
1688  *
1689  * Returns 1 if the vnode was recycled, 0 if it was in use.
1690  */
1689 int
1690 vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l)
1691 {
1692
1693 simple_lock(&vp->v_interlock);
1694 if (vp->v_usecount == 0) {
1695 if (inter_lkp)
1696 simple_unlock(inter_lkp);
	/* vgonel() consumes the vnode interlock */
1697 vgonel(vp, l);
1698 return (1);
1699 }
1700 simple_unlock(&vp->v_interlock);
1701 return (0);
1702 }
1703
1704 /*
1705  * Eliminate all activity associated with a vnode
1706  * in preparation for reuse.  Convenience wrapper that takes the
1707  * interlock and calls vgonel().
1708  */
1708 void
1709 vgone(struct vnode *vp)
1710 {
1711 struct lwp *l = curlwp; /* XXX */
1712
1713 simple_lock(&vp->v_interlock);
1714 vgonel(vp, l);
1715 }
1716
1717 /*
1718  * vgone, with the vp interlock held.  The interlock is consumed.
1719  */
1720 void
1721 vgonel(struct vnode *vp, struct lwp *l)
1722 {
1723
1724 LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
1725
1726 /*
1727 * If a vgone (or vclean) is already in progress,
1728 * wait until it is done and return.
1729 */
1730
1731 if (vp->v_flag & VXLOCK) {
1732 vp->v_flag |= VXWANT;
1733 ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
1734 return;
1735 }
1736
1737 /*
1738 * Clean out the filesystem specific data.
1739 */
1740
1741 vclean(vp, DOCLOSE, l);
1742 KASSERT((vp->v_flag & VONWORKLST) == 0);
1743
1744 /*
1745 * Delete from old mount point vnode list, if on one.
1746 */
1747
1748 if (vp->v_mount != NULL)
1749 insmntque(vp, (struct mount *)0);
1750
1751 /*
1752 * The test of the back pointer and the reference count of
1753 * zero is because it will be removed from the free list by
1754 * getcleanvnode, but will not have its reference count
1755 * incremented until after calling vgone. If the reference
1756 * count were incremented first, vgone would (incorrectly)
1757 * try to close the previous instance of the underlying object.
1758 * So, the back pointer is explicitly set to `0xdeadb' in
1759 * getnewvnode after removing it from the freelist to ensure
1760 * that we do not try to move it here.
1761 */
1762
1763 vp->v_type = VBAD;
1764 if (vp->v_usecount == 0) {
1765 boolean_t dofree;
1766
1767 simple_lock(&vnode_free_list_slock);
1768 if (vp->v_holdcnt > 0)
1769 panic("vgonel: not clean, vp %p", vp);
1770 /*
1771 * if it isn't on the freelist, we're called by getcleanvnode
1772 * and vnode is being re-used. otherwise, we'll free it.
1773 */
1774 dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
1775 if (dofree) {
1776 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1777 numvnodes--;
1778 }
1779 simple_unlock(&vnode_free_list_slock);
	/* release the pool item outside the free-list lock */
1780 if (dofree)
1781 pool_put(&vnode_pool, vp);
1782 }
1783 }
1784
1785 /*
1786  * Lookup a vnode by device number.
1787  *
1788  * Returns 1 and sets *vpp when a vnode of matching device and type
1789  * is on the spec hash; returns 0 (and leaves *vpp untouched)
1790  * otherwise.  No reference is taken on the returned vnode.
1791  */
1788 int
1789 vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
1790 {
1791 struct vnode *vp;
1792 int rc = 0;
1793
1794 simple_lock(&spechash_slock);
1795 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1796 if (dev != vp->v_rdev || type != vp->v_type)
1797 continue;
1798 *vpp = vp;
1799 rc = 1;
1800 break;
1801 }
1802 simple_unlock(&spechash_slock);
1803 return (rc);
1804 }
1805
1806 /*
1807  * Revoke all the vnodes corresponding to the specified minor number
1808  * range (endpoints inclusive) of the specified major.
1809  */
1810 void
1811 vdevgone(int maj, int minl, int minh, enum vtype type)
1812 {
1813 struct vnode *vp;
1814 int mn;
1815
1816 vp = NULL; /* XXX gcc */
1817
	/* revoke every existing vnode for each minor in [minl, minh] */
1818 for (mn = minl; mn <= minh; mn++)
1819 if (vfinddev(makedev(maj, mn), type, &vp))
1820 VOP_REVOKE(vp, REVOKEALL);
1821 }
1822
1823 /*
1824  * Calculate the total number of references to a special device.
1825  *
1826  * Sums v_usecount over all aliases of vp's device.  Unused aliases
1827  * encountered along the way are flushed with vgone(), after which
1828  * the scan restarts from the top.
1829  */
1826 int
1827 vcount(struct vnode *vp)
1828 {
1829 struct vnode *vq, *vnext;
1830 int count;
1831
1832 loop:
1833 if ((vp->v_flag & VALIASED) == 0)
1834 return (vp->v_usecount);
1835 simple_lock(&spechash_slock);
1836 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1837 vnext = vq->v_specnext;
1838 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1839 continue;
1840 /*
1841 * Alias, but not in use, so flush it out.
1842 */
1843 if (vq->v_usecount == 0 && vq != vp &&
1844 (vq->v_flag & VXLOCK) == 0) {
1845 simple_unlock(&spechash_slock);
1846 vgone(vq);
	/* the hash chain changed; recount from scratch */
1847 goto loop;
1848 }
1849 count += vq->v_usecount;
1850 }
1851 simple_unlock(&spechash_slock);
1852 return (count);
1853 }
1854
1855 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
1856 /*
1857  * Map an index to its name, or "UNKNOWN" when out of range.
1858  * The unsigned cast makes index 0 (VT_NON / VNON) valid -- the old
1859  * "(idx) > 0" test wrongly printed "UNKNOWN" for it -- while still
1860  * rejecting negative indices (they wrap to huge unsigned values).
1861  */
1856 #define ARRAY_PRINT(idx, arr) \
1857     ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
1858
1859 const char * const vnode_tags[] = { VNODE_TAGS };
1860 const char * const vnode_types[] = { VNODE_TYPES };
1861 const char vnode_flagbits[] = VNODE_FLAGBITS;
1862
1863 /*
1864  * Print out a description of a vnode.
1865  * label, if non-NULL, is printed as a prefix; then the tag, type,
1866  * reference counts, flag bits and (if present) the per-filesystem
1867  * data via VOP_PRINT.
1868  */
1866 void
1867 vprint(const char *label, struct vnode *vp)
1868 {
1869 char bf[96];
1870
1871 if (label != NULL)
1872 printf("%s: ", label);
1873 printf("tag %s(%d) type %s(%d), usecount %d, writecount %ld, "
1874 "refcount %ld,", ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
1875 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
1876 vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
1877 bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
	/* &bf[1] skips the leading separator bitmask_snprintf emits */
1878 if (bf[0] != '\0')
1879 printf(" flags (%s)", &bf[1]);
1880 if (vp->v_data == NULL) {
1881 printf("\n");
1882 } else {
1883 printf("\n\t");
1884 VOP_PRINT(vp);
1885 }
1886 }
1887
1888 #ifdef DEBUG
1889 /*
1890 * List all of the locked vnodes in the system.
1891 * Called when debugging the kernel.
1892 */
1893 void
1894 printlockedvnodes(void)
1895 {
1896 struct mount *mp, *nmp;
1897 struct vnode *vp;
1898
1899 printf("Locked vnodes\n");
1900 simple_lock(&mountlist_slock);
1901 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1902 mp = nmp) {
	/* vfs_busy() releases mountlist_slock whether or not it succeeds */
1903 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1904 nmp = CIRCLEQ_NEXT(mp, mnt_list);
1905 continue;
1906 }
1907 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
1908 if (VOP_ISLOCKED(vp))
1909 vprint(NULL, vp);
1910 }
	/* retake the list lock before stepping to the next mount */
1911 simple_lock(&mountlist_slock);
1912 nmp = CIRCLEQ_NEXT(mp, mnt_list);
1913 vfs_unbusy(mp);
1914 }
1915 simple_unlock(&mountlist_slock);
1916 }
1917 #endif
1918
1919 /*
1920  * sysctl helper routine to return list of supported fstypes
1921  *
1922  * Produces a single space-separated, NUL-terminated string of file
1923  * system names.  With a NULL oldp only the required length is
1924  * computed.  Read-only: any attempt to set returns EPERM.
1925  */
1922 static int
1923 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
1924 {
1925 char bf[MFSNAMELEN];
1926 char *where = oldp;
1927 struct vfsops *v;
1928 size_t needed, left, slen;
1929 int error, first;
1930
1931 if (newp != NULL)
1932 return (EPERM);
1933 if (namelen != 0)
1934 return (EINVAL);
1935
1936 first = 1;
1937 error = 0;
1938 needed = 0;
1939 left = *oldlenp;
1940
1941 LIST_FOREACH(v, &vfs_list, vfs_list) {
1942 if (where == NULL)
1943 needed += strlen(v->vfs_name) + 1;
1944 else {
1945 memset(bf, 0, sizeof(bf));
1946 if (first) {
1947 strncpy(bf, v->vfs_name, sizeof(bf));
1948 first = 0;
1949 } else {
	/* subsequent names are prefixed with a separating space */
1950 bf[0] = ' ';
1951 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
1952 }
1953 bf[sizeof(bf)-1] = '\0';
1954 slen = strlen(bf);
	/* stop (truncate) once the user buffer is exhausted */
1955 if (left < slen + 1)
1956 break;
1957 /* +1 to copy out the trailing NUL byte */
1958 error = copyout(bf, where, slen + 1);
1959 if (error)
1960 break;
	/* advance past the name; the NUL will be overwritten next round */
1961 where += slen;
1962 needed += slen;
1963 left -= slen;
1964 }
1965 }
1966 *oldlenp = needed;
1967 return (error);
1968 }
1969
1970 /*
1971  * Top level filesystem related information gathering.
1972  * Registers the vfs sysctl subtree: vfs, vfs.generic, and the
1973  * usermount / fstypes / magiclinks leaves.
1974  */
1973 SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
1974 {
1975 sysctl_createv(clog, 0, NULL, NULL,
1976 CTLFLAG_PERMANENT,
1977 CTLTYPE_NODE, "vfs", NULL,
1978 NULL, 0, NULL, 0,
1979 CTL_VFS, CTL_EOL);
1980 sysctl_createv(clog, 0, NULL, NULL,
1981 CTLFLAG_PERMANENT,
1982 CTLTYPE_NODE, "generic",
1983 SYSCTL_DESCR("Non-specific vfs related information"),
1984 NULL, 0, NULL, 0,
1985 CTL_VFS, VFS_GENERIC, CTL_EOL);
1986 sysctl_createv(clog, 0, NULL, NULL,
1987 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1988 CTLTYPE_INT, "usermount",
1989 SYSCTL_DESCR("Whether unprivileged users may mount "
1990 "filesystems"),
1991 NULL, 0, &dovfsusermount, 0,
1992 CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
1993 sysctl_createv(clog, 0, NULL, NULL,
1994 CTLFLAG_PERMANENT,
1995 CTLTYPE_STRING, "fstypes",
1996 SYSCTL_DESCR("List of file systems present"),
1997 sysctl_vfs_generic_fstypes, 0, NULL, 0,
1998 CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
1999 sysctl_createv(clog, 0, NULL, NULL,
2000 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2001 CTLTYPE_INT, "magiclinks",
2002 SYSCTL_DESCR("Whether \"magic\" symlinks are expanded"),
2003 NULL, 0, &vfs_magiclinks, 0,
2004 CTL_VFS, VFS_GENERIC, VFS_MAGICLINKS, CTL_EOL);
2005 }
2006
2007
2008 int kinfo_vdebug = 1;
2009 int kinfo_vgetfailed;
2010 #define KINFO_VNODESLOP 10
2011 /*
2012 * Dump vnode list (via sysctl).
2013 * Copyout address of vnode followed by vnode.
2014 *
2015 * With a NULL oldp, returns a size estimate padded by
2016 * KINFO_VNODESLOP entries.  ENOMEM if the buffer fills up.
2017 */
2015 /* ARGSUSED */
2016 int
2017 sysctl_kern_vnode(SYSCTLFN_ARGS)
2018 {
2019 char *where = oldp;
2020 size_t *sizep = oldlenp;
2021 struct mount *mp, *nmp;
2022 struct vnode *vp;
2023 char *bp = where, *savebp;
2024 char *ewhere;
2025 int error;
2026
2027 if (namelen != 0)
2028 return (EOPNOTSUPP);
2029 if (newp != NULL)
2030 return (EPERM);
2031
2032 #define VPTRSZ sizeof(struct vnode *)
2033 #define VNODESZ sizeof(struct vnode)
2034 if (where == NULL) {
2035 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2036 return (0);
2037 }
2038 ewhere = where + *sizep;
2039
2040 simple_lock(&mountlist_slock);
2041 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2042 mp = nmp) {
2043 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
2044 nmp = CIRCLEQ_NEXT(mp, mnt_list);
2045 continue;
2046 }
2047 savebp = bp;
2048 again:
2049 simple_lock(&mntvnode_slock);
2050 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
2051 /*
2052 * Check that the vp is still associated with
2053 * this filesystem. RACE: could have been
2054 * recycled onto the same filesystem.
2055 */
2056 if (vp->v_mount != mp) {
2057 simple_unlock(&mntvnode_slock);
2058 if (kinfo_vdebug)
2059 printf("kinfo: vp changed\n");
	/* rewind and redo this mount's vnodes from the start */
2060 bp = savebp;
2061 goto again;
2062 }
2063 if (bp + VPTRSZ + VNODESZ > ewhere) {
2064 simple_unlock(&mntvnode_slock);
2065 *sizep = bp - where;
2066 return (ENOMEM);
2067 }
	/* copyout may sleep: drop the list lock around it */
2068 simple_unlock(&mntvnode_slock);
2069 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2070 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2071 return (error);
2072 bp += VPTRSZ + VNODESZ;
2073 simple_lock(&mntvnode_slock);
2074 }
2075 simple_unlock(&mntvnode_slock);
2076 simple_lock(&mountlist_slock);
2077 nmp = CIRCLEQ_NEXT(mp, mnt_list);
2078 vfs_unbusy(mp);
2079 }
2080 simple_unlock(&mountlist_slock);
2081
2082 *sizep = bp - where;
2083 return (0);
2084 }
2085
2086 /*
2087  * Check to see if a filesystem is mounted on a block device.
2088  *
2089  * Returns 0 if free, EBUSY if vp (or any alias of the same device)
2090  * already carries a mounted file system, ENOTBLK if vp is not a
2091  * block device at all.
2092  */
2089 int
2090 vfs_mountedon(struct vnode *vp)
2091 {
2092 struct vnode *vq;
2093 int error = 0;
2094
2095 if (vp->v_type != VBLK)
2096 return ENOTBLK;
2097 if (vp->v_specmountpoint != NULL)
2098 return (EBUSY);
	/* also check every alias vnode of the same device */
2099 if (vp->v_flag & VALIASED) {
2100 simple_lock(&spechash_slock);
2101 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2102 if (vq->v_rdev != vp->v_rdev ||
2103 vq->v_type != vp->v_type)
2104 continue;
2105 if (vq->v_specmountpoint != NULL) {
2106 error = EBUSY;
2107 break;
2108 }
2109 }
2110 simple_unlock(&spechash_slock);
2111 }
2112 return (error);
2113 }
2114
2115 /*
2116  * Do the usual access checking.
2117  * file_mode, uid and gid are from the vnode in question,
2118  * while acc_mode and cred are from the VOP_ACCESS parameter list
2119  *
2120  * Exactly one of the owner / group / other permission classes is
2121  * consulted -- the first one the credential matches -- per classic
2122  * UNIX semantics.  Returns 0 or EACCES (or an error from the group
2123  * membership lookup).
2124  */
2120 int
2121 vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
2122 mode_t acc_mode, kauth_cred_t cred)
2123 {
2124 mode_t mask;
2125 int error, ismember;
2126
2127 /*
2128 * Super-user always gets read/write access, but execute access depends
2129 * on at least one execute bit being set.
2130 */
2131 if (kauth_cred_geteuid(cred) == 0) {
	/* directories are searchable by root regardless of mode bits */
2132 if ((acc_mode & VEXEC) && type != VDIR &&
2133 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2134 return (EACCES);
2135 return (0);
2136 }
2137
2138 mask = 0;
2139
2140 /* Otherwise, check the owner. */
2141 if (kauth_cred_geteuid(cred) == uid) {
2142 if (acc_mode & VEXEC)
2143 mask |= S_IXUSR;
2144 if (acc_mode & VREAD)
2145 mask |= S_IRUSR;
2146 if (acc_mode & VWRITE)
2147 mask |= S_IWUSR;
2148 return ((file_mode & mask) == mask ? 0 : EACCES);
2149 }
2150
2151 /* Otherwise, check the groups. */
2152 error = kauth_cred_ismember_gid(cred, gid, &ismember);
2153 if (error)
2154 return (error);
2155 if (kauth_cred_getegid(cred) == gid || ismember) {
2156 if (acc_mode & VEXEC)
2157 mask |= S_IXGRP;
2158 if (acc_mode & VREAD)
2159 mask |= S_IRGRP;
2160 if (acc_mode & VWRITE)
2161 mask |= S_IWGRP;
2162 return ((file_mode & mask) == mask ? 0 : EACCES);
2163 }
2164
2165 /* Otherwise, check everyone else. */
2166 if (acc_mode & VEXEC)
2167 mask |= S_IXOTH;
2168 if (acc_mode & VREAD)
2169 mask |= S_IROTH;
2170 if (acc_mode & VWRITE)
2171 mask |= S_IWOTH;
2172 return ((file_mode & mask) == mask ? 0 : EACCES);
2173 }
2174
2175 /*
2176  * Unmount all file systems.
2177  * We traverse the list in reverse order under the assumption that doing so
2178  * will avoid needing to worry about dependencies.
2179  *
2180  * Failures are reported on the console but otherwise ignored;
2181  * MNT_FORCE is used on every unmount.
2182  */
2180 void
2181 vfs_unmountall(struct lwp *l)
2182 {
2183 struct mount *mp, *nmp;
2184 int allerror, error;
2185
2186 printf("unmounting file systems...");
	/* walk the mount list tail-to-head (most recently mounted first) */
2187 for (allerror = 0,
2188 mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2189 nmp = mp->mnt_list.cqe_prev;
2190 #ifdef DEBUG
2191 printf("\nunmounting %s (%s)...",
2192 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
2193 #endif
2194 /*
2195 * XXX Freeze syncer. Must do this before locking the
2196 * mount point. See dounmount() for details.
2197 */
2198 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
2199 if (vfs_busy(mp, 0, 0)) {
2200 lockmgr(&syncer_lock, LK_RELEASE, NULL);
2201 continue;
2202 }
	/* dounmount() releases both the mount busy state and syncer_lock */
2203 if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
2204 printf("unmount of %s failed with error %d\n",
2205 mp->mnt_stat.f_mntonname, error);
2206 allerror = 1;
2207 }
2208 }
2209 printf(" done\n");
2210 if (allerror)
2211 printf("WARNING: some file systems would not unmount\n");
2212 }
2213
2214 extern struct simplelock bqueue_slock; /* XXX */
2215
2216 /*
2217  * Sync and unmount file systems before shutting down.
2218  * Safe to call from panic context: if the sync does not complete,
2219  * or we are panicking, the unmount step is skipped.
2220  */
2219 void
2220 vfs_shutdown(void)
2221 {
2222 struct lwp *l;
2223
2224 /* XXX we're certainly not running in lwp0's context! */
2225 l = curlwp;
2226 if (l == NULL)
2227 l = &lwp0;
2228
2229 printf("syncing disks... ");
2230
2231 /* remove user process from run queue */
2232 suspendsched();
2233 (void) spl0();
2234
2235 /* avoid coming back this way again if we panic. */
2236 doing_shutdown = 1;
2237
2238 sys_sync(l, NULL, NULL);
2239
2240 /* Wait for sync to finish. */
2241 if (buf_syncwait() != 0) {
2242 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
2243 Debugger();
2244 #endif
2245 printf("giving up\n");
2246 return;
2247 } else
2248 printf("done\n");
2249
2250 /*
2251 * If we've panic'd, don't make the situation potentially
2252 * worse by unmounting the file systems.
2253 */
2254 if (panicstr != NULL)
2255 return;
2256
2257 /* Release inodes held by texts before update. */
2258 #ifdef notdef
2259 vnshutdown();
2260 #endif
2261 /* Unmount file systems. */
2262 vfs_unmountall(l);
2263 }
2264
2265 /*
2266  * Mount the root file system. If the operator didn't specify a
2267  * file system to use, try all possible file systems until one
2268  * succeeds.
2269  *
2270  * For DV_DISK roots the device is opened read-only first; it is
2271  * closed and released again if mounting ultimately fails.
2272  * Returns 0, ENODEV, EFTYPE, or the file system's mount error.
2273  */
2270 int
2271 vfs_mountroot(void)
2272 {
2273 struct vfsops *v;
2274 int error = ENODEV;
2275
2276 if (root_device == NULL)
2277 panic("vfs_mountroot: root device unknown");
2278
2279 switch (device_class(root_device)) {
2280 case DV_IFNET:
	/* network root: no block device may be configured */
2281 if (rootdev != NODEV)
2282 panic("vfs_mountroot: rootdev set for DV_IFNET "
2283 "(0x%08x -> %d,%d)", rootdev,
2284 major(rootdev), minor(rootdev));
2285 break;
2286
2287 case DV_DISK:
2288 if (rootdev == NODEV)
2289 panic("vfs_mountroot: rootdev not set for DV_DISK");
2290 if (bdevvp(rootdev, &rootvp))
2291 panic("vfs_mountroot: can't get vnode for rootdev");
2292 error = VOP_OPEN(rootvp, FREAD, FSCRED, curlwp);
2293 if (error) {
2294 printf("vfs_mountroot: can't open root device\n");
2295 return (error);
2296 }
2297 break;
2298
2299 default:
2300 printf("%s: inappropriate for root file system\n",
2301 root_device->dv_xname);
2302 return (ENODEV);
2303 }
2304
2305 /*
2306 * If user specified a file system, use it.
2307 */
2308 if (mountroot != NULL) {
2309 error = (*mountroot)();
2310 goto done;
2311 }
2312
2313 /*
2314 * Try each file system currently configured into the kernel.
2315 */
2316 LIST_FOREACH(v, &vfs_list, vfs_list) {
2317 if (v->vfs_mountroot == NULL)
2318 continue;
2319 #ifdef DEBUG
2320 aprint_normal("mountroot: trying %s...\n", v->vfs_name);
2321 #endif
2322 error = (*v->vfs_mountroot)();
2323 if (!error) {
2324 aprint_normal("root file system type: %s\n",
2325 v->vfs_name);
2326 break;
2327 }
2328 }
2329
	/* exhausted the list without success */
2330 if (v == NULL) {
2331 printf("no file system for %s", root_device->dv_xname);
2332 if (device_class(root_device) == DV_DISK)
2333 printf(" (dev 0x%x)", rootdev);
2334 printf("\n");
2335 error = EFTYPE;
2336 }
2337
2338 done:
	/* undo the VOP_OPEN/bdevvp from the DV_DISK case on failure */
2339 if (error && device_class(root_device) == DV_DISK) {
2340 VOP_CLOSE(rootvp, FREAD, FSCRED, curlwp);
2341 vrele(rootvp);
2342 }
2343 return (error);
2344 }
2345
2346 /*
2347 * Given a file system name, look up the vfsops for that
2348 * file system, or return NULL if file system isn't present
2349 * in the kernel.
2350 */
2351 struct vfsops *
2352 vfs_getopsbyname(const char *name)
2353 {
2354 struct vfsops *v;
2355
2356 LIST_FOREACH(v, &vfs_list, vfs_list) {
2357 if (strcmp(v->vfs_name, name) == 0)
2358 break;
2359 }
2360
2361 return (v);
2362 }
2363
2364 /*
2365 * Establish a file system and initialize it.
2366 */
2367 int
2368 vfs_attach(struct vfsops *vfs)
2369 {
2370 struct vfsops *v;
2371 int error = 0;
2372
2373
2374 /*
2375 * Make sure this file system doesn't already exist.
2376 */
2377 LIST_FOREACH(v, &vfs_list, vfs_list) {
2378 if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
2379 error = EEXIST;
2380 goto out;
2381 }
2382 }
2383
2384 /*
2385 * Initialize the vnode operations for this file system.
2386 */
2387 vfs_opv_init(vfs->vfs_opv_descs);
2388
2389 /*
2390 * Now initialize the file system itself.
2391 */
2392 (*vfs->vfs_init)();
2393
2394 /*
2395 * ...and link it into the kernel's list.
2396 */
2397 LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
2398
2399 /*
2400 * Sanity: make sure the reference count is 0.
2401 */
2402 vfs->vfs_refcount = 0;
2403
2404 out:
2405 return (error);
2406 }
2407
2408 /*
2409 * Remove a file system from the kernel.
2410 */
2411 int
2412 vfs_detach(struct vfsops *vfs)
2413 {
2414 struct vfsops *v;
2415
2416 /*
2417 * Make sure no one is using the filesystem.
2418 */
2419 if (vfs->vfs_refcount != 0)
2420 return (EBUSY);
2421
2422 /*
2423 * ...and remove it from the kernel's list.
2424 */
2425 LIST_FOREACH(v, &vfs_list, vfs_list) {
2426 if (v == vfs) {
2427 LIST_REMOVE(v, vfs_list);
2428 break;
2429 }
2430 }
2431
2432 if (v == NULL)
2433 return (ESRCH);
2434
2435 /*
2436 * Now run the file system-specific cleanups.
2437 */
2438 (*vfs->vfs_done)();
2439
2440 /*
2441 * Free the vnode operations vector.
2442 */
2443 vfs_opv_free(vfs->vfs_opv_descs);
2444 return (0);
2445 }
2446
2447 void
2448 vfs_reinit(void)
2449 {
2450 struct vfsops *vfs;
2451
2452 LIST_FOREACH(vfs, &vfs_list, vfs_list) {
2453 if (vfs->vfs_reinit) {
2454 (*vfs->vfs_reinit)();
2455 }
2456 }
2457 }
2458
2459 /*
2460 * Request a filesystem to suspend write operations.
2461 */
2462 int
2463 vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
2464 {
2465 struct lwp *l = curlwp; /* XXX */
2466 int error;
2467
2468 while ((mp->mnt_iflag & IMNT_SUSPEND)) {
2469 if (slptimeo < 0)
2470 return EWOULDBLOCK;
2471 error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
2472 if (error)
2473 return error;
2474 }
2475 mp->mnt_iflag |= IMNT_SUSPEND;
2476
2477 simple_lock(&mp->mnt_slock);
2478 if (mp->mnt_writeopcountupper > 0)
2479 ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
2480 0, &mp->mnt_slock);
2481 simple_unlock(&mp->mnt_slock);
2482
2483 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
2484 if (error) {
2485 vfs_write_resume(mp);
2486 return error;
2487 }
2488 mp->mnt_iflag |= IMNT_SUSPENDLOW;
2489
2490 simple_lock(&mp->mnt_slock);
2491 if (mp->mnt_writeopcountlower > 0)
2492 ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
2493 0, &mp->mnt_slock);
2494 mp->mnt_iflag |= IMNT_SUSPENDED;
2495 simple_unlock(&mp->mnt_slock);
2496
2497 return 0;
2498 }
2499
2500 /*
2501 * Request a filesystem to resume write operations.
2502 */
2503 void
2504 vfs_write_resume(struct mount *mp)
2505 {
2506
2507 if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
2508 return;
2509 mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
2510 wakeup(&mp->mnt_flag);
2511 }
2512
2513 void
2514 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
2515 {
2516 const struct statvfs *mbp;
2517
2518 if (sbp == (mbp = &mp->mnt_stat))
2519 return;
2520
2521 (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
2522 sbp->f_fsid = mbp->f_fsid;
2523 sbp->f_owner = mbp->f_owner;
2524 sbp->f_flag = mbp->f_flag;
2525 sbp->f_syncwrites = mbp->f_syncwrites;
2526 sbp->f_asyncwrites = mbp->f_asyncwrites;
2527 sbp->f_syncreads = mbp->f_syncreads;
2528 sbp->f_asyncreads = mbp->f_asyncreads;
2529 (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
2530 (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
2531 sizeof(sbp->f_fstypename));
2532 (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
2533 sizeof(sbp->f_mntonname));
2534 (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
2535 sizeof(sbp->f_mntfromname));
2536 sbp->f_namemax = mbp->f_namemax;
2537 }
2538
/*
 * set_statvfs_info --
 *	Fill in the f_fstypename, f_mntonname and f_mntfromname fields
 *	of a mount's cached statvfs.
 *
 *	onp/fromp: mount-on and mount-from path strings, or NULL to
 *	    leave the corresponding field untouched.
 *	ukon/ukfrom: UIO_SYSSPACE or UIO_USERSPACE, selecting copystr
 *	    vs. copyinstr for each string.
 *	l: the lwp whose chroot (cwdi_rdir) is consulted so that a
 *	    chrooted caller's mount-on path is recorded relative to
 *	    the real root.
 *
 *	Returns 0 or an error from getcwd_common()/copy(in)str.
 */
int
set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    struct mount *mp, struct lwp *l)
{
	int error;
	size_t size;
	struct statvfs *sfs = &mp->mnt_stat;
	int (*fun)(const void *, void *, size_t, size_t *);

	(void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));

	if (onp) {
		struct cwdinfo *cwdi = l->l_proc->p_cwdi;
		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
		if (cwdi->cwdi_rdir != NULL) {
			/*
			 * Caller is chrooted: prefix onp with the path
			 * from the real root to the chroot directory.
			 */
			size_t len;
			char *bp;
			char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);

			if (!path) /* XXX can't happen with M_WAITOK */
				return ENOMEM;

			/* getcwd_common() builds the path backwards from
			 * the end of the buffer. */
			bp = path + MAXPATHLEN;
			*--bp = '\0';
			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
			    path, MAXPATHLEN / 2, 0, l);
			if (error) {
				free(path, M_TEMP);
				return error;
			}

			len = strlen(bp);
			if (len > sizeof(sfs->f_mntonname) - 1)
				len = sizeof(sfs->f_mntonname) - 1;
			(void)strncpy(sfs->f_mntonname, bp, len);
			free(path, M_TEMP);

			if (len < sizeof(sfs->f_mntonname) - 1) {
				/* Append onp after the chroot prefix. */
				error = (*fun)(onp, &sfs->f_mntonname[len],
				    sizeof(sfs->f_mntonname) - len - 1, &size);
				if (error)
					return error;
				size += len;
			} else {
				/* Prefix alone filled the field; onp
				 * is silently dropped. */
				size = len;
			}
		} else {
			error = (*fun)(onp, &sfs->f_mntonname,
			    sizeof(sfs->f_mntonname) - 1, &size);
			if (error)
				return error;
		}
		/* Zero the tail so the field is always NUL-terminated. */
		(void)memset(sfs->f_mntonname + size, 0,
		    sizeof(sfs->f_mntonname) - size);
	}

	if (fromp) {
		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
		error = (*fun)(fromp, sfs->f_mntfromname,
			sizeof(sfs->f_mntfromname) - 1, &size);
		if (error)
			return error;
		(void)memset(sfs->f_mntfromname + size, 0,
		    sizeof(sfs->f_mntfromname) - size);
	}
	return 0;
}
2607
/*
 * vfs_timestamp --
 *	Fill *ts with the current time to be used for file system
 *	timestamps (currently just nanotime()).
 */
void
vfs_timestamp(struct timespec *ts)
{

	nanotime(ts);
}
2614
2615 /*
2616 * mount_specific_key_create --
2617 * Create a key for subsystem mount-specific data.
2618 */
2619 int
2620 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
2621 {
2622
2623 return (specificdata_key_create(mount_specificdata_domain, keyp, dtor));
2624 }
2625
2626 /*
2627 * mount_specific_key_delete --
2628 * Delete a key for subsystem mount-specific data.
2629 */
2630 void
2631 mount_specific_key_delete(specificdata_key_t key)
2632 {
2633
2634 specificdata_key_delete(mount_specificdata_domain, key);
2635 }
2636
2637 /*
2638 * mount_initspecific --
2639 * Initialize a mount's specificdata container.
2640 */
2641 void
2642 mount_initspecific(struct mount *mp)
2643 {
2644 int error;
2645
2646 error = specificdata_init(mount_specificdata_domain,
2647 &mp->mnt_specdataref);
2648 KASSERT(error == 0);
2649 }
2650
2651 /*
2652 * mount_finispecific --
2653 * Finalize a mount's specificdata container.
2654 */
2655 void
2656 mount_finispecific(struct mount *mp)
2657 {
2658
2659 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
2660 }
2661
2662 /*
2663 * mount_getspecific --
2664 * Return mount-specific data corresponding to the specified key.
2665 */
2666 void *
2667 mount_getspecific(struct mount *mp, specificdata_key_t key)
2668 {
2669
2670 return (specificdata_getspecific(mount_specificdata_domain,
2671 &mp->mnt_specdataref, key));
2672 }
2673
2674 /*
2675 * mount_setspecific --
2676 * Set mount-specific data corresponding to the specified key.
2677 */
2678 void
2679 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
2680 {
2681
2682 specificdata_setspecific(mount_specificdata_domain,
2683 &mp->mnt_specdataref, key, data);
2684 }
2685
2686 #ifdef DDB
2687 static const char buf_flagbits[] = BUF_FLAGBITS;
2688
/*
 * vfs_buf_print --
 *	Print the interesting fields of a struct buf through the
 *	supplied printf-like callback (used from DDB).  The "full"
 *	argument is accepted for symmetry with the other print
 *	routines but is not used here.
 */
void
vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
{
	char bf[1024];

	(*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
	    PRIx64 " dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);

	/* Decode b_flags into a human-readable flag string. */
	bitmask_snprintf(bp->b_flags, buf_flagbits, bf, sizeof(bf));
	(*pr)("  error %d flags 0x%s\n", bp->b_error, bf);

	(*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
		  bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)("  data %p saveaddr %p dep %p\n",
		  bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
	(*pr)("  iodone %p\n", bp->b_iodone);
}
2707
2708
/*
 * vfs_vnode_print --
 *	Print the state of a vnode through the supplied printf-like
 *	callback (used from DDB).  If "full" is set, also walk and
 *	print the vnode's clean and dirty buffer lists.
 */
void
vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
{
	char bf[256];

	/* Dump the embedded uvm_object first. */
	uvm_object_printit(&vp->v_uobj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
	(*pr)("\nVNODE flags %s\n", bf);
	(*pr)("mp %p numoutput %d size 0x%llx\n",
	      vp->v_mount, vp->v_numoutput, vp->v_size);

	(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
	      vp->v_data, vp->v_usecount, vp->v_writecount,
	      vp->v_holdcnt, vp->v_numoutput);

	(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
	      ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
	      ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
	      vp->v_mount, vp->v_mountedhere);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
2745
2746 void
2747 vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
2748 {
2749 char sbuf[256];
2750
2751 (*pr)("vnodecovered = %p syncer = %p data = %p\n",
2752 mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
2753
2754 (*pr)("fs_bshift %d dev_bshift = %d\n",
2755 mp->mnt_fs_bshift,mp->mnt_dev_bshift);
2756
2757 bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
2758 (*pr)("flag = %s\n", sbuf);
2759
2760 bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
2761 (*pr)("iflag = %s\n", sbuf);
2762
2763 /* XXX use lockmgr_printinfo */
2764 if (mp->mnt_lock.lk_sharecount)
2765 (*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
2766 mp->mnt_lock.lk_sharecount);
2767 else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
2768 (*pr)(" lock type %s: EXCL (count %d) by ",
2769 mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
2770 if (mp->mnt_lock.lk_flags & LK_SPIN)
2771 (*pr)("processor %lu", mp->mnt_lock.lk_cpu);
2772 else
2773 (*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
2774 mp->mnt_lock.lk_locklwp);
2775 } else
2776 (*pr)(" not locked");
2777 if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0)
2778 (*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);
2779
2780 (*pr)("\n");
2781
2782 if (mp->mnt_unmounter) {
2783 (*pr)("unmounter pid = %d ",mp->mnt_unmounter->l_proc);
2784 }
2785 (*pr)("wcnt = %d, writeopcountupper = %d, writeopcountupper = %d\n",
2786 mp->mnt_wcnt,mp->mnt_writeopcountupper,mp->mnt_writeopcountlower);
2787
2788 (*pr)("statvfs cache:\n");
2789 (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
2790 (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
2791 (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
2792
2793 (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
2794 (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
2795 (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
2796 (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
2797
2798 (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
2799 (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
2800 (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
2801 (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
2802
2803 (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
2804 mp->mnt_stat.f_fsidx.__fsid_val[0],
2805 mp->mnt_stat.f_fsidx.__fsid_val[1]);
2806
2807 (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
2808 (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
2809
2810 bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
2811 sizeof(sbuf));
2812 (*pr)("\tflag = %s\n",sbuf);
2813 (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
2814 (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
2815 (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
2816 (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
2817 (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
2818 (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
2819 (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
2820
2821 {
2822 int cnt = 0;
2823 struct vnode *vp;
2824 (*pr)("locked vnodes =");
2825 /* XXX would take mountlist lock, except ddb may not have context */
2826 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
2827 if (VOP_ISLOCKED(vp)) {
2828 if ((++cnt % 6) == 0) {
2829 (*pr)(" %p,\n\t", vp);
2830 } else {
2831 (*pr)(" %p,", vp);
2832 }
2833 }
2834 }
2835 (*pr)("\n");
2836 }
2837
2838 if (full) {
2839 int cnt = 0;
2840 struct vnode *vp;
2841 (*pr)("all vnodes =");
2842 /* XXX would take mountlist lock, except ddb may not have context */
2843 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
2844 if (!TAILQ_NEXT(vp, v_mntvnodes)) {
2845 (*pr)(" %p", vp);
2846 } else if ((++cnt % 6) == 0) {
2847 (*pr)(" %p,\n\t", vp);
2848 } else {
2849 (*pr)(" %p,", vp);
2850 }
2851 }
2852 (*pr)("\n", vp);
2853 }
2854 }
2855 #endif /* DDB */
Cache object: 02a675ad15602284f6b871a048abfb08
|