FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_cache.c
1 /*
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Poul-Henning Kamp of the FreeBSD Project.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
37 * $FreeBSD: releng/5.1/sys/kern/vfs_cache.c 112430 2003-03-20 10:40:45Z phk $
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/sysctl.h>
46 #include <sys/mount.h>
47 #include <sys/vnode.h>
48 #include <sys/namei.h>
49 #include <sys/malloc.h>
50 #include <sys/syscallsubr.h>
51 #include <sys/sysproto.h>
52 #include <sys/proc.h>
53 #include <sys/filedesc.h>
54 #include <sys/fnv_hash.h>
55
56 /*
57 * This structure describes the elements in the cache of recent
58 * names looked up by namei.
59 */
60
struct namecache {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct vnode *nc_dvp;		/* vnode of parent of name */
	struct vnode *nc_vp;		/* vnode the name refers to; NULL for a negative entry */
	u_char nc_flag;			/* flag bits (NCF_*) */
	u_char nc_nlen;			/* length of name (not NUL terminated) */
	char nc_name[0];		/* segment name, stored inline past the struct
					 * (GCC zero-length array; allocation is
					 * sizeof(struct namecache) + nc_nlen) */
};
71
72 /*
73 * Name caching works as follows:
74 *
75 * Names found by directory scans are retained in a cache
76 * for future reference. It is managed LRU, so frequently
77 * used names will hang around. Cache is indexed by hash value
78 * obtained from (vp, name) where vp refers to the directory
79 * containing name.
80 *
81 * If it is a "negative" entry, (i.e. for a name that is known NOT to
82 * exist) the vnode pointer will be NULL.
83 *
84 * Upon reaching the last segment of a path, if the reference
85 * is for DELETE, or NOCACHE is set (rewrite), and the
86 * name is located in the cache, it will be dropped.
87 */
88
89 /*
90 * Structures associated with name cacheing.
91 */
92 #define NCHHASH(hash) \
93 (&nchashtbl[(hash) & nchash])
94 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
95 static TAILQ_HEAD(, namecache) ncneg; /* Hash Table */
96 static u_long nchash; /* size of hash table */
97 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
98 static u_long ncnegfactor = 16; /* ratio of negative entries */
99 SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
100 static u_long numneg; /* number of cache entries allocated */
101 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
102 static u_long numcache; /* number of cache entries allocated */
103 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
104 static u_long numcachehv; /* number of cache entries with vnodes held */
105 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
106 #if 0
107 static u_long numcachepl; /* number of cache purge for leaf entries */
108 SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
109 #endif
110 struct nchstats nchstats; /* cache effectiveness statistics */
111
112 static int doingcache = 1; /* 1 => enable the cache */
113 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
114
115 /* Export size information to userland */
116 SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
117 SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
118
119 /*
120 * The new name cache statistics
121 */
122 SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
123 #define STATNODE(mode, name, var) \
124 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
125 STATNODE(CTLFLAG_RD, numneg, &numneg);
126 STATNODE(CTLFLAG_RD, numcache, &numcache);
127 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
128 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
129 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
130 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
131 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
132 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
133 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
134 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
135 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
136 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
137
138 SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
139 sizeof(nchstats), "LU", "VFS cache effectiveness statistics");
140
141
142
143 static void cache_zap(struct namecache *ncp);
144
145 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
146
147 /*
148 * Flags in namecache.nc_flag
149 */
150 #define NCF_WHITE 1
151
152 /*
153 * Grab an atomic snapshot of the name cache hash chain lengths
154 */
155 SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");
156
157 static int
158 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
159 {
160 int error;
161 struct nchashhead *ncpp;
162 struct namecache *ncp;
163 int n_nchash;
164 int count;
165
166 n_nchash = nchash + 1; /* nchash is max index, not count */
167 if (!req->oldptr)
168 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
169
170 /* Scan hash tables for applicable entries */
171 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
172 count = 0;
173 LIST_FOREACH(ncp, ncpp, nc_hash) {
174 count++;
175 }
176 error = SYSCTL_OUT(req, &count, sizeof(count));
177 if (error)
178 return (error);
179 }
180 return (0);
181 }
182 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
183 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");
184
185 static int
186 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
187 {
188 int error;
189 struct nchashhead *ncpp;
190 struct namecache *ncp;
191 int n_nchash;
192 int count, maxlength, used, pct;
193
194 if (!req->oldptr)
195 return SYSCTL_OUT(req, 0, 4 * sizeof(int));
196
197 n_nchash = nchash + 1; /* nchash is max index, not count */
198 used = 0;
199 maxlength = 0;
200
201 /* Scan hash tables for applicable entries */
202 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
203 count = 0;
204 LIST_FOREACH(ncp, ncpp, nc_hash) {
205 count++;
206 }
207 if (count)
208 used++;
209 if (maxlength < count)
210 maxlength = count;
211 }
212 n_nchash = nchash + 1;
213 pct = (used * 100 * 100) / n_nchash;
214 error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
215 if (error)
216 return (error);
217 error = SYSCTL_OUT(req, &used, sizeof(used));
218 if (error)
219 return (error);
220 error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
221 if (error)
222 return (error);
223 error = SYSCTL_OUT(req, &pct, sizeof(pct));
224 if (error)
225 return (error);
226 return (0);
227 }
228 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
229 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");
230
/*
 * cache_zap():
 *
 * Removes a namecache entry from cache, whether it contains an actual
 * pointer to a vnode or if it is just a negative cache entry.
 */
static void
cache_zap(ncp)
	struct namecache *ncp;
{
	/* Unhook from the global hash chain and the parent's child list. */
	LIST_REMOVE(ncp, nc_hash);
	LIST_REMOVE(ncp, nc_src);
	/*
	 * If this was the parent directory's last cached child, release
	 * the hold that cache_enter() placed on the directory vnode.
	 * Must run after the LIST_REMOVE above for the emptiness test.
	 */
	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
		vdrop(ncp->nc_dvp);
		numcachehv--;
	}
	/*
	 * Positive entries sit on the target vnode's v_cache_dst list;
	 * negative entries sit on the global ncneg LRU queue.
	 */
	if (ncp->nc_vp) {
		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
		numneg--;
	}
	numcache--;
	free(ncp, M_VFSCACHE);
}
256
257 /*
258 * cache_leaf_test()
259 *
260 * Test whether this (directory) vnode's namei cache entry contains
261 * subdirectories or not. Used to determine whether the directory is
262 * a leaf in the namei cache or not. Note: the directory may still
263 * contain files in the namei cache.
264 *
265 * Returns 0 if the directory is a leaf, -1 if it isn't.
266 */
267 int
268 cache_leaf_test(struct vnode *vp)
269 {
270 struct namecache *ncpc;
271
272 for (ncpc = LIST_FIRST(&vp->v_cache_src);
273 ncpc != NULL;
274 ncpc = LIST_NEXT(ncpc, nc_src)
275 ) {
276 if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR)
277 return(-1);
278 }
279 return(0);
280 }
281
/*
 * Lookup an entry in the cache
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought. If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned. If the lookup determines that the name does not exist
 * (negative cacheing), a status of ENOENT is returned. If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	u_int32_t hash;

	/* Cache disabled: also forbid the caller from adding an entry. */
	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}

	numcalls++;

	/* "." and ".." are answered from the vnode itself, not the hash. */
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			dothits++;
			return (-1);
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			/*
			 * The cached parent (v_dd) is only trustworthy if
			 * its generation number still matches v_ddid.
			 */
			if (dvp->v_dd->v_id != dvp->v_ddid ||
			    (cnp->cn_flags & MAKEENTRY) == 0) {
				dvp->v_ddid = 0;
				return (0);
			}
			*vpp = dvp->v_dd;
			return (-1);
		}
	}

	/* Hash on (name, directory v_id); must match cache_enter(). */
	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* We failed to find an entry */
	if (ncp == 0) {
		if ((cnp->cn_flags & MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		nchstats.ncs_miss++;
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		numposzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		numposhits++;
		nchstats.ncs_goodhits++;
		*vpp = ncp->nc_vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		numnegzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	numneghits++;
	/*
	 * We found a "negative" match, so we shift it to the end of
	 * the "negative" cache entries queue to satisfy LRU.  Also,
	 * check to see if the entry is a whiteout; indicate this to
	 * the componentname, if so.
	 */
	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	nchstats.ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITE)
		cnp->cn_flags |= ISWHITEOUT;
	return (ENOENT);
}
385
/*
 * Add an entry to the cache.
 *
 * dvp is the directory, cnp the name being entered, and vp the vnode
 * the name resolves to — or NULL to record a negative (nonexistent)
 * entry.
 */
void
cache_enter(dvp, vp, cnp)
	struct vnode *dvp;
	struct vnode *vp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	u_int32_t hash;
	int len;

	if (!doingcache)
		return;

	/* "." is never cached; ".." is cached in the vnode itself. */
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			return;
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			/* Record (or clear) the parent pointer and its generation. */
			if (vp) {
				dvp->v_dd = vp;
				dvp->v_ddid = vp->v_id;
			} else {
				dvp->v_dd = dvp;
				dvp->v_ddid = 0;
			}
			return;
		}
	}

	/* The name is stored inline after the struct (see nc_name[0]). */
	ncp = (struct namecache *)
		malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
	bzero((char *)ncp, sizeof *ncp);
	numcache++;
	if (!vp) {
		/* Negative entry: remember whether it represents a whiteout. */
		numneg++;
		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
	} else if (vp->v_type == VDIR) {
		/* Prime the child directory's ".." linkage for cache_lookup(). */
		vp->v_dd = dvp;
		vp->v_ddid = dvp->v_id;
	}

	/*
	 * Set the rest of the namecache entry elements, calculate it's
	 * hash key and insert it into the appropriate chain within
	 * the cache entries table.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	len = ncp->nc_nlen = cnp->cn_namelen;
	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	ncpp = NCHHASH(hash);
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
	/* First cached child: hold the directory so it is not recycled. */
	if (LIST_EMPTY(&dvp->v_cache_src)) {
		vhold(dvp);
		numcachehv++;
	}
	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
	/*
	 * If the entry is "negative", we place it into the
	 * "negative" cache queue, otherwise, we place it into the
	 * destination vnode's cache entries queue.
	 */
	if (vp) {
		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	}
	/* Cap negative entries at 1/ncnegfactor of the cache (LRU eviction). */
	if (numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneg);
		cache_zap(ncp);
	}
}
464
/*
 * Name cache initialization, from vfs_init() when we are booting
 */
static void
nchinit(void *dummy __unused)
{

	TAILQ_INIT(&ncneg);
	/* Size the hash table at twice the vnode target to keep chains short. */
	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)
476
477
/*
 * Invalidate all entries to a particular vnode.
 *
 * Remove all entries in the namecache relating to this vnode and
 * change the v_id. We take the v_id from a global counter, since
 * it becomes a handy sequence number in crash-dumps that way.
 * No valid vnode will ever have (v_id == 0).
 *
 * XXX: Only time and the size of v_id prevents this from failing:
 * XXX: In theory we should hunt down all (struct vnode*, v_id)
 * XXX: soft references and nuke them, at least on the global
 * XXX: v_id wraparound. The period of resistance can be extended
 * XXX: by incrementing each vnodes v_id individually instead of
 * XXX: using the global v_id.
 */

void
cache_purge(vp)
	struct vnode *vp;
{
	static u_long nextid;

	/* Zap every entry that names vp and every entry inside vp. */
	while (!LIST_EMPTY(&vp->v_cache_src))
		cache_zap(LIST_FIRST(&vp->v_cache_src));
	while (!TAILQ_EMPTY(&vp->v_cache_dst))
		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));

	/* Pick a fresh generation number, skipping 0 and the current value. */
	do
		nextid++;
	while (nextid == vp->v_id || !nextid);
	vp->v_id = nextid;
	/* Reset the ".." linkage to a safe self-referential state. */
	vp->v_dd = vp;
	vp->v_ddid = 0;
}
512
513 /*
514 * Flush all entries referencing a particular filesystem.
515 *
516 * Since we need to check it anyway, we will flush all the invalid
517 * entries at the same time.
518 */
519 void
520 cache_purgevfs(mp)
521 struct mount *mp;
522 {
523 struct nchashhead *ncpp;
524 struct namecache *ncp, *nnp;
525
526 /* Scan hash tables for applicable entries */
527 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
528 for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
529 nnp = LIST_NEXT(ncp, nc_hash);
530 if (ncp->nc_dvp->v_mount == mp) {
531 cache_zap(ncp);
532 }
533 }
534 }
535 }
536
/*
 * Perform canonical checks and cache lookup and pass on to filesystem
 * through the vop_cachedlookup only if needed.
 */

int
vfs_cache_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct vnode *dvp, *vp;
	int lockparent;
	int error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	struct thread *td = cnp->cn_thread;
	u_long vpid;	/* capability number of vnode */

	*vpp = NULL;
	dvp = ap->a_dvp;
	lockparent = flags & LOCKPARENT;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* Refuse modifying lookups on read-only mounts outright. */
	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	/* Caller needs search permission on the directory. */
	error = VOP_ACCESS(dvp, VEXEC, cred, td);

	if (error)
		return (error);

	/* 0 = cache miss, -1 = positive hit (*vpp set), ENOENT = negative hit. */
	error = cache_lookup(dvp, vpp, cnp);

#ifdef LOOKUP_SHARED
	if (!error) {
		/* We do this because the rest of the system now expects to get
		 * a shared lock, which is later upgraded if LOCKSHARED is not
		 * set.  We have so many cases here because of bugs that yield
		 * inconsistant lock states.  This all badly needs to be fixed
		 */
		error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
		if (!error) {
			int flock;

			flock = VOP_ISLOCKED(*vpp, td);
			if (flock != LK_EXCLUSIVE) {
				if (flock == 0) {
					if ((flags & ISLASTCN) &&
					    (flags & LOCKSHARED))
						VOP_LOCK(*vpp, LK_SHARED, td);
					else
						VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
				}
			} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
				VOP_LOCK(*vpp, LK_DOWNGRADE, td);
		}
		return (error);
	}
#else
	/* Cache miss: hand the lookup to the filesystem. */
	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
#endif

	/* Negative cache hit: the name is known not to exist. */
	if (error == ENOENT)
		return (error);

	/* Positive hit: acquire the vnode, honoring namei's locking flags. */
	vp = *vpp;
	vpid = vp->v_id;
	cnp->cn_flags &= ~PDIRUNLOCK;
	if (dvp == vp) {	/* lookup on "." */
		VREF(vp);
		error = 0;
	} else if (flags & ISDOTDOT) {
		/* Drop the child lock before taking the parent's to avoid deadlock. */
		VOP_UNLOCK(dvp, 0, td);
		cnp->cn_flags |= PDIRUNLOCK;
#ifdef LOOKUP_SHARED
		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			error = vget(vp, LK_SHARED, td);
		else
			error = vget(vp, LK_EXCLUSIVE, td);
#else
		error = vget(vp, LK_EXCLUSIVE, td);
#endif

		if (!error && lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
		}
	} else {
#ifdef LOOKUP_SHARED
		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			error = vget(vp, LK_SHARED, td);
		else
			error = vget(vp, LK_EXCLUSIVE, td);
#else
		error = vget(vp, LK_EXCLUSIVE, td);
#endif
		if (!lockparent || error || !(flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	/*
	 * Check that the capability number did not change
	 * while we were waiting for the lock.
	 */
	if (!error) {
		if (vpid == vp->v_id)
			return (0);
		/* vnode was recycled under us: drop it and retry the hard way. */
		vput(vp);
		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	/* Re-lock the directory before falling back to the filesystem. */
	if (cnp->cn_flags & PDIRUNLOCK) {
		error = vn_lock(dvp, LK_EXCLUSIVE, td);
		if (error)
			return (error);
		cnp->cn_flags &= ~PDIRUNLOCK;
	}
#ifdef LOOKUP_SHARED
	error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);

	if (!error) {
		int flock = 0;

		flock = VOP_ISLOCKED(*vpp, td);
		if (flock != LK_EXCLUSIVE) {
			if (flock == 0) {
				if ((flags & ISLASTCN) && (flags & LOCKSHARED))
					VOP_LOCK(*vpp, LK_SHARED, td);
				else
					VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
			}
		} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			VOP_LOCK(*vpp, LK_DOWNGRADE, td);
	}

	return (error);
#else
	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
#endif
}
689
690
#ifndef _SYS_SYSPROTO_H_
/* Argument layout for the __getcwd(2) syscall (normally from sysproto.h). */
struct __getcwd_args {
	u_char *buf;	/* user buffer that receives the path */
	u_int buflen;	/* size of buf in bytes */
};
#endif
697
698 /*
699 * XXX All of these sysctls would probably be more productive dead.
700 */
701 static int disablecwd;
702 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
703 "Disable the getcwd syscall");
704
705 /* Various statistics for the getcwd syscall */
706 static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
707 static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
708 static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
709 static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
710 static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
711 static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
712
/* Implementation of the getcwd syscall */
int
__getcwd(td, uap)
	struct thread *td;
	struct __getcwd_args *uap;
{

	/* Thin wrapper: the real work, with a userspace destination. */
	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
}
722
723 int
724 kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
725 {
726 char *bp, *tmpbuf;
727 int error, i, slash_prefixed;
728 struct filedesc *fdp;
729 struct namecache *ncp;
730 struct vnode *vp;
731
732 numcwdcalls++;
733 if (disablecwd)
734 return (ENODEV);
735 if (buflen < 2)
736 return (EINVAL);
737 if (buflen > MAXPATHLEN)
738 buflen = MAXPATHLEN;
739 error = 0;
740 tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK);
741 bp += buflen - 1;
742 *bp = '\0';
743 fdp = td->td_proc->p_fd;
744 slash_prefixed = 0;
745 FILEDESC_LOCK(fdp);
746 mp_fixme("No vnode locking done!");
747 for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
748 if (vp->v_vflag & VV_ROOT) {
749 if (vp->v_mount == NULL) { /* forced unmount */
750 FILEDESC_UNLOCK(fdp);
751 free(tmpbuf, M_TEMP);
752 return (EBADF);
753 }
754 vp = vp->v_mount->mnt_vnodecovered;
755 continue;
756 }
757 if (vp->v_dd->v_id != vp->v_ddid) {
758 FILEDESC_UNLOCK(fdp);
759 numcwdfail1++;
760 free(tmpbuf, M_TEMP);
761 return (ENOTDIR);
762 }
763 ncp = TAILQ_FIRST(&vp->v_cache_dst);
764 if (!ncp) {
765 FILEDESC_UNLOCK(fdp);
766 numcwdfail2++;
767 free(tmpbuf, M_TEMP);
768 return (ENOENT);
769 }
770 if (ncp->nc_dvp != vp->v_dd) {
771 FILEDESC_UNLOCK(fdp);
772 numcwdfail3++;
773 free(tmpbuf, M_TEMP);
774 return (EBADF);
775 }
776 for (i = ncp->nc_nlen - 1; i >= 0; i--) {
777 if (bp == tmpbuf) {
778 FILEDESC_UNLOCK(fdp);
779 numcwdfail4++;
780 free(tmpbuf, M_TEMP);
781 return (ENOMEM);
782 }
783 *--bp = ncp->nc_name[i];
784 }
785 if (bp == tmpbuf) {
786 FILEDESC_UNLOCK(fdp);
787 numcwdfail4++;
788 free(tmpbuf, M_TEMP);
789 return (ENOMEM);
790 }
791 *--bp = '/';
792 slash_prefixed = 1;
793 vp = vp->v_dd;
794 }
795 FILEDESC_UNLOCK(fdp);
796 if (!slash_prefixed) {
797 if (bp == tmpbuf) {
798 numcwdfail4++;
799 free(tmpbuf, M_TEMP);
800 return (ENOMEM);
801 }
802 *--bp = '/';
803 }
804 numcwdfound++;
805 if (bufseg == UIO_SYSSPACE)
806 bcopy(bp, buf, strlen(bp) + 1);
807 else
808 error = copyout(bp, buf, strlen(bp) + 1);
809 free(tmpbuf, M_TEMP);
810 return (error);
811 }
812
813 /*
814 * Thus begins the fullpath magic.
815 */
816
817 #undef STATNODE
818 #define STATNODE(name) \
819 static u_int name; \
820 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
821
822 static int disablefullpath;
823 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
824 "Disable the vn_fullpath function");
825
826 STATNODE(numfullpathcalls);
827 STATNODE(numfullpathfail1);
828 STATNODE(numfullpathfail2);
829 STATNODE(numfullpathfail3);
830 STATNODE(numfullpathfail4);
831 STATNODE(numfullpathfound);
832
/*
 * Retrieve the full filesystem path that correspond to a vnode from the name
 * cache (if available)
 *
 * On success *retbuf points at the NUL-terminated path inside *freebuf;
 * the caller must free *freebuf (M_TEMP).
 */
int
vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
{
	char *bp, *buf;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numfullpathcalls++;
	if (disablefullpath)
		return (ENODEV);
	if (vn == NULL)
		return (EINVAL);
	/* Build the path backwards from the end of the buffer. */
	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	bp = buf + MAXPATHLEN - 1;
	*bp = '\0';
	fdp = td->td_proc->p_fd;
	slash_prefixed = 0;
	FILEDESC_LOCK(fdp);
	/* Walk up toward the process root (or the global root vnode). */
	for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
		ASSERT_VOP_LOCKED(vp, "vn_fullpath");
		if (vp->v_vflag & VV_ROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				FILEDESC_UNLOCK(fdp);
				free(buf, M_TEMP);
				return (EBADF);
			}
			/* Cross the mount point to the covered vnode. */
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		/* The cached ".." pointer must still be current (skip for vn itself). */
		if (vp != vn && vp->v_dd->v_id != vp->v_ddid) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail1++;
			free(buf, M_TEMP);
			return (ENOTDIR);
		}
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		if (vp != vn && ncp->nc_dvp != vp->v_dd) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail3++;
			free(buf, M_TEMP);
			return (EBADF);
		}
		/* Prepend this path component, then a '/'. */
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				FILEDESC_UNLOCK(fdp);
				numfullpathfail4++;
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_dvp;
	}
	/* The root itself is spelled "/". */
	if (!slash_prefixed) {
		if (bp == buf) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	FILEDESC_UNLOCK(fdp);
	numfullpathfound++;
	*retbuf = bp;
	*freebuf = buf;
	return (0);
}
Cache object: 256edbe9fad158822628493118848341
|