FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_cache.c
1 /*
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Poul-Henning Kamp of the FreeBSD Project.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
37 */
38
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD: releng/5.2/sys/kern/vfs_cache.c 120792 2003-10-05 07:13:50Z jeff $");
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mutex.h>
47 #include <sys/sysctl.h>
48 #include <sys/mount.h>
49 #include <sys/vnode.h>
50 #include <sys/namei.h>
51 #include <sys/malloc.h>
52 #include <sys/syscallsubr.h>
53 #include <sys/sysproto.h>
54 #include <sys/proc.h>
55 #include <sys/filedesc.h>
56 #include <sys/fnv_hash.h>
57
58 #include <vm/uma.h>
59
60 /*
61 * This structure describes the elements in the cache of recent
62 * names looked up by namei.
63 */
64
65 struct namecache {
66 LIST_ENTRY(namecache) nc_hash; /* hash chain */
67 LIST_ENTRY(namecache) nc_src; /* source vnode list */
68 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */
69 struct vnode *nc_dvp; /* vnode of parent of name */
70 struct vnode *nc_vp; /* vnode the name refers to */
71 u_char nc_flag; /* flag bits */
72 u_char nc_nlen; /* length of name */
73 char nc_name[0]; /* segment name */
74 };
75
76 /*
77 * Name caching works as follows:
78 *
79 * Names found by directory scans are retained in a cache
80 * for future reference. It is managed LRU, so frequently
81 * used names will hang around. The cache is indexed by a hash value
82 * obtained from (vp, name) where vp refers to the directory
83 * containing name.
84 *
85 * If it is a "negative" entry (i.e. for a name that is known NOT to
86 * exist), the vnode pointer will be NULL.
87 *
88 * Upon reaching the last segment of a path, if the reference
89 * is for DELETE, or NOCACHE is set (rewrite), and the
90 * name is located in the cache, it will be dropped.
91 */
92
93 /*
94 * Structures associated with name caching.
95 */
96 #define NCHHASH(hash) \
97 (&nchashtbl[(hash) & nchash])
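/*
 * Illustrative sketch (not part of the original source): cache_lookup()
 * and cache_enter() below build the hash key as an FNV-1 hash of the
 * name followed by the parent directory's v_id, and NCHHASH() masks it
 * down to a chain head:
 *
 *	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
 *	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
 *	ncpp = NCHHASH(hash);		(i.e. &nchashtbl[hash & nchash])
 */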
98 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
99 static TAILQ_HEAD(, namecache) ncneg; /* Negative entry LRU queue */
100 static u_long nchash; /* size of hash table - 1 */
101 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
102 static u_long ncnegfactor = 16; /* ratio of negative entries */
103 SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
104 static u_long numneg; /* number of negative entries allocated */
105 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
106 static u_long numcache; /* number of cache entries allocated */
107 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
108 static u_long numcachehv; /* number of cache entries with vnodes held */
109 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
110 #if 0
111 static u_long numcachepl; /* number of cache purges for leaf entries */
112 SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
113 #endif
114 struct nchstats nchstats; /* cache effectiveness statistics */
115
116 struct mtx cache_lock;
117 MTX_SYSINIT(vfscache, &cache_lock, "Name Cache", MTX_DEF);
118
119 #define CACHE_LOCK() mtx_lock(&cache_lock)
120 #define CACHE_UNLOCK() mtx_unlock(&cache_lock)
121
122 /*
123 * UMA zones for the VFS cache.
124 *
125 * The small cache is used for entries with short names, which are the
126 * most common. The large cache is used for entries which are too big to
127 * fit in the small cache.
128 */
129 static uma_zone_t cache_zone_small;
130 static uma_zone_t cache_zone_large;
131
132 #define CACHE_PATH_CUTOFF 32
133 #define CACHE_ZONE_SMALL (sizeof(struct namecache) + CACHE_PATH_CUTOFF)
134 #define CACHE_ZONE_LARGE (sizeof(struct namecache) + NAME_MAX)
135
136 #define cache_alloc(len) uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \
137 cache_zone_small : cache_zone_large, M_WAITOK)
138 #define cache_free(ncp) do { \
139 if (ncp != NULL) \
140 uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \
141 cache_zone_small : cache_zone_large, (ncp)); \
142 } while (0)
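/*
 * Illustrative example (not part of the original source): an entry for a
 * 10-byte name is served from cache_zone_small, whose items are
 * sizeof(struct namecache) + CACHE_PATH_CUTOFF (32) bytes, with the name
 * stored inline in nc_name[]; a 40-byte name exceeds the cutoff and is
 * allocated from cache_zone_large (sizeof(struct namecache) + NAME_MAX).
 * cache_free() re-derives the owning zone from nc_nlen.
 */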
143
144 static int doingcache = 1; /* 1 => enable the cache */
145 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
146
147 /* Export size information to userland */
148 SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
149 SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
150
151 /*
152 * The new name cache statistics
153 */
154 SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
155 #define STATNODE(mode, name, var) \
156 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
157 STATNODE(CTLFLAG_RD, numneg, &numneg);
158 STATNODE(CTLFLAG_RD, numcache, &numcache);
159 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
160 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
161 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
162 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
163 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
164 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
165 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
166 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
167 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
168 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
169
170 SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
171 sizeof(nchstats), "LU", "VFS cache effectiveness statistics");
172
173
174
175 static void cache_zap(struct namecache *ncp, int locked);
176
177 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
178
179 /*
180 * Flags in namecache.nc_flag
181 */
182 #define NCF_WHITE 1
183
184 /*
185 * Grab an atomic snapshot of the name cache hash chain lengths
186 */
187 SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");
188
189 static int
190 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
191 {
192 int error;
193 struct nchashhead *ncpp;
194 struct namecache *ncp;
195 int n_nchash;
196 int count;
197
198 n_nchash = nchash + 1; /* nchash is max index, not count */
199 if (!req->oldptr)
200 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
201
202 /* Scan hash tables for applicable entries */
203 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
204 count = 0;
205 LIST_FOREACH(ncp, ncpp, nc_hash) {
206 count++;
207 }
208 error = SYSCTL_OUT(req, &count, sizeof(count));
209 if (error)
210 return (error);
211 }
212 return (0);
213 }
214 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
215 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");
216
217 static int
218 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
219 {
220 int error;
221 struct nchashhead *ncpp;
222 struct namecache *ncp;
223 int n_nchash;
224 int count, maxlength, used, pct;
225
226 if (!req->oldptr)
227 return SYSCTL_OUT(req, 0, 4 * sizeof(int));
228
229 n_nchash = nchash + 1; /* nchash is max index, not count */
230 used = 0;
231 maxlength = 0;
232
233 /* Scan hash tables for applicable entries */
234 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
235 count = 0;
236 LIST_FOREACH(ncp, ncpp, nc_hash) {
237 count++;
238 }
239 if (count)
240 used++;
241 if (maxlength < count)
242 maxlength = count;
243 }
244 n_nchash = nchash + 1;
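	/* Fraction of buckets in use, expressed in hundredths of a percent. */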
245 pct = (used * 100 * 100) / n_nchash;
246 error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
247 if (error)
248 return (error);
249 error = SYSCTL_OUT(req, &used, sizeof(used));
250 if (error)
251 return (error);
252 error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
253 if (error)
254 return (error);
255 error = SYSCTL_OUT(req, &pct, sizeof(pct));
256 if (error)
257 return (error);
258 return (0);
259 }
260 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
261 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");
262
263 /*
264 * cache_zap():
265 *
266 * Removes a namecache entry from the cache, whether it holds an actual
267 * vnode pointer or is just a negative cache entry.
268 */
269 static void
270 cache_zap(ncp, locked)
271 struct namecache *ncp;
272 int locked;
273 {
274 struct vnode *vp;
275
276 vp = NULL;
277 if (!locked)
278 CACHE_LOCK();
279 LIST_REMOVE(ncp, nc_hash);
280 LIST_REMOVE(ncp, nc_src);
281 if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
282 vp = ncp->nc_dvp;
283 numcachehv--;
284 }
285 if (ncp->nc_vp) {
286 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
287 } else {
288 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
289 numneg--;
290 }
291 numcache--;
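	/*
	 * Release the cache lock before freeing the entry and dropping any
	 * vnode hold; re-acquire it only if the caller entered with it held.
	 */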
292 CACHE_UNLOCK();
293 cache_free(ncp);
294 if (vp)
295 vdrop(vp);
296 if (locked)
297 CACHE_LOCK();
298 }
299
300 /*
301 * cache_leaf_test()
302 *
303 * Test whether this (directory) vnode's namei cache entry contains
304 * subdirectories or not. Used to determine whether the directory is
305 * a leaf in the namei cache or not. Note: the directory may still
306 * contain files in the namei cache.
307 *
308 * Returns 0 if the directory is a leaf, -1 if it isn't.
309 */
310 int
311 cache_leaf_test(struct vnode *vp)
312 {
313 struct namecache *ncpc;
314 int leaf;
315
316 leaf = 0;
317 CACHE_LOCK();
318 for (ncpc = LIST_FIRST(&vp->v_cache_src);
319 ncpc != NULL;
320 ncpc = LIST_NEXT(ncpc, nc_src)
321 ) {
322 if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) {
323 leaf = -1;
324 break;
325 }
326 }
327 CACHE_UNLOCK();
328 return (leaf);
329 }
330
331 /*
332 * Lookup an entry in the cache
333 *
334 * Lookup is called with dvp pointing to the directory to search,
335 * cnp pointing to the name of the entry being sought. If the lookup
336 * succeeds, the vnode is returned in *vpp, and a status of -1 is
337 * returned. If the lookup determines that the name does not exist
338 * (negative caching), a status of ENOENT is returned. If the lookup
339 * fails, a status of zero is returned.
340 */
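/*
 * Illustrative caller-side sketch (not part of the original source);
 * compare vfs_cache_lookup() below:
 *
 *	error = cache_lookup(dvp, vpp, cnp);
 *	if (error == -1)
 *		...	positive hit, *vpp is set to the cached vnode
 *	else if (error == ENOENT)
 *		...	cached negative entry, name known not to exist
 *	else
 *		...	cache miss, fall back to VOP_CACHEDLOOKUP()
 */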
341
342 int
343 cache_lookup(dvp, vpp, cnp)
344 struct vnode *dvp;
345 struct vnode **vpp;
346 struct componentname *cnp;
347 {
348 struct namecache *ncp;
349 u_int32_t hash;
350
351 if (!doingcache) {
352 cnp->cn_flags &= ~MAKEENTRY;
353 return (0);
354 }
355
356 CACHE_LOCK();
357 numcalls++;
358
359 if (cnp->cn_nameptr[0] == '.') {
360 if (cnp->cn_namelen == 1) {
361 *vpp = dvp;
362 dothits++;
363 CACHE_UNLOCK();
364 return (-1);
365 }
366 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
367 dotdothits++;
368 if (dvp->v_dd->v_id != dvp->v_ddid ||
369 (cnp->cn_flags & MAKEENTRY) == 0) {
370 dvp->v_ddid = 0;
371 CACHE_UNLOCK();
372 return (0);
373 }
374 *vpp = dvp->v_dd;
375 CACHE_UNLOCK();
376 return (-1);
377 }
378 }
379
380 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
381 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
382 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
383 numchecks++;
384 if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
385 !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
386 break;
387 }
388
389 /* We failed to find an entry */
390 if (ncp == 0) {
391 if ((cnp->cn_flags & MAKEENTRY) == 0) {
392 nummisszap++;
393 } else {
394 nummiss++;
395 }
396 nchstats.ncs_miss++;
397 CACHE_UNLOCK();
398 return (0);
399 }
400
401 /* We don't want to have an entry, so dump it */
402 if ((cnp->cn_flags & MAKEENTRY) == 0) {
403 numposzaps++;
404 nchstats.ncs_badhits++;
405 CACHE_UNLOCK();
406 cache_zap(ncp, 0);
407 return (0);
408 }
409
410 /* We found a "positive" match, return the vnode */
411 if (ncp->nc_vp) {
412 numposhits++;
413 nchstats.ncs_goodhits++;
414 *vpp = ncp->nc_vp;
415 CACHE_UNLOCK();
416 return (-1);
417 }
418
419 /* We found a negative match, and want to create it, so purge */
420 if (cnp->cn_nameiop == CREATE) {
421 numnegzaps++;
422 nchstats.ncs_badhits++;
423 CACHE_UNLOCK();
424 cache_zap(ncp, 0);
425 return (0);
426 }
427
428 numneghits++;
429 /*
430 * We found a "negative" match, so we shift it to the end of
431 * the "negative" cache entries queue to satisfy LRU. Also,
432 * check whether the entry is a whiteout and, if so, indicate it in
433 * the componentname.
434 */
435 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
436 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
437 nchstats.ncs_neghits++;
438 if (ncp->nc_flag & NCF_WHITE)
439 cnp->cn_flags |= ISWHITEOUT;
440 CACHE_UNLOCK();
441 return (ENOENT);
442 }
443
444 /*
445 * Add an entry to the cache.
446 */
447 void
448 cache_enter(dvp, vp, cnp)
449 struct vnode *dvp;
450 struct vnode *vp;
451 struct componentname *cnp;
452 {
453 struct namecache *ncp;
454 struct nchashhead *ncpp;
455 u_int32_t hash;
456 int hold;
457 int zap;
458 int len;
459
460 if (!doingcache)
461 return;
462
463 if (cnp->cn_nameptr[0] == '.') {
464 if (cnp->cn_namelen == 1) {
465 return;
466 }
467 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
468 if (vp) {
469 dvp->v_dd = vp;
470 dvp->v_ddid = vp->v_id;
471 } else {
472 dvp->v_dd = dvp;
473 dvp->v_ddid = 0;
474 }
475 return;
476 }
477 }
478
479 hold = 0;
480 zap = 0;
481 ncp = cache_alloc(cnp->cn_namelen);
482 CACHE_LOCK();
483 numcache++;
484 if (!vp) {
485 numneg++;
486 ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
487 } else if (vp->v_type == VDIR) {
488 vp->v_dd = dvp;
489 vp->v_ddid = dvp->v_id;
490 }
491
492 /*
493 * Set the rest of the namecache entry elements, calculate its
494 * hash key and insert it into the appropriate chain within
495 * the cache entries table.
496 */
497 ncp->nc_vp = vp;
498 ncp->nc_dvp = dvp;
499 len = ncp->nc_nlen = cnp->cn_namelen;
500 hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
501 bcopy(cnp->cn_nameptr, ncp->nc_name, len);
502 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
503 ncpp = NCHHASH(hash);
504 LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
505 if (LIST_EMPTY(&dvp->v_cache_src)) {
506 hold = 1;
507 numcachehv++;
508 }
509 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
510 /*
511 * If the entry is "negative", we place it into the
512 * "negative" cache queue, otherwise, we place it into the
513 * destination vnode's cache entries queue.
514 */
515 if (vp) {
516 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
517 } else {
518 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
519 }
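	/*
	 * If negative entries now make up more than roughly 1/ncnegfactor
	 * (1/16 by default) of the whole cache, mark the oldest negative
	 * entry, at the head of the ncneg LRU queue, for eviction once the
	 * cache lock is dropped.
	 */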
520 if (numneg * ncnegfactor > numcache) {
521 ncp = TAILQ_FIRST(&ncneg);
522 zap = 1;
523 }
524 CACHE_UNLOCK();
525 if (hold)
526 vhold(dvp);
527 if (zap)
528 cache_zap(ncp, 0);
529 }
530
531 /*
532 * Name cache initialization, from vfs_init() when we are booting
533 */
534 static void
535 nchinit(void *dummy __unused)
536 {
537
538 TAILQ_INIT(&ncneg);
539
540 cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
541 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
542 cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
543 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
544
545 nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
546 }
547 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)
548
549
550 /*
551 * Invalidate all entries to a particular vnode.
552 *
553 * Remove all entries in the namecache relating to this vnode and
554 * change the v_id. We take the v_id from a global counter, since
555 * it becomes a handy sequence number in crash-dumps that way.
556 * No valid vnode will ever have (v_id == 0).
557 *
558 * XXX: Only time and the size of v_id prevents this from failing:
559 * XXX: In theory we should hunt down all (struct vnode*, v_id)
560 * XXX: soft references and nuke them, at least on the global
561 * XXX: v_id wraparound. The period of resistance can be extended
562 * XXX: by incrementing each vnodes v_id individually instead of
563 * XXX: using the global v_id.
564 */
565
566 /*
567 * XXX This is sometimes called when a vnode may still be re-used, in which
568 * case v_dd may be invalid. Need to look this up.
569 */
570 void
571 cache_purge(vp)
572 struct vnode *vp;
573 {
574 static u_long nextid;
575
576 CACHE_LOCK();
577 while (!LIST_EMPTY(&vp->v_cache_src))
578 cache_zap(LIST_FIRST(&vp->v_cache_src), 1);
579 while (!TAILQ_EMPTY(&vp->v_cache_dst))
580 cache_zap(TAILQ_FIRST(&vp->v_cache_dst), 1);
581
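	/*
	 * Advance the global counter until it yields a nonzero value that
	 * differs from the current v_id, and use that as the vnode's new
	 * capability number.
	 */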
582 do
583 nextid++;
584 while (nextid == vp->v_id || !nextid);
585 vp->v_id = nextid;
586 vp->v_dd = vp;
587 vp->v_ddid = 0;
588 CACHE_UNLOCK();
589 }
590
591 /*
592 * Flush all entries referencing a particular filesystem.
593 *
594 * Since we need to check it anyway, we will flush all the invalid
595 * entries at the same time.
596 */
597 void
598 cache_purgevfs(mp)
599 struct mount *mp;
600 {
601 struct nchashhead *ncpp;
602 struct namecache *ncp, *nnp;
603 struct nchashhead mplist;
604
605 LIST_INIT(&mplist);
606 ncp = NULL;
607
608 /* Scan hash tables for applicable entries */
609 CACHE_LOCK();
610 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
611 for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
612 nnp = LIST_NEXT(ncp, nc_hash);
613 if (ncp->nc_dvp->v_mount == mp) {
614 LIST_REMOVE(ncp, nc_hash);
615 LIST_INSERT_HEAD(&mplist, ncp, nc_hash);
616 }
617 }
618 }
619 CACHE_UNLOCK();
620 while (!LIST_EMPTY(&mplist))
621 cache_zap(LIST_FIRST(&mplist), 0);
622 }
623
624 /*
625 * Perform canonical checks and cache lookup and pass on to the
626 * filesystem through VOP_CACHEDLOOKUP only if needed.
627 */
628
629 int
630 vfs_cache_lookup(ap)
631 struct vop_lookup_args /* {
632 struct vnode *a_dvp;
633 struct vnode **a_vpp;
634 struct componentname *a_cnp;
635 } */ *ap;
636 {
637 struct vnode *dvp, *vp;
638 int lockparent;
639 int error;
640 struct vnode **vpp = ap->a_vpp;
641 struct componentname *cnp = ap->a_cnp;
642 struct ucred *cred = cnp->cn_cred;
643 int flags = cnp->cn_flags;
644 struct thread *td = cnp->cn_thread;
645 u_long vpid; /* capability number of vnode */
646
647 *vpp = NULL;
648 dvp = ap->a_dvp;
649 lockparent = flags & LOCKPARENT;
650
651 if (dvp->v_type != VDIR)
652 return (ENOTDIR);
653
654 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
655 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
656 return (EROFS);
657
658 error = VOP_ACCESS(dvp, VEXEC, cred, td);
659
660 if (error)
661 return (error);
662
663 error = cache_lookup(dvp, vpp, cnp);
664
665 #ifdef LOOKUP_SHARED
666 if (!error) {
667 /* We do this because the rest of the system now expects to get
668 * a shared lock, which is later upgraded if LOCKSHARED is not
669 * set. We have so many cases here because of bugs that yield
670 * inconsistent lock states. This all badly needs to be fixed.
671 */
672 error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
673 if (!error) {
674 int flock;
675
676 flock = VOP_ISLOCKED(*vpp, td);
677 if (flock != LK_EXCLUSIVE) {
678 if (flock == 0) {
679 if ((flags & ISLASTCN) &&
680 (flags & LOCKSHARED))
681 VOP_LOCK(*vpp, LK_SHARED, td);
682 else
683 VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
684 }
685 } else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
686 VOP_LOCK(*vpp, LK_DOWNGRADE, td);
687 }
688 return (error);
689 }
690 #else
691 if (!error)
692 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
693 #endif
694
695 if (error == ENOENT)
696 return (error);
697
698 vp = *vpp;
699 vpid = vp->v_id;
700 cnp->cn_flags &= ~PDIRUNLOCK;
701 if (dvp == vp) { /* lookup on "." */
702 VREF(vp);
703 error = 0;
704 } else if (flags & ISDOTDOT) {
705 VOP_UNLOCK(dvp, 0, td);
706 cnp->cn_flags |= PDIRUNLOCK;
707 #ifdef LOOKUP_SHARED
708 if ((flags & ISLASTCN) && (flags & LOCKSHARED))
709 error = vget(vp, LK_SHARED, td);
710 else
711 error = vget(vp, LK_EXCLUSIVE, td);
712 #else
713 error = vget(vp, LK_EXCLUSIVE, td);
714 #endif
715
716 if (!error && lockparent && (flags & ISLASTCN)) {
717 if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
718 cnp->cn_flags &= ~PDIRUNLOCK;
719 }
720 } else {
721 #ifdef LOOKUP_SHARED
722 if ((flags & ISLASTCN) && (flags & LOCKSHARED))
723 error = vget(vp, LK_SHARED, td);
724 else
725 error = vget(vp, LK_EXCLUSIVE, td);
726 #else
727 error = vget(vp, LK_EXCLUSIVE, td);
728 #endif
729 if (!lockparent || error || !(flags & ISLASTCN)) {
730 VOP_UNLOCK(dvp, 0, td);
731 cnp->cn_flags |= PDIRUNLOCK;
732 }
733 }
734 /*
735 * Check that the capability number did not change
736 * while we were waiting for the lock.
737 */
738 if (!error) {
739 if (vpid == vp->v_id)
740 return (0);
741 vput(vp);
742 if (lockparent && dvp != vp && (flags & ISLASTCN)) {
743 VOP_UNLOCK(dvp, 0, td);
744 cnp->cn_flags |= PDIRUNLOCK;
745 }
746 }
747 if (cnp->cn_flags & PDIRUNLOCK) {
748 error = vn_lock(dvp, LK_EXCLUSIVE, td);
749 if (error)
750 return (error);
751 cnp->cn_flags &= ~PDIRUNLOCK;
752 }
753 #ifdef LOOKUP_SHARED
754 error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
755
756 if (!error) {
757 int flock = 0;
758
759 flock = VOP_ISLOCKED(*vpp, td);
760 if (flock != LK_EXCLUSIVE) {
761 if (flock == 0) {
762 if ((flags & ISLASTCN) && (flags & LOCKSHARED))
763 VOP_LOCK(*vpp, LK_SHARED, td);
764 else
765 VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
766 }
767 } else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
768 VOP_LOCK(*vpp, LK_DOWNGRADE, td);
769 }
770
771 return (error);
772 #else
773 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
774 #endif
775 }
776
777
778 #ifndef _SYS_SYSPROTO_H_
779 struct __getcwd_args {
780 u_char *buf;
781 u_int buflen;
782 };
783 #endif
784
785 /*
786 * XXX All of these sysctls would probably be more productive dead.
787 */
788 static int disablecwd;
789 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
790 "Disable the getcwd syscall");
791
792 /* Various statistics for the getcwd syscall */
793 static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
794 static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
795 static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
796 static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
797 static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
798 static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
799
800 /* Implementation of the getcwd syscall */
801 int
802 __getcwd(td, uap)
803 struct thread *td;
804 struct __getcwd_args *uap;
805 {
806
807 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
808 }
809
810 int
811 kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
812 {
813 char *bp, *tmpbuf;
814 int error, i, slash_prefixed;
815 struct filedesc *fdp;
816 struct namecache *ncp;
817 struct vnode *vp;
818
819 numcwdcalls++;
820 if (disablecwd)
821 return (ENODEV);
822 if (buflen < 2)
823 return (EINVAL);
824 if (buflen > MAXPATHLEN)
825 buflen = MAXPATHLEN;
826 error = 0;
827 tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK);
828 bp += buflen - 1;
829 *bp = '\0';
830 fdp = td->td_proc->p_fd;
831 slash_prefixed = 0;
832 FILEDESC_LOCK(fdp);
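	/*
	 * Walk from the current directory toward the process root (or the
	 * global root), crossing mount points via mnt_vnodecovered and
	 * prepending each cached name component from the back of the
	 * temporary buffer.
	 */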
833 for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
834 if (vp->v_vflag & VV_ROOT) {
835 if (vp->v_mount == NULL) { /* forced unmount */
836 FILEDESC_UNLOCK(fdp);
837 free(tmpbuf, M_TEMP);
838 return (EBADF);
839 }
840 vp = vp->v_mount->mnt_vnodecovered;
841 continue;
842 }
843 if (vp->v_dd->v_id != vp->v_ddid) {
844 FILEDESC_UNLOCK(fdp);
845 numcwdfail1++;
846 free(tmpbuf, M_TEMP);
847 return (ENOTDIR);
848 }
849 CACHE_LOCK();
850 ncp = TAILQ_FIRST(&vp->v_cache_dst);
851 if (!ncp) {
852 numcwdfail2++;
853 CACHE_UNLOCK();
854 FILEDESC_UNLOCK(fdp);
855 free(tmpbuf, M_TEMP);
856 return (ENOENT);
857 }
858 if (ncp->nc_dvp != vp->v_dd) {
859 numcwdfail3++;
860 CACHE_UNLOCK();
861 FILEDESC_UNLOCK(fdp);
862 free(tmpbuf, M_TEMP);
863 return (EBADF);
864 }
865 for (i = ncp->nc_nlen - 1; i >= 0; i--) {
866 if (bp == tmpbuf) {
867 numcwdfail4++;
868 CACHE_UNLOCK();
869 FILEDESC_UNLOCK(fdp);
870 free(tmpbuf, M_TEMP);
871 return (ENOMEM);
872 }
873 *--bp = ncp->nc_name[i];
874 }
875 if (bp == tmpbuf) {
876 numcwdfail4++;
877 CACHE_UNLOCK();
878 FILEDESC_UNLOCK(fdp);
879 free(tmpbuf, M_TEMP);
880 return (ENOMEM);
881 }
882 *--bp = '/';
883 slash_prefixed = 1;
884 vp = vp->v_dd;
885 CACHE_UNLOCK();
886 }
887 FILEDESC_UNLOCK(fdp);
888 if (!slash_prefixed) {
889 if (bp == tmpbuf) {
890 numcwdfail4++;
891 free(tmpbuf, M_TEMP);
892 return (ENOMEM);
893 }
894 *--bp = '/';
895 }
896 numcwdfound++;
897 if (bufseg == UIO_SYSSPACE)
898 bcopy(bp, buf, strlen(bp) + 1);
899 else
900 error = copyout(bp, buf, strlen(bp) + 1);
901 free(tmpbuf, M_TEMP);
902 return (error);
903 }
904
905 /*
906 * Thus begins the fullpath magic.
907 */
908
909 #undef STATNODE
910 #define STATNODE(name) \
911 static u_int name; \
912 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
913
914 static int disablefullpath;
915 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
916 "Disable the vn_fullpath function");
917
918 STATNODE(numfullpathcalls);
919 STATNODE(numfullpathfail1);
920 STATNODE(numfullpathfail2);
921 STATNODE(numfullpathfail3);
922 STATNODE(numfullpathfail4);
923 STATNODE(numfullpathfound);
924
925 /*
926 * Retrieve the full filesystem path that corresponds to a vnode from the
927 * name cache (if available).
928 */
929 int
930 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
931 {
932 char *bp, *buf;
933 int i, slash_prefixed;
934 struct filedesc *fdp;
935 struct namecache *ncp;
936 struct vnode *vp;
937
938 numfullpathcalls++;
939 if (disablefullpath)
940 return (ENODEV);
941 if (vn == NULL)
942 return (EINVAL);
943 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
944 bp = buf + MAXPATHLEN - 1;
945 *bp = '\0';
946 fdp = td->td_proc->p_fd;
947 slash_prefixed = 0;
948 ASSERT_VOP_LOCKED(vn, "vn_fullpath");
949 FILEDESC_LOCK(fdp);
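	/*
	 * As in kern___getcwd() above: walk from the given vnode toward the
	 * process root, prepending each name component found through the
	 * v_cache_dst entries.
	 */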
950 for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
951 if (vp->v_vflag & VV_ROOT) {
952 if (vp->v_mount == NULL) { /* forced unmount */
953 FILEDESC_UNLOCK(fdp);
954 free(buf, M_TEMP);
955 return (EBADF);
956 }
957 vp = vp->v_mount->mnt_vnodecovered;
958 continue;
959 }
960 if (vp != vn && vp->v_dd->v_id != vp->v_ddid) {
961 FILEDESC_UNLOCK(fdp);
962 free(buf, M_TEMP);
963 numfullpathfail1++;
964 return (ENOTDIR);
965 }
966 CACHE_LOCK();
967 ncp = TAILQ_FIRST(&vp->v_cache_dst);
968 if (!ncp) {
969 numfullpathfail2++;
970 CACHE_UNLOCK();
971 FILEDESC_UNLOCK(fdp);
972 free(buf, M_TEMP);
973 return (ENOENT);
974 }
975 if (vp != vn && ncp->nc_dvp != vp->v_dd) {
976 numfullpathfail3++;
977 CACHE_UNLOCK();
978 FILEDESC_UNLOCK(fdp);
979 free(buf, M_TEMP);
980 return (EBADF);
981 }
982 for (i = ncp->nc_nlen - 1; i >= 0; i--) {
983 if (bp == buf) {
984 numfullpathfail4++;
985 CACHE_UNLOCK();
986 FILEDESC_UNLOCK(fdp);
987 free(buf, M_TEMP);
988 return (ENOMEM);
989 }
990 *--bp = ncp->nc_name[i];
991 }
992 if (bp == buf) {
993 numfullpathfail4++;
994 CACHE_UNLOCK();
995 FILEDESC_UNLOCK(fdp);
996 free(buf, M_TEMP);
997 return (ENOMEM);
998 }
999 *--bp = '/';
1000 slash_prefixed = 1;
1001 vp = ncp->nc_dvp;
1002 CACHE_UNLOCK();
1003 }
1004 if (!slash_prefixed) {
1005 if (bp == buf) {
1006 numfullpathfail4++;
1007 FILEDESC_UNLOCK(fdp);
1008 free(buf, M_TEMP);
1009 return (ENOMEM);
1010 }
1011 *--bp = '/';
1012 }
1013 FILEDESC_UNLOCK(fdp);
1014 numfullpathfound++;
1015 *retbuf = bp;
1016 *freebuf = buf;
1017 return (0);
1018 }