FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_cache.c
1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Poul-Henning Kamp of the FreeBSD Project.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/mutex.h>
43 #include <sys/sysctl.h>
44 #include <sys/mount.h>
45 #include <sys/vnode.h>
46 #include <sys/namei.h>
47 #include <sys/malloc.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/sysproto.h>
50 #include <sys/proc.h>
51 #include <sys/filedesc.h>
52 #include <sys/fnv_hash.h>
53
54 #include <vm/uma.h>
55
56 /*
57 * This structure describes the elements in the cache of recent
58 * names looked up by namei.
59 */
60
61 struct namecache {
62 LIST_ENTRY(namecache) nc_hash; /* hash chain */
63 LIST_ENTRY(namecache) nc_src; /* source vnode list */
64 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */
65 struct vnode *nc_dvp; /* vnode of parent of name */
66 struct vnode *nc_vp; /* vnode the name refers to */
67 u_char nc_flag; /* flag bits */
68 u_char nc_nlen; /* length of name */
69 char nc_name[0]; /* segment name */
70 };
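/*
 * Illustrative sketch, not part of the original source: nc_name[0] is the
 * pre-C99 flexible-array-member idiom -- each entry and its name share one
 * allocation of sizeof(struct namecache) plus the name length, which is what
 * the small/large UMA zones below are sized for.  A minimal stand-alone
 * userspace analogue (names here are hypothetical):
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_entry {
	size_t	nlen;		/* length of name */
	char	name[];		/* name stored inline, C99 spelling of name[0] */
};

static struct demo_entry *
demo_alloc(const char *name)
{
	size_t len = strlen(name);
	struct demo_entry *e;

	e = malloc(sizeof(*e) + len + 1);	/* header and name in one block */
	if (e == NULL)
		return (NULL);
	e->nlen = len;
	memcpy(e->name, name, len + 1);
	return (e);
}

int
main(void)
{
	struct demo_entry *e = demo_alloc("vfs_cache.c");

	if (e != NULL) {
		printf("%zu header bytes + \"%s\" in one allocation\n",
		    sizeof(*e), e->name);
		free(e);
	}
	return (0);
}
#endif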
71
72 /*
73 * Name caching works as follows:
74 *
75 * Names found by directory scans are retained in a cache
76 * for future reference. It is managed LRU, so frequently
77 * used names will hang around. Cache is indexed by hash value
78 * obtained from (vp, name) where vp refers to the directory
79 * containing name.
80 *
81 * If it is a "negative" entry (i.e. for a name that is known NOT to
82 * exist), the vnode pointer will be NULL.
83 *
84 * Upon reaching the last segment of a path, if the reference
85 * is for DELETE, or NOCACHE is set (rewrite), and the
86 * name is located in the cache, it will be dropped.
87 */
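/*
 * Illustrative sketch, not part of the original source: the cache key is an
 * FNV-1 hash folded first over the name bytes and then over the parent
 * directory's v_id, the same order used by cache_lookup() and cache_enter()
 * below; NCHHASH() then masks the 32-bit value down to a chain index.  The
 * kernel takes its initial basis from FNV1_32_INIT in <sys/fnv_hash.h>; the
 * stand-alone userspace sketch below uses the standard FNV-1 32-bit
 * constants and a hypothetical directory id.
 */
#if 0
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_FNV_32_PRIME	16777619u	/* standard 32-bit FNV prime */
#define DEMO_FNV1_32_INIT	2166136261u	/* standard FNV-1 offset basis */

/* FNV-1: multiply by the prime, then XOR in the next byte. */
static uint32_t
demo_fnv_32_buf(const void *buf, size_t len, uint32_t hval)
{
	const unsigned char *s = buf;

	while (len-- != 0) {
		hval *= DEMO_FNV_32_PRIME;
		hval ^= *s++;
	}
	return (hval);
}

int
main(void)
{
	const char name[] = "passwd";	/* hypothetical path component */
	unsigned long dir_id = 1234;	/* stands in for the directory's v_id */
	uint32_t hash;

	/* Name bytes first, then the directory identity. */
	hash = demo_fnv_32_buf(name, sizeof(name) - 1, DEMO_FNV1_32_INIT);
	hash = demo_fnv_32_buf(&dir_id, sizeof(dir_id), hash);

	/* NCHHASH() applies the same kind of mask, using nchash (size - 1). */
	printf("chain index in a 256-bucket table: %u\n", hash & 255u);
	return (0);
}
#endif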
88
89 /*
90 * Structures associated with name caching.
91 */
92 #define NCHHASH(hash) \
93 (&nchashtbl[(hash) & nchash])
94 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
95 static TAILQ_HEAD(, namecache) ncneg;	/* Negative cache entries, LRU-ordered */
96 static u_long nchash; /* size of hash table */
97 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
98 static u_long ncnegfactor = 16; /* ratio of negative entries */
99 SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
100 static u_long	numneg;		/* number of negative cache entries */
101 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
102 static u_long numcache; /* number of cache entries allocated */
103 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
104 static u_long numcachehv; /* number of cache entries with vnodes held */
105 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
106 #if 0
107 static u_long numcachepl; /* number of cache purge for leaf entries */
108 SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
109 #endif
110 struct nchstats nchstats; /* cache effectiveness statistics */
111
112 struct mtx cache_lock;
113 MTX_SYSINIT(vfscache, &cache_lock, "Name Cache", MTX_DEF);
114
115 #define CACHE_LOCK() mtx_lock(&cache_lock)
116 #define CACHE_UNLOCK() mtx_unlock(&cache_lock)
117
118 /*
119 * UMA zones for the VFS cache.
120 *
121 * The small cache is used for entries with short names, which are the
122 * most common. The large cache is used for entries which are too big to
123 * fit in the small cache.
124 */
125 static uma_zone_t cache_zone_small;
126 static uma_zone_t cache_zone_large;
127
128 #define CACHE_PATH_CUTOFF 32
129 #define CACHE_ZONE_SMALL (sizeof(struct namecache) + CACHE_PATH_CUTOFF)
130 #define CACHE_ZONE_LARGE (sizeof(struct namecache) + NAME_MAX)
131
132 #define cache_alloc(len) uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \
133 cache_zone_small : cache_zone_large, M_WAITOK)
134 #define cache_free(ncp) do { \
135 if (ncp != NULL) \
136 uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \
137 cache_zone_small : cache_zone_large, (ncp)); \
138 } while (0)
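/*
 * Illustrative use of the two zones, not part of the original source:
 * cache_alloc() picks a zone from the requested length, while cache_free()
 * picks it from nc_nlen, so nc_nlen must be set to that same length before
 * the entry can ever be freed.  A hypothetical sequence:
 */
#if 0
	struct namecache *ncp;

	ncp = cache_alloc(cnp->cn_namelen);	/* small or large zone */
	ncp->nc_nlen = cnp->cn_namelen;		/* must match for cache_free() */
	bcopy(cnp->cn_nameptr, ncp->nc_name, ncp->nc_nlen);
	/* ... entry is linked into the cache and later unlinked ... */
	cache_free(ncp);			/* returns to the same zone */
#endif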
139
140 static int doingcache = 1; /* 1 => enable the cache */
141 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
142
143 /* Export size information to userland */
144 SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
145 SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
146
147 /*
148 * The new name cache statistics
149 */
150 SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
151 #define STATNODE(mode, name, var) \
152 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
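/*
 * For example, STATNODE(CTLFLAG_RD, numneg, &numneg) below expands to
 * SYSCTL_ULONG(_vfs_cache, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
 */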
153 STATNODE(CTLFLAG_RD, numneg, &numneg);
154 STATNODE(CTLFLAG_RD, numcache, &numcache);
155 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
156 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
157 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
158 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
159 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
160 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
161 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
162 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
163 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
164 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
165
166 SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
167 sizeof(nchstats), "LU", "VFS cache effectiveness statistics");
168
169
170
171 static void cache_zap(struct namecache *ncp, int locked);
172
173 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
174
175 /*
176 * Flags in namecache.nc_flag
177 */
178 #define NCF_WHITE 1
179
180 /*
181 * Grab an atomic snapshot of the name cache hash chain lengths
182 */
183 SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");
184
185 static int
186 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
187 {
188 int error;
189 struct nchashhead *ncpp;
190 struct namecache *ncp;
191 int n_nchash;
192 int count;
193
194 n_nchash = nchash + 1; /* nchash is max index, not count */
195 if (!req->oldptr)
196 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
197
198 /* Scan hash tables for applicable entries */
199 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
200 count = 0;
201 LIST_FOREACH(ncp, ncpp, nc_hash) {
202 count++;
203 }
204 error = SYSCTL_OUT(req, &count, sizeof(count));
205 if (error)
206 return (error);
207 }
208 return (0);
209 }
210 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
211 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");
212
213 static int
214 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
215 {
216 int error;
217 struct nchashhead *ncpp;
218 struct namecache *ncp;
219 int n_nchash;
220 int count, maxlength, used, pct;
221
222 if (!req->oldptr)
223 return SYSCTL_OUT(req, 0, 4 * sizeof(int));
224
225 n_nchash = nchash + 1; /* nchash is max index, not count */
226 used = 0;
227 maxlength = 0;
228
229 /* Scan hash tables for applicable entries */
230 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
231 count = 0;
232 LIST_FOREACH(ncp, ncpp, nc_hash) {
233 count++;
234 }
235 if (count)
236 used++;
237 if (maxlength < count)
238 maxlength = count;
239 }
240 n_nchash = nchash + 1;
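	/*
	 * Bucket utilization in hundredths of a percent:
	 * (buckets in use / total buckets) * 100 * 100.
	 */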
241 pct = (used * 100 * 100) / n_nchash;
242 error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
243 if (error)
244 return (error);
245 error = SYSCTL_OUT(req, &used, sizeof(used));
246 if (error)
247 return (error);
248 error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
249 if (error)
250 return (error);
251 error = SYSCTL_OUT(req, &pct, sizeof(pct));
252 if (error)
253 return (error);
254 return (0);
255 }
256 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
257 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");
258
259 /*
260 * cache_zap():
261 *
262 * Removes a namecache entry from the cache, whether it holds an actual
263 * vnode pointer or is just a negative cache entry.
264 */
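/*
 * If "locked" is nonzero the cache lock is assumed to be held on entry; it
 * is released around the final cache_free()/vdrop() and reacquired before
 * returning, so the lock state on return matches the state on entry.  When
 * "locked" is zero the function takes and releases the lock itself.
 */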
265 static void
266 cache_zap(ncp, locked)
267 struct namecache *ncp;
268 int locked;
269 {
270 struct vnode *vp;
271
272 vp = NULL;
273 if (!locked)
274 CACHE_LOCK();
275 LIST_REMOVE(ncp, nc_hash);
276 LIST_REMOVE(ncp, nc_src);
277 if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
278 vp = ncp->nc_dvp;
279 numcachehv--;
280 }
281 if (ncp->nc_vp) {
282 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
283 } else {
284 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
285 numneg--;
286 }
287 numcache--;
288 CACHE_UNLOCK();
289 cache_free(ncp);
290 if (vp)
291 vdrop(vp);
292 if (locked)
293 CACHE_LOCK();
294 }
295
296 /*
297 * cache_leaf_test()
298 *
299 * Test whether this (directory) vnode has any subdirectory entries in
300 * the namei cache, i.e. whether it is a leaf in the namei cache.
301 * Note: the directory may still have cached entries for regular
302 * files.
303 *
304 * Returns 0 if the directory is a leaf, -1 if it isn't.
305 */
306 int
307 cache_leaf_test(struct vnode *vp)
308 {
309 struct namecache *ncpc;
310 int leaf;
311
312 leaf = 0;
313 CACHE_LOCK();
314 for (ncpc = LIST_FIRST(&vp->v_cache_src);
315 ncpc != NULL;
316 ncpc = LIST_NEXT(ncpc, nc_src)
317 ) {
318 if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) {
319 leaf = -1;
320 break;
321 }
322 }
323 CACHE_UNLOCK();
324 return (leaf);
325 }
326
327 /*
328 * Lookup an entry in the cache
329 *
330 * Lookup is called with dvp pointing to the directory to search,
331 * cnp pointing to the name of the entry being sought. If the lookup
332 * succeeds, the vnode is returned in *vpp, and a status of -1 is
333 * returned. If the lookup determines that the name does not exist
334 * (negative caching), a status of ENOENT is returned. If the lookup
335 * fails, a status of zero is returned.
336 */
337
338 int
339 cache_lookup(dvp, vpp, cnp)
340 struct vnode *dvp;
341 struct vnode **vpp;
342 struct componentname *cnp;
343 {
344 struct namecache *ncp;
345 u_int32_t hash;
346
347 if (!doingcache) {
348 cnp->cn_flags &= ~MAKEENTRY;
349 return (0);
350 }
351
352 CACHE_LOCK();
353 numcalls++;
354
355 if (cnp->cn_nameptr[0] == '.') {
356 if (cnp->cn_namelen == 1) {
357 *vpp = dvp;
358 dothits++;
359 CACHE_UNLOCK();
360 return (-1);
361 }
362 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
363 dotdothits++;
364 if (dvp->v_dd->v_id != dvp->v_ddid ||
365 (cnp->cn_flags & MAKEENTRY) == 0) {
366 dvp->v_ddid = 0;
367 CACHE_UNLOCK();
368 return (0);
369 }
370 *vpp = dvp->v_dd;
371 CACHE_UNLOCK();
372 return (-1);
373 }
374 }
375
376 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
377 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
378 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
379 numchecks++;
380 if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
381 !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
382 break;
383 }
384
385 /* We failed to find an entry */
386 if (ncp == 0) {
387 if ((cnp->cn_flags & MAKEENTRY) == 0) {
388 nummisszap++;
389 } else {
390 nummiss++;
391 }
392 nchstats.ncs_miss++;
393 CACHE_UNLOCK();
394 return (0);
395 }
396
397 /* We don't want to have an entry, so dump it */
398 if ((cnp->cn_flags & MAKEENTRY) == 0) {
399 numposzaps++;
400 nchstats.ncs_badhits++;
401 CACHE_UNLOCK();
402 cache_zap(ncp, 0);
403 return (0);
404 }
405
406 /* We found a "positive" match, return the vnode */
407 if (ncp->nc_vp) {
408 numposhits++;
409 nchstats.ncs_goodhits++;
410 *vpp = ncp->nc_vp;
411 CACHE_UNLOCK();
412 return (-1);
413 }
414
415 /* We found a negative match, and want to create it, so purge */
416 if (cnp->cn_nameiop == CREATE) {
417 numnegzaps++;
418 nchstats.ncs_badhits++;
419 CACHE_UNLOCK();
420 cache_zap(ncp, 0);
421 return (0);
422 }
423
424 numneghits++;
425 /*
426 * We found a "negative" match, so we shift it to the end of
427 * the "negative" cache entries queue to satisfy LRU. Also,
428 * check to see if the entry is a whiteout; indicate this to
429 * the componentname, if so.
430 */
431 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
432 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
433 nchstats.ncs_neghits++;
434 if (ncp->nc_flag & NCF_WHITE)
435 cnp->cn_flags |= ISWHITEOUT;
436 CACHE_UNLOCK();
437 return (ENOENT);
438 }
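/*
 * Illustrative caller, not part of the original source (variables are
 * hypothetical): how the three possible results of cache_lookup() above are
 * interpreted.  A real caller such as vfs_cache_lookup() below must still
 * reference and lock the vnode on a positive hit.
 */
#if 0
	error = cache_lookup(dvp, vpp, cnp);
	if (error == -1) {
		/* Positive hit: *vpp holds the cached vnode. */
	} else if (error == ENOENT) {
		/* Negative hit: the name is known not to exist. */
	} else {
		/* error == 0: cache miss; fall back to a real directory scan. */
	}
#endif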
439
440 /*
441 * Add an entry to the cache.
442 */
443 void
444 cache_enter(dvp, vp, cnp)
445 struct vnode *dvp;
446 struct vnode *vp;
447 struct componentname *cnp;
448 {
449 struct namecache *ncp;
450 struct nchashhead *ncpp;
451 u_int32_t hash;
452 int hold;
453 int zap;
454 int len;
455
456 if (!doingcache)
457 return;
458
459 if (cnp->cn_nameptr[0] == '.') {
460 if (cnp->cn_namelen == 1) {
461 return;
462 }
463 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
464 if (vp) {
465 dvp->v_dd = vp;
466 dvp->v_ddid = vp->v_id;
467 } else {
468 dvp->v_dd = dvp;
469 dvp->v_ddid = 0;
470 }
471 return;
472 }
473 }
474
475 hold = 0;
476 zap = 0;
477 ncp = cache_alloc(cnp->cn_namelen);
478 CACHE_LOCK();
479 numcache++;
480 if (!vp) {
481 numneg++;
482 ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
483 } else if (vp->v_type == VDIR) {
484 vp->v_dd = dvp;
485 vp->v_ddid = dvp->v_id;
486 }
487
488 /*
489 * Set the rest of the namecache entry elements, calculate its
490 * hash key, and insert it into the appropriate chain within
491 * the cache entries table.
492 */
493 ncp->nc_vp = vp;
494 ncp->nc_dvp = dvp;
495 len = ncp->nc_nlen = cnp->cn_namelen;
496 hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
497 bcopy(cnp->cn_nameptr, ncp->nc_name, len);
498 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
499 ncpp = NCHHASH(hash);
500 LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
501 if (LIST_EMPTY(&dvp->v_cache_src)) {
502 hold = 1;
503 numcachehv++;
504 }
505 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
506 /*
507 * If the entry is "negative", we place it into the
508 * "negative" cache queue, otherwise, we place it into the
509 * destination vnode's cache entries queue.
510 */
511 if (vp) {
512 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
513 } else {
514 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
515 }
516 if (numneg * ncnegfactor > numcache) {
517 ncp = TAILQ_FIRST(&ncneg);
518 zap = 1;
519 }
520 CACHE_UNLOCK();
521 if (hold)
522 vhold(dvp);
523 if (zap)
524 cache_zap(ncp, 0);
525 }
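/*
 * Illustrative sketch, not part of the original source: a filesystem's
 * VOP_CACHEDLOOKUP implementation typically feeds both outcomes back into
 * the cache when MAKEENTRY is set -- the found vnode on success, or a NULL
 * vnode to create a negative entry when the name is absent.  The code below
 * is hypothetical.
 */
#if 0
	if ((cnp->cn_flags & MAKEENTRY) != 0) {
		if (error == 0)
			cache_enter(dvp, *vpp, cnp);	/* positive entry */
		else if (error == ENOENT)
			cache_enter(dvp, NULL, cnp);	/* negative entry */
	}
#endif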
526
527 /*
528 * Name cache initialization, from vfs_init() when we are booting
529 */
530 static void
531 nchinit(void *dummy __unused)
532 {
533
534 TAILQ_INIT(&ncneg);
535
536 cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
537 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
538 cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
539 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
540
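	/*
	 * hashinit() returns a power-of-two sized table and stores
	 * (size - 1) in nchash, which is why NCHHASH() can select a chain
	 * with a mask instead of a modulo operation.
	 */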
541 nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
542 }
543 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)
544
545
546 /*
547 * Invalidate all entries to a particular vnode.
548 *
549 * Remove all entries in the namecache relating to this vnode and
550 * change the v_id. We take the v_id from a global counter, since
551 * it becomes a handy sequence number in crash-dumps that way.
552 * No valid vnode will ever have (v_id == 0).
553 *
554 * XXX: Only time and the size of v_id prevents this from failing:
555 * XXX: In theory we should hunt down all (struct vnode*, v_id)
556 * XXX: soft references and nuke them, at least on the global
557 * XXX: v_id wraparound. The period of resistance can be extended
558 * XXX: by incrementing each vnode's v_id individually instead of
559 * XXX: using the global v_id.
560 */
561
562 /*
563 * XXX This is sometimes called when a vnode may still be re-used, in which
564 * case v_dd may be invalid. Need to look this up.
565 */
566 void
567 cache_purge(vp)
568 struct vnode *vp;
569 {
570 static u_long nextid;
571
572 CACHE_LOCK();
573 while (!LIST_EMPTY(&vp->v_cache_src))
574 cache_zap(LIST_FIRST(&vp->v_cache_src), 1);
575 while (!TAILQ_EMPTY(&vp->v_cache_dst))
576 cache_zap(TAILQ_FIRST(&vp->v_cache_dst), 1);
577
578 do
579 nextid++;
580 while (nextid == vp->v_id || !nextid);
581 vp->v_id = nextid;
582 vp->v_dd = vp;
583 vp->v_ddid = 0;
584 CACHE_UNLOCK();
585 }
586
587 /*
588 * Flush all entries referencing a particular filesystem.
589 *
590 * Since we need to check it anyway, we will flush all the invalid
591 * entries at the same time.
592 */
593 void
594 cache_purgevfs(mp)
595 struct mount *mp;
596 {
597 struct nchashhead *ncpp;
598 struct namecache *ncp, *nnp;
599 struct nchashhead mplist;
600
601 LIST_INIT(&mplist);
602 ncp = NULL;
603
604 /* Scan hash tables for applicable entries */
605 CACHE_LOCK();
606 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
607 for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
608 nnp = LIST_NEXT(ncp, nc_hash);
609 if (ncp->nc_dvp->v_mount == mp) {
610 LIST_REMOVE(ncp, nc_hash);
611 LIST_INSERT_HEAD(&mplist, ncp, nc_hash);
612 }
613 }
614 }
615 CACHE_UNLOCK();
616 while (!LIST_EMPTY(&mplist))
617 cache_zap(LIST_FIRST(&mplist), 0);
618 }
619
620 /*
621 * Perform canonical checks and a cache lookup, and pass on to the
622 * filesystem through VOP_CACHEDLOOKUP() only if needed.
623 */
624
625 int
626 vfs_cache_lookup(ap)
627 struct vop_lookup_args /* {
628 struct vnode *a_dvp;
629 struct vnode **a_vpp;
630 struct componentname *a_cnp;
631 } */ *ap;
632 {
633 struct vnode *dvp, *vp;
634 int lockparent;
635 int error;
636 struct vnode **vpp = ap->a_vpp;
637 struct componentname *cnp = ap->a_cnp;
638 struct ucred *cred = cnp->cn_cred;
639 int flags = cnp->cn_flags;
640 struct thread *td = cnp->cn_thread;
641 u_long vpid; /* capability number of vnode */
642
643 *vpp = NULL;
644 dvp = ap->a_dvp;
645 lockparent = flags & LOCKPARENT;
646
647 if (dvp->v_type != VDIR)
648 return (ENOTDIR);
649
650 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
651 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
652 return (EROFS);
653
654 error = VOP_ACCESS(dvp, VEXEC, cred, td);
655
656 if (error)
657 return (error);
658
659 error = cache_lookup(dvp, vpp, cnp);
660
661 #ifdef LOOKUP_SHARED
662 if (!error) {
663 /* We do this because the rest of the system now expects to get
664 * a shared lock, which is later upgraded if LOCKSHARED is not
665 * set. We have so many cases here because of bugs that yield
666 * inconsistent lock states. This all badly needs to be fixed.
667 */
668 error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
669 if (!error) {
670 int flock;
671
672 flock = VOP_ISLOCKED(*vpp, td);
673 if (flock != LK_EXCLUSIVE) {
674 if (flock == 0) {
675 if ((flags & ISLASTCN) &&
676 (flags & LOCKSHARED))
677 VOP_LOCK(*vpp, LK_SHARED, td);
678 else
679 VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
680 }
681 } else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
682 VOP_LOCK(*vpp, LK_DOWNGRADE, td);
683 }
684 return (error);
685 }
686 #else
687 if (!error)
688 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
689 #endif
690
691 if (error == ENOENT)
692 return (error);
693
694 vp = *vpp;
695 vpid = vp->v_id;
696 cnp->cn_flags &= ~PDIRUNLOCK;
697 if (dvp == vp) { /* lookup on "." */
698 VREF(vp);
699 error = 0;
700 } else if (flags & ISDOTDOT) {
701 VOP_UNLOCK(dvp, 0, td);
702 cnp->cn_flags |= PDIRUNLOCK;
703 #ifdef LOOKUP_SHARED
704 if ((flags & ISLASTCN) && (flags & LOCKSHARED))
705 error = vget(vp, LK_SHARED, td);
706 else
707 error = vget(vp, LK_EXCLUSIVE, td);
708 #else
709 error = vget(vp, LK_EXCLUSIVE, td);
710 #endif
711
712 if (!error && lockparent && (flags & ISLASTCN)) {
713 if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
714 cnp->cn_flags &= ~PDIRUNLOCK;
715 }
716 } else {
717 #ifdef LOOKUP_SHARED
718 if ((flags & ISLASTCN) && (flags & LOCKSHARED))
719 error = vget(vp, LK_SHARED, td);
720 else
721 error = vget(vp, LK_EXCLUSIVE, td);
722 #else
723 error = vget(vp, LK_EXCLUSIVE, td);
724 #endif
725 if (!lockparent || error || !(flags & ISLASTCN)) {
726 VOP_UNLOCK(dvp, 0, td);
727 cnp->cn_flags |= PDIRUNLOCK;
728 }
729 }
730 /*
731 * Check that the capability number did not change
732 * while we were waiting for the lock.
733 */
734 if (!error) {
735 if (vpid == vp->v_id)
736 return (0);
737 vput(vp);
738 if (lockparent && dvp != vp && (flags & ISLASTCN)) {
739 VOP_UNLOCK(dvp, 0, td);
740 cnp->cn_flags |= PDIRUNLOCK;
741 }
742 }
743 if (cnp->cn_flags & PDIRUNLOCK) {
744 error = vn_lock(dvp, LK_EXCLUSIVE, td);
745 if (error)
746 return (error);
747 cnp->cn_flags &= ~PDIRUNLOCK;
748 }
749 #ifdef LOOKUP_SHARED
750 error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
751
752 if (!error) {
753 int flock = 0;
754
755 flock = VOP_ISLOCKED(*vpp, td);
756 if (flock != LK_EXCLUSIVE) {
757 if (flock == 0) {
758 if ((flags & ISLASTCN) && (flags & LOCKSHARED))
759 VOP_LOCK(*vpp, LK_SHARED, td);
760 else
761 VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
762 }
763 } else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
764 VOP_LOCK(*vpp, LK_DOWNGRADE, td);
765 }
766
767 return (error);
768 #else
769 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
770 #endif
771 }
772
773
774 #ifndef _SYS_SYSPROTO_H_
775 struct __getcwd_args {
776 u_char *buf;
777 u_int buflen;
778 };
779 #endif
780
781 /*
782 * XXX All of these sysctls would probably be more productive dead.
783 */
784 static int disablecwd;
785 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
786 "Disable the getcwd syscall");
787
788 /* Various statistics for the getcwd syscall */
789 static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
790 static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
791 static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
792 static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
793 static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
794 static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
795
796 /* Implementation of the getcwd syscall */
797 int
798 __getcwd(td, uap)
799 struct thread *td;
800 struct __getcwd_args *uap;
801 {
802
803 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
804 }
805
806 int
807 kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
808 {
809 char *bp, *tmpbuf;
810 int error, i, slash_prefixed;
811 struct filedesc *fdp;
812 struct namecache *ncp;
813 struct vnode *vp;
814
815 numcwdcalls++;
816 if (disablecwd)
817 return (ENODEV);
818 if (buflen < 2)
819 return (EINVAL);
820 if (buflen > MAXPATHLEN)
821 buflen = MAXPATHLEN;
822 error = 0;
823 tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK);
824 bp += buflen - 1;
825 *bp = '\0';
826 fdp = td->td_proc->p_fd;
827 slash_prefixed = 0;
828 FILEDESC_LOCK(fdp);
829 for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
830 if (vp->v_vflag & VV_ROOT) {
831 if (vp->v_mount == NULL) { /* forced unmount */
832 FILEDESC_UNLOCK(fdp);
833 free(tmpbuf, M_TEMP);
834 return (EBADF);
835 }
836 vp = vp->v_mount->mnt_vnodecovered;
837 continue;
838 }
839 if (vp->v_dd->v_id != vp->v_ddid) {
840 FILEDESC_UNLOCK(fdp);
841 numcwdfail1++;
842 free(tmpbuf, M_TEMP);
843 return (ENOTDIR);
844 }
845 CACHE_LOCK();
846 ncp = TAILQ_FIRST(&vp->v_cache_dst);
847 if (!ncp) {
848 numcwdfail2++;
849 CACHE_UNLOCK();
850 FILEDESC_UNLOCK(fdp);
851 free(tmpbuf, M_TEMP);
852 return (ENOENT);
853 }
854 if (ncp->nc_dvp != vp->v_dd) {
855 numcwdfail3++;
856 CACHE_UNLOCK();
857 FILEDESC_UNLOCK(fdp);
858 free(tmpbuf, M_TEMP);
859 return (EBADF);
860 }
861 for (i = ncp->nc_nlen - 1; i >= 0; i--) {
862 if (bp == tmpbuf) {
863 numcwdfail4++;
864 CACHE_UNLOCK();
865 FILEDESC_UNLOCK(fdp);
866 free(tmpbuf, M_TEMP);
867 return (ENOMEM);
868 }
869 *--bp = ncp->nc_name[i];
870 }
871 if (bp == tmpbuf) {
872 numcwdfail4++;
873 CACHE_UNLOCK();
874 FILEDESC_UNLOCK(fdp);
875 free(tmpbuf, M_TEMP);
876 return (ENOMEM);
877 }
878 *--bp = '/';
879 slash_prefixed = 1;
880 vp = vp->v_dd;
881 CACHE_UNLOCK();
882 }
883 FILEDESC_UNLOCK(fdp);
884 if (!slash_prefixed) {
885 if (bp == tmpbuf) {
886 numcwdfail4++;
887 free(tmpbuf, M_TEMP);
888 return (ENOMEM);
889 }
890 *--bp = '/';
891 }
892 numcwdfound++;
893 if (bufseg == UIO_SYSSPACE)
894 bcopy(bp, buf, strlen(bp) + 1);
895 else
896 error = copyout(bp, buf, strlen(bp) + 1);
897 free(tmpbuf, M_TEMP);
898 return (error);
899 }
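/*
 * Illustrative sketch, not part of the original source: the loop above
 * builds the path right-to-left, prepending each component in front of the
 * previous one so no final reversal is needed (the kernel code additionally
 * fails with ENOMEM when bp reaches the start of the buffer).  A stand-alone
 * userspace analogue of that buffer discipline:
 */
#if 0
#include <stdio.h>
#include <string.h>

int
main(void)
{
	/* Components as they would be discovered, leaf towards root. */
	const char *comp[] = { "vfs_cache.c", "kern", "sys" };
	char buf[64];
	char *bp = buf + sizeof(buf) - 1;
	size_t i, j;

	*bp = '\0';
	for (i = 0; i < sizeof(comp) / sizeof(comp[0]); i++) {
		for (j = strlen(comp[i]); j-- > 0;)
			*--bp = comp[i][j];	/* copy the name backwards */
		*--bp = '/';			/* then its leading slash */
	}
	printf("%s\n", bp);			/* prints /sys/kern/vfs_cache.c */
	return (0);
}
#endif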
900
901 /*
902 * Thus begins the fullpath magic.
903 */
904
905 #undef STATNODE
906 #define STATNODE(name) \
907 static u_int name; \
908 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
909
910 static int disablefullpath;
911 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
912 "Disable the vn_fullpath function");
913
914 STATNODE(numfullpathcalls);
915 STATNODE(numfullpathfail1);
916 STATNODE(numfullpathfail2);
917 STATNODE(numfullpathfail3);
918 STATNODE(numfullpathfail4);
919 STATNODE(numfullpathfound);
920
921 /*
922 * Retrieve the full filesystem path that corresponds to a vnode from the
923 * name cache (if available).
924 */
925 int
926 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
927 {
928 char *bp, *buf;
929 int i, slash_prefixed;
930 struct filedesc *fdp;
931 struct namecache *ncp;
932 struct vnode *vp;
933
934 numfullpathcalls++;
935 if (disablefullpath)
936 return (ENODEV);
937 if (vn == NULL)
938 return (EINVAL);
939 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
940 bp = buf + MAXPATHLEN - 1;
941 *bp = '\0';
942 fdp = td->td_proc->p_fd;
943 slash_prefixed = 0;
944 ASSERT_VOP_LOCKED(vn, "vn_fullpath");
945 FILEDESC_LOCK(fdp);
946 for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
947 if (vp->v_vflag & VV_ROOT) {
948 if (vp->v_mount == NULL) { /* forced unmount */
949 FILEDESC_UNLOCK(fdp);
950 free(buf, M_TEMP);
951 return (EBADF);
952 }
953 vp = vp->v_mount->mnt_vnodecovered;
954 continue;
955 }
956 if (vp != vn && vp->v_dd->v_id != vp->v_ddid) {
957 FILEDESC_UNLOCK(fdp);
958 free(buf, M_TEMP);
959 numfullpathfail1++;
960 return (ENOTDIR);
961 }
962 CACHE_LOCK();
963 ncp = TAILQ_FIRST(&vp->v_cache_dst);
964 if (!ncp) {
965 numfullpathfail2++;
966 CACHE_UNLOCK();
967 FILEDESC_UNLOCK(fdp);
968 free(buf, M_TEMP);
969 return (ENOENT);
970 }
971 if (vp != vn && ncp->nc_dvp != vp->v_dd) {
972 numfullpathfail3++;
973 CACHE_UNLOCK();
974 FILEDESC_UNLOCK(fdp);
975 free(buf, M_TEMP);
976 return (EBADF);
977 }
978 for (i = ncp->nc_nlen - 1; i >= 0; i--) {
979 if (bp == buf) {
980 numfullpathfail4++;
981 CACHE_UNLOCK();
982 FILEDESC_UNLOCK(fdp);
983 free(buf, M_TEMP);
984 return (ENOMEM);
985 }
986 *--bp = ncp->nc_name[i];
987 }
988 if (bp == buf) {
989 numfullpathfail4++;
990 CACHE_UNLOCK();
991 FILEDESC_UNLOCK(fdp);
992 free(buf, M_TEMP);
993 return (ENOMEM);
994 }
995 *--bp = '/';
996 slash_prefixed = 1;
997 vp = ncp->nc_dvp;
998 CACHE_UNLOCK();
999 }
1000 if (!slash_prefixed) {
1001 if (bp == buf) {
1002 numfullpathfail4++;
1003 FILEDESC_UNLOCK(fdp);
1004 free(buf, M_TEMP);
1005 return (ENOMEM);
1006 }
1007 *--bp = '/';
1008 }
1009 FILEDESC_UNLOCK(fdp);
1010 numfullpathfound++;
1011 *retbuf = bp;
1012 *freebuf = buf;
1013 return (0);
1014 }
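/*
 * Illustrative caller, not part of the original source: on success *retbuf
 * points somewhere inside the MAXPATHLEN buffer returned in *freebuf, so
 * the caller uses the former and frees the latter.  The variables below are
 * hypothetical.
 */
#if 0
	char *fullpath, *freepath;

	if (vn_fullpath(td, vp, &fullpath, &freepath) == 0) {
		printf("vnode resolves to %s\n", fullpath);
		free(freepath, M_TEMP);
	}
#endif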