sys/kern/vfs_cache.c
1 /*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Poul-Henning Kamp of the FreeBSD Project.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include "opt_ktrace.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/sysctl.h>
46 #include <sys/mount.h>
47 #include <sys/vnode.h>
48 #include <sys/namei.h>
49 #include <sys/malloc.h>
50 #include <sys/syscallsubr.h>
51 #include <sys/sysproto.h>
52 #include <sys/proc.h>
53 #include <sys/filedesc.h>
54 #include <sys/fnv_hash.h>
55 #ifdef KTRACE
56 #include <sys/ktrace.h>
57 #endif
58
59 #include <vm/uma.h>
60
61 /*
62 * This structure describes the elements in the cache of recent
63 * names looked up by namei.
64 */
65
66 struct namecache {
67 LIST_ENTRY(namecache) nc_hash; /* hash chain */
68 LIST_ENTRY(namecache) nc_src; /* source vnode list */
69 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */
70 struct vnode *nc_dvp; /* vnode of parent of name */
71 struct vnode *nc_vp; /* vnode the name refers to */
72 u_char nc_flag; /* flag bits */
73 u_char nc_nlen; /* length of name */
74 char nc_name[0]; /* segment name */
75 };
76
77 /*
78 * struct namecache_ts repeats struct namecache layout up to the
79 * nc_nlen member.
80 * struct namecache_ts is used in place of struct namecache when timestamps
81 * need to be stored. The nc_dotdottime field is used when a cache entry
82 * maps both a non-dotdot directory name and the dotdot entry for the
83 * directory's parent.
84 */
85 struct namecache_ts {
86 LIST_ENTRY(namecache) nc_hash; /* hash chain */
87 LIST_ENTRY(namecache) nc_src; /* source vnode list */
88 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */
89 struct vnode *nc_dvp; /* vnode of parent of name */
90 struct vnode *nc_vp; /* vnode the name refers to */
91 u_char nc_flag; /* flag bits */
92 u_char nc_nlen; /* length of name */
93 struct timespec nc_time; /* timespec provided by fs */
94 struct timespec nc_dotdottime; /* dotdot timespec provided by fs */
95 int nc_ticks; /* ticks value when entry was added */
96 char nc_name[0]; /* segment name + nul */
97 };
98
99 /*
100 * Flags in namecache.nc_flag
101 */
102 #define NCF_WHITE 0x01 /* name represents a whiteout */
103 #define NCF_ISDOTDOT 0x02 /* entry is for ".." in the parent directory */
104 #define NCF_TS 0x04 /* entry has struct namecache_ts layout (timestamp) */
105 #define NCF_DTS 0x08 /* entry also records a dotdot timestamp */
106
107 /*
108 * Name caching works as follows:
109 *
110 * Names found by directory scans are retained in a cache
111 * for future reference. It is managed LRU, so frequently
112 * used names will hang around. Cache is indexed by hash value
113 * obtained from (vp, name) where vp refers to the directory
114 * containing name.
115 *
116 * If it is a "negative" entry (i.e. for a name that is known NOT to
117 * exist), the vnode pointer will be NULL.
118 *
119 * Upon reaching the last segment of a path, if the reference
120 * is for DELETE, or NOCACHE is set (rewrite), and the
121 * name is located in the cache, it will be dropped.
122 */
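/*
 * A minimal sketch, for illustration only (this helper is not referenced
 * anywhere else in the file): the hash key for a (directory vnode, name)
 * pair is an FNV-1 hash of the name bytes folded with the address of the
 * directory vnode, matching the computation done inline by
 * cache_lookup_times() and cache_enter_time() below.  NCHHASH() then masks
 * the value into a bucket index.
 */
static __inline u_int32_t
cache_hash_sketch(struct vnode *dvp, const char *name, u_int len)
{
	u_int32_t hash;

	hash = fnv_32_buf(name, len, FNV1_32_INIT);	/* hash the name bytes */
	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);	/* fold in the parent vnode */
	return (hash);
}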
123
124 /*
125 * Structures associated with name cacheing.
126 */
127 #define NCHHASH(hash) \
128 (&nchashtbl[(hash) & nchash])
129 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
130 static TAILQ_HEAD(, namecache) ncneg; /* LRU queue of negative entries */
131 static u_long nchash; /* size of hash table - 1 (used as mask) */
132 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
133 static u_long ncnegfactor = 16; /* ratio of negative entries */
134 SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
135 static u_long numneg; /* number of negative entries allocated */
136 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
137 static u_long numcache; /* number of cache entries allocated */
138 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
139 static u_long numcachehv; /* number of cache entries with vnodes held */
140 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
141 #if 0
142 static u_long numcachepl; /* number of cache purges for leaf entries */
143 SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
144 #endif
145 struct nchstats nchstats; /* cache effectiveness statistics */
146
147 static struct mtx cache_lock;
148 MTX_SYSINIT(vfscache, &cache_lock, "Name Cache", MTX_DEF);
149
150 #define CACHE_LOCK() mtx_lock(&cache_lock)
151 #define CACHE_UNLOCK() mtx_unlock(&cache_lock)
152
153 /*
154 * UMA zones for the VFS cache.
155 *
156 * The small cache is used for entries with short names, which are the
157 * most common. The large cache is used for entries which are too big to
158 * fit in the small cache.
159 */
160 static uma_zone_t cache_zone_small;
161 static uma_zone_t cache_zone_small_ts;
162 static uma_zone_t cache_zone_large;
163 static uma_zone_t cache_zone_large_ts;
164
165 #define CACHE_PATH_CUTOFF 32
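/*
 * Names of up to CACHE_PATH_CUTOFF bytes are carved from the small zones;
 * anything longer falls back to the large zones, which leave room for a
 * full NAME_MAX-byte component (see the zone sizing in nchinit() below).
 */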
166
167 static struct namecache *
168 cache_alloc(int len, int ts)
169 {
170
171 if (len > CACHE_PATH_CUTOFF) {
172 if (ts)
173 return (uma_zalloc(cache_zone_large_ts, M_WAITOK));
174 else
175 return (uma_zalloc(cache_zone_large, M_WAITOK));
176 }
177 if (ts)
178 return (uma_zalloc(cache_zone_small_ts, M_WAITOK));
179 else
180 return (uma_zalloc(cache_zone_small, M_WAITOK));
181 }
182
183 static void
184 cache_free(struct namecache *ncp)
185 {
186 int ts;
187
188 if (ncp == NULL)
189 return;
190 ts = ncp->nc_flag & NCF_TS;
191 if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) {
192 if (ts)
193 uma_zfree(cache_zone_small_ts, ncp);
194 else
195 uma_zfree(cache_zone_small, ncp);
196 } else if (ts)
197 uma_zfree(cache_zone_large_ts, ncp);
198 else
199 uma_zfree(cache_zone_large, ncp);
200 }
201
202 static char *
203 nc_get_name(struct namecache *ncp)
204 {
205 struct namecache_ts *ncp_ts;
206
207 if ((ncp->nc_flag & NCF_TS) == 0)
208 return (ncp->nc_name);
209 ncp_ts = (struct namecache_ts *)ncp;
210 return (ncp_ts->nc_name);
211 }
212
213 static void
214 cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
215 {
216
217 KASSERT((ncp->nc_flag & NCF_TS) != 0 ||
218 (tsp == NULL && ticksp == NULL),
219 ("No NCF_TS"));
220
221 if (tsp != NULL)
222 *tsp = ((struct namecache_ts *)ncp)->nc_time;
223 if (ticksp != NULL)
224 *ticksp = ((struct namecache_ts *)ncp)->nc_ticks;
225 }
226
227 static int doingcache = 1; /* 1 => enable the cache */
228 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
229
230 /* Export size information to userland */
231 SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, 0,
232 sizeof(struct namecache), "");
233
234 /*
235 * The new name cache statistics
236 */
237 static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
238 #define STATNODE(mode, name, var) \
239 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
240 STATNODE(CTLFLAG_RD, numneg, &numneg);
241 STATNODE(CTLFLAG_RD, numcache, &numcache);
242 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
243 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
244 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
245 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
246 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
247 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
248 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
249 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
250 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
251 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
252
253 SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE,
254 &nchstats, sizeof(nchstats), "LU", "VFS cache effectiveness statistics");
255
256
257
258 static void cache_zap(struct namecache *ncp);
259 static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
260 char *buf, char **retbuf, u_int buflen);
261
262 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
263
264 #ifdef DIAGNOSTIC
265 /*
266 * Grab an atomic snapshot of the name cache hash chain lengths
267 */
268 SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");
269
270 static int
271 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
272 {
273 int error;
274 struct nchashhead *ncpp;
275 struct namecache *ncp;
276 int n_nchash;
277 int count;
278
279 n_nchash = nchash + 1; /* nchash is max index, not count */
280 if (!req->oldptr)
281 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
282
283 /* Scan hash tables for applicable entries */
284 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
285 count = 0;
286 LIST_FOREACH(ncp, ncpp, nc_hash) {
287 count++;
288 }
289 error = SYSCTL_OUT(req, &count, sizeof(count));
290 if (error)
291 return (error);
292 }
293 return (0);
294 }
295 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD|
296 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int",
297 "nchash chain lengths");
298
299 static int
300 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
301 {
302 int error;
303 struct nchashhead *ncpp;
304 struct namecache *ncp;
305 int n_nchash;
306 int count, maxlength, used, pct;
307
308 if (!req->oldptr)
309 return SYSCTL_OUT(req, 0, 4 * sizeof(int));
310
311 n_nchash = nchash + 1; /* nchash is max index, not count */
312 used = 0;
313 maxlength = 0;
314
315 /* Scan hash tables for applicable entries */
316 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
317 count = 0;
318 LIST_FOREACH(ncp, ncpp, nc_hash) {
319 count++;
320 }
321 if (count)
322 used++;
323 if (maxlength < count)
324 maxlength = count;
325 }
326 n_nchash = nchash + 1;
327 pct = (used * 100 * 100) / n_nchash;
328 error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
329 if (error)
330 return (error);
331 error = SYSCTL_OUT(req, &used, sizeof(used));
332 if (error)
333 return (error);
334 error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
335 if (error)
336 return (error);
337 error = SYSCTL_OUT(req, &pct, sizeof(pct));
338 if (error)
339 return (error);
340 return (0);
341 }
342 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD|
343 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I",
344 "nchash chain lengths");
345 #endif
346
347 /*
348 * cache_zap():
349 *
350 * Removes a namecache entry from the cache, whether it contains an actual
351 * pointer to a vnode or is just a negative cache entry.
352 */
353 static void
354 cache_zap(ncp)
355 struct namecache *ncp;
356 {
357 struct vnode *vp;
358
359 mtx_assert(&cache_lock, MA_OWNED);
360 CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp);
361 vp = NULL;
362 LIST_REMOVE(ncp, nc_hash);
363 if (ncp->nc_flag & NCF_ISDOTDOT) {
364 if (ncp == ncp->nc_dvp->v_cache_dd)
365 ncp->nc_dvp->v_cache_dd = NULL;
366 } else {
367 LIST_REMOVE(ncp, nc_src);
368 if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
369 vp = ncp->nc_dvp;
370 numcachehv--;
371 }
372 }
373 if (ncp->nc_vp) {
374 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
375 if (ncp == ncp->nc_vp->v_cache_dd)
376 ncp->nc_vp->v_cache_dd = NULL;
377 } else {
378 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
379 numneg--;
380 }
381 numcache--;
382 cache_free(ncp);
383 if (vp)
384 vdrop(vp);
385 }
386
387 /*
388 * Lookup an entry in the cache
389 *
390 * Lookup is called with dvp pointing to the directory to search and
391 * cnp pointing to the name of the entry being sought. If the lookup
392 * succeeds, the vnode is returned in *vpp, and a status of -1 is
393 * returned. If the lookup determines that the name does not exist
394 * (negative caching), a status of ENOENT is returned. If the lookup
395 * fails, a status of zero is returned. If the directory vnode is
396 * recycled out from under us due to a forced unmount, a status of
397 * ENOENT is returned.
398 *
399 * vpp is locked and ref'd on return. If we're looking up DOTDOT, dvp is
400 * unlocked. If we're looking up ".", an extra ref is taken, but the lock is
401 * not recursively acquired.
402 */
403
404 int
405 cache_lookup_times(dvp, vpp, cnp, tsp, ticksp)
406 struct vnode *dvp;
407 struct vnode **vpp;
408 struct componentname *cnp;
409 struct timespec *tsp;
410 int *ticksp;
411 {
412 struct namecache *ncp;
413 struct thread *td;
414 u_int32_t hash;
415 int error, ltype;
416
417 if (!doingcache) {
418 cnp->cn_flags &= ~MAKEENTRY;
419 return (0);
420 }
421 td = cnp->cn_thread;
422 retry:
423 CACHE_LOCK();
424 numcalls++;
425
426 if (cnp->cn_nameptr[0] == '.') {
427 if (cnp->cn_namelen == 1) {
428 *vpp = dvp;
429 CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
430 dvp, cnp->cn_nameptr);
431 dothits++;
432 if (tsp != NULL)
433 timespecclear(tsp);
434 if (ticksp != NULL)
435 *ticksp = ticks;
436 goto success;
437 }
438 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
439 dotdothits++;
440 if (dvp->v_cache_dd == NULL) {
441 CACHE_UNLOCK();
442 return (0);
443 }
444 if ((cnp->cn_flags & MAKEENTRY) == 0) {
445 if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT)
446 cache_zap(dvp->v_cache_dd);
447 dvp->v_cache_dd = NULL;
448 CACHE_UNLOCK();
449 return (0);
450 }
451 ncp = dvp->v_cache_dd;
452 if (ncp->nc_flag & NCF_ISDOTDOT)
453 *vpp = ncp->nc_vp;
454 else
455 *vpp = ncp->nc_dvp;
456 /* Return failure if negative entry was found. */
457 if (*vpp == NULL)
458 goto negative_success;
459 CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
460 dvp, cnp->cn_nameptr, *vpp);
461 cache_out_ts(ncp, tsp, ticksp);
462 if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) ==
463 NCF_DTS && tsp != NULL)
464 *tsp = ((struct namecache_ts *)ncp)->
465 nc_dotdottime;
466 goto success;
467 }
468 }
469
470 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
471 hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
472 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
473 numchecks++;
474 if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
475 !bcmp(nc_get_name(ncp), cnp->cn_nameptr, ncp->nc_nlen))
476 break;
477 }
478
479 /* We failed to find an entry */
480 if (ncp == NULL) {
481 if ((cnp->cn_flags & MAKEENTRY) == 0) {
482 nummisszap++;
483 } else {
484 nummiss++;
485 }
486 nchstats.ncs_miss++;
487 CACHE_UNLOCK();
488 return (0);
489 }
490
491 /* We don't want to have an entry, so dump it */
492 if ((cnp->cn_flags & MAKEENTRY) == 0) {
493 numposzaps++;
494 nchstats.ncs_badhits++;
495 cache_zap(ncp);
496 CACHE_UNLOCK();
497 return (0);
498 }
499
500 /* We found a "positive" match, return the vnode */
501 if (ncp->nc_vp) {
502 numposhits++;
503 nchstats.ncs_goodhits++;
504 *vpp = ncp->nc_vp;
505 CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
506 dvp, cnp->cn_nameptr, *vpp, ncp);
507 cache_out_ts(ncp, tsp, ticksp);
508 goto success;
509 }
510
511 negative_success:
512 /* We found a negative match, and want to create it, so purge */
513 if (cnp->cn_nameiop == CREATE) {
514 numnegzaps++;
515 nchstats.ncs_badhits++;
516 cache_zap(ncp);
517 CACHE_UNLOCK();
518 return (0);
519 }
520
521 numneghits++;
522 /*
523 * We found a "negative" match, so we shift it to the end of
524 * the "negative" cache entries queue to satisfy LRU. Also,
525 * check to see if the entry is a whiteout; if so, indicate
526 * this in the componentname.
527 */
528 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
529 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
530 nchstats.ncs_neghits++;
531 if (ncp->nc_flag & NCF_WHITE)
532 cnp->cn_flags |= ISWHITEOUT;
533 cache_out_ts(ncp, tsp, ticksp);
534 CACHE_UNLOCK();
535 return (ENOENT);
536
537 success:
538 /*
539 * On success we return a locked and ref'd vnode as per the lookup
540 * protocol.
541 */
542 if (dvp == *vpp) { /* lookup on "." */
543 VREF(*vpp);
544 CACHE_UNLOCK();
545 /*
546 * When we look up "." we can still be asked to lock it
547 * differently...
548 */
549 ltype = cnp->cn_lkflags & LK_TYPE_MASK;
550 if (ltype != VOP_ISLOCKED(*vpp, td)) {
551 if (ltype == LK_EXCLUSIVE) {
552 vn_lock(*vpp, LK_UPGRADE | LK_RETRY, td);
553 if ((*vpp)->v_iflag & VI_DOOMED) {
554 /* forced unmount */
555 vrele(*vpp);
556 *vpp = NULL;
557 return (ENOENT);
558 }
559 } else
560 vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY, td);
561 }
562 return (-1);
563 }
564 ltype = 0; /* silence gcc warning */
565 if (cnp->cn_flags & ISDOTDOT) {
566 ltype = VOP_ISLOCKED(dvp, td);
567 VOP_UNLOCK(dvp, 0, td);
568 }
569 VI_LOCK(*vpp);
570 CACHE_UNLOCK();
571 error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, td);
572 if (cnp->cn_flags & ISDOTDOT)
573 vn_lock(dvp, ltype | LK_RETRY, td);
574 if (error) {
575 *vpp = NULL;
576 goto retry;
577 }
578 if ((cnp->cn_flags & ISLASTCN) &&
579 (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
580 ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
581 }
582 return (-1);
583 }
584
585 /*
586 * Add an entry to the cache.
587 */
588 void
589 cache_enter_time(dvp, vp, cnp, tsp, dtsp)
590 struct vnode *dvp;
591 struct vnode *vp;
592 struct componentname *cnp;
593 struct timespec *tsp;
594 struct timespec *dtsp;
595 {
596 struct namecache *ncp, *n2;
597 struct namecache_ts *n3;
598 struct nchashhead *ncpp;
599 u_int32_t hash;
600 int flag;
601 int hold;
602 int zap;
603 int len;
604
605 CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
606 VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
607 ("cache_enter: Adding a doomed vnode"));
608
609 if (!doingcache)
610 return;
611
612 /*
613 * Avoid blowout in namecache entries.
614 */
615 if (numcache >= desiredvnodes * 2)
616 return;
617
618 flag = 0;
619 if (cnp->cn_nameptr[0] == '.') {
620 if (cnp->cn_namelen == 1)
621 return;
622 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
623 CACHE_LOCK();
624 /*
625 * If a dotdot entry already exists, just retarget it
626 * to the new parent vnode; otherwise continue with new
627 * namecache entry allocation.
628 */
629 if ((ncp = dvp->v_cache_dd) != NULL &&
630 ncp->nc_flag & NCF_ISDOTDOT) {
631 KASSERT(ncp->nc_dvp == dvp,
632 ("wrong isdotdot parent"));
633 if (ncp->nc_vp != NULL)
634 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst,
635 ncp, nc_dst);
636 else
637 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
638 if (vp != NULL)
639 TAILQ_INSERT_HEAD(&vp->v_cache_dst,
640 ncp, nc_dst);
641 else
642 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
643 ncp->nc_vp = vp;
644 CACHE_UNLOCK();
645 return;
646 }
647 dvp->v_cache_dd = NULL;
648 CACHE_UNLOCK();
649 flag = NCF_ISDOTDOT;
650 }
651 }
652
653 hold = 0;
654 zap = 0;
655
656 /*
657 * Calculate the hash key and set up as much of the new
658 * namecache entry as possible before acquiring the lock.
659 */
660 ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
661 ncp->nc_vp = vp;
662 ncp->nc_dvp = dvp;
663 ncp->nc_flag = flag;
664 if (tsp != NULL) {
665 n3 = (struct namecache_ts *)ncp;
666 n3->nc_time = *tsp;
667 n3->nc_ticks = ticks;
668 n3->nc_flag |= NCF_TS;
669 if (dtsp != NULL) {
670 n3->nc_dotdottime = *dtsp;
671 n3->nc_flag |= NCF_DTS;
672 }
673 }
674 len = ncp->nc_nlen = cnp->cn_namelen;
675 hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
676 bcopy(cnp->cn_nameptr, nc_get_name(ncp), len);
677 hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
678 CACHE_LOCK();
679
680 /*
681 * See if this vnode or negative entry is already in the cache
682 * with this name. This can happen with concurrent lookups of
683 * the same path name.
684 */
685 ncpp = NCHHASH(hash);
686 LIST_FOREACH(n2, ncpp, nc_hash) {
687 if (n2->nc_dvp == dvp &&
688 n2->nc_nlen == cnp->cn_namelen &&
689 !bcmp(nc_get_name(n2), cnp->cn_nameptr, n2->nc_nlen)) {
690 if (tsp != NULL) {
691 KASSERT((n2->nc_flag & NCF_TS) != 0,
692 ("no NCF_TS"));
693 n3 = (struct namecache_ts *)n2;
694 n3->nc_time =
695 ((struct namecache_ts *)ncp)->nc_time;
696 n3->nc_ticks =
697 ((struct namecache_ts *)ncp)->nc_ticks;
698 if (dtsp != NULL) {
699 n3->nc_dotdottime =
700 ((struct namecache_ts *)ncp)->
701 nc_dotdottime;
702 n3->nc_flag |= NCF_DTS;
703 }
704 }
705 CACHE_UNLOCK();
706 cache_free(ncp);
707 return;
708 }
709 }
710
711 if (flag == NCF_ISDOTDOT) {
712 /*
713 * See if we are trying to add a ".." entry, but some other lookup
714 * has already populated the v_cache_dd pointer.
715 */
716 if (dvp->v_cache_dd != NULL) {
717 CACHE_UNLOCK();
718 cache_free(ncp);
719 return;
720 }
721 KASSERT(vp == NULL || vp->v_type == VDIR,
722 ("wrong vnode type %p", vp));
723 dvp->v_cache_dd = ncp;
724 }
725
726 numcache++;
727 if (!vp) {
728 numneg++;
729 if (cnp->cn_flags & ISWHITEOUT)
730 ncp->nc_flag |= NCF_WHITE;
731 } else if (vp->v_type == VDIR) {
732 if (flag != NCF_ISDOTDOT) {
733 /*
734 * For this case, the cache entry maps both the
735 * directory name in it and the name ".." for the
736 * directory's parent.
737 */
738 if ((n2 = vp->v_cache_dd) != NULL &&
739 (n2->nc_flag & NCF_ISDOTDOT) != 0)
740 cache_zap(n2);
741 vp->v_cache_dd = ncp;
742 }
743 } else {
744 vp->v_cache_dd = NULL;
745 }
746
747 /*
748 * Insert the new namecache entry into the appropriate chain
749 * within the cache entries table.
750 */
751 LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
752 if (flag != NCF_ISDOTDOT) {
753 if (LIST_EMPTY(&dvp->v_cache_src)) {
754 hold = 1;
755 numcachehv++;
756 }
757 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
758 }
759
760 /*
761 * If the entry is "negative", we place it into the
762 * "negative" cache queue, otherwise, we place it into the
763 * destination vnode's cache entries queue.
764 */
765 if (vp) {
766 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
767 } else {
768 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
769 }
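	/*
	 * Keep negative entries in proportion to the cache as a whole:
	 * with the default ncnegfactor of 16, once they exceed 1/16th of
	 * all entries the oldest negative entry (the head of the ncneg
	 * LRU queue) is selected here and zapped below, after the new
	 * entry has been fully linked in.
	 */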
770 if (numneg * ncnegfactor > numcache) {
771 ncp = TAILQ_FIRST(&ncneg);
772 zap = 1;
773 }
774 if (hold)
775 vhold(dvp);
776 if (zap)
777 cache_zap(ncp);
778 CACHE_UNLOCK();
779 }
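/*
 * A sketch (not taken verbatim from any filesystem; the foofs_* names are
 * hypothetical) of how a filesystem's real lookup routine typically feeds
 * the cache once its directory scan is done, so the next lookup can be
 * answered by cache_lookup() instead:
 *
 *	error = foofs_scan_directory(dvp, cnp, &vp);
 *	if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
 *		cache_enter(dvp, vp, cnp);		(positive entry)
 *	else if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0 &&
 *	    cnp->cn_nameiop != CREATE)
 *		cache_enter(dvp, NULL, cnp);		(negative entry)
 */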
780
781 /*
782 * Name cache initialization, from vfs_init() when we are booting
783 */
784 static void
785 nchinit(void *dummy __unused)
786 {
787
788 TAILQ_INIT(&ncneg);
789
790 cache_zone_small = uma_zcreate("S VFS Cache",
791 sizeof(struct namecache) + CACHE_PATH_CUTOFF,
792 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
793 cache_zone_small_ts = uma_zcreate("STS VFS Cache",
794 sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF,
795 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
796 cache_zone_large = uma_zcreate("L VFS Cache",
797 sizeof(struct namecache) + NAME_MAX,
798 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
799 cache_zone_large_ts = uma_zcreate("LTS VFS Cache",
800 sizeof(struct namecache_ts) + NAME_MAX,
801 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
802
803 nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
804 }
805 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
806
807
808 /*
809 * Invalidate all entries to a particular vnode.
810 */
811 void
812 cache_purge(vp)
813 struct vnode *vp;
814 {
815
816 CTR1(KTR_VFS, "cache_purge(%p)", vp);
817 CACHE_LOCK();
818 while (!LIST_EMPTY(&vp->v_cache_src))
819 cache_zap(LIST_FIRST(&vp->v_cache_src));
820 while (!TAILQ_EMPTY(&vp->v_cache_dst))
821 cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
822 if (vp->v_cache_dd != NULL) {
823 KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT,
824 ("lost dotdot link"));
825 cache_zap(vp->v_cache_dd);
826 }
827 KASSERT(vp->v_cache_dd == NULL, ("incomplete purge"));
828 CACHE_UNLOCK();
829 }
830
831 /*
832 * Invalidate all negative entries for a particular directory vnode.
833 */
834 void
835 cache_purge_negative(vp)
836 struct vnode *vp;
837 {
838 struct namecache *cp, *ncp;
839
840 CTR1(KTR_VFS, "cache_purge_negative(%p)", vp);
841 CACHE_LOCK();
842 LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) {
843 if (cp->nc_vp == NULL)
844 cache_zap(cp);
845 }
846 CACHE_UNLOCK();
847 }
848
849 /*
850 * Flush all entries referencing a particular filesystem.
851 */
852 void
853 cache_purgevfs(mp)
854 struct mount *mp;
855 {
856 struct nchashhead *ncpp;
857 struct namecache *ncp, *nnp;
858
859 /* Scan hash tables for applicable entries */
860 CACHE_LOCK();
861 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
862 LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) {
863 if (ncp->nc_dvp->v_mount == mp)
864 cache_zap(ncp);
865 }
866 }
867 CACHE_UNLOCK();
868 }
869
870 /*
871 * Perform canonical checks and cache lookup, and pass on to the filesystem
872 * through vop_cachedlookup only if needed.
873 */
874
875 int
876 vfs_cache_lookup(ap)
877 struct vop_lookup_args /* {
878 struct vnode *a_dvp;
879 struct vnode **a_vpp;
880 struct componentname *a_cnp;
881 } */ *ap;
882 {
883 struct vnode *dvp;
884 int error;
885 struct vnode **vpp = ap->a_vpp;
886 struct componentname *cnp = ap->a_cnp;
887 struct ucred *cred = cnp->cn_cred;
888 int flags = cnp->cn_flags;
889 struct thread *td = cnp->cn_thread;
890
891 *vpp = NULL;
892 dvp = ap->a_dvp;
893
894 if (dvp->v_type != VDIR)
895 return (ENOTDIR);
896
897 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
898 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
899 return (EROFS);
900
901 error = VOP_ACCESS(dvp, VEXEC, cred, td);
902 if (error)
903 return (error);
904
905 error = cache_lookup(dvp, vpp, cnp);
906 if (error == 0)
907 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
908 if (error == -1)
909 return (0);
910 return (error);
911 }
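/*
 * A filesystem opts in to the wrapper above by pointing its vop_lookup slot
 * at vfs_cache_lookup() and supplying the real directory scan as
 * vop_cachedlookup.  A sketch of such a vop vector (foofs is hypothetical;
 * in-tree filesystems such as UFS are wired up the same way):
 *
 *	struct vop_vector foofs_vnodeops = {
 *		.vop_default		= &default_vnodeops,
 *		.vop_cachedlookup	= foofs_lookup,
 *		.vop_lookup		= vfs_cache_lookup,
 *		...
 *	};
 */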
912
913
914 #ifndef _SYS_SYSPROTO_H_
915 struct __getcwd_args {
916 u_char *buf;
917 u_int buflen;
918 };
919 #endif
920
921 /*
922 * XXX All of these sysctls would probably be more productive dead.
923 */
924 static int disablecwd;
925 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
926 "Disable the getcwd syscall");
927
928 /* Implementation of the getcwd syscall. */
929 int
930 __getcwd(td, uap)
931 struct thread *td;
932 struct __getcwd_args *uap;
933 {
934
935 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
936 }
937
938 int
939 kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
940 {
941 char *bp, *tmpbuf;
942 struct filedesc *fdp;
943 int error;
944
945 if (disablecwd)
946 return (ENODEV);
947 if (buflen < 2)
948 return (EINVAL);
949 if (buflen > MAXPATHLEN)
950 buflen = MAXPATHLEN;
951
952 tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
953 fdp = td->td_proc->p_fd;
954 mtx_lock(&Giant);
955 FILEDESC_SLOCK(fdp);
956 error = vn_fullpath1(td, fdp->fd_cdir, fdp->fd_rdir, tmpbuf,
957 &bp, buflen);
958 FILEDESC_SUNLOCK(fdp);
959 mtx_unlock(&Giant);
960
961 if (!error) {
962 if (bufseg == UIO_SYSSPACE)
963 bcopy(bp, buf, strlen(bp) + 1);
964 else
965 error = copyout(bp, buf, strlen(bp) + 1);
966 #ifdef KTRACE
967 if (KTRPOINT(curthread, KTR_NAMEI))
968 ktrnamei(bp);
969 #endif
970 }
971 free(tmpbuf, M_TEMP);
972 return (error);
973 }
974
975 /*
976 * Thus begins the fullpath magic.
977 */
978
979 #undef STATNODE
980 #define STATNODE(name) \
981 static u_int name; \
982 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
983
984 static int disablefullpath;
985 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
986 "Disable the vn_fullpath function");
987
988 /* These count for kern___getcwd(), too. */
989 STATNODE(numfullpathcalls);
990 STATNODE(numfullpathfail1);
991 STATNODE(numfullpathfail2);
992 STATNODE(numfullpathfail4);
993 STATNODE(numfullpathfound);
994
995 /*
996 * Retrieve the full filesystem path that corresponds to a vnode from the
997 * name cache (if available).
998 */
999 int
1000 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
1001 {
1002 char *buf;
1003 struct filedesc *fdp;
1004 int error;
1005
1006 if (disablefullpath)
1007 return (ENODEV);
1008 if (vn == NULL)
1009 return (EINVAL);
1010
1011 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1012 fdp = td->td_proc->p_fd;
1013 FILEDESC_SLOCK(fdp);
1014 error = vn_fullpath1(td, vn, fdp->fd_rdir, buf, retbuf, MAXPATHLEN);
1015 FILEDESC_SUNLOCK(fdp);
1016
1017 if (!error)
1018 *freebuf = buf;
1019 else
1020 free(buf, M_TEMP);
1021 return (error);
1022 }
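/*
 * A sketch of the calling convention: on success *retbuf points into the
 * temporary buffer handed back through *freebuf, which the caller releases
 * with free(9) once the path has been consumed.
 *
 *	char *fullpath, *freepath;
 *
 *	freepath = NULL;
 *	if (vn_fullpath(curthread, vp, &fullpath, &freepath) == 0)
 *		printf("path: %s\n", fullpath);
 *	if (freepath != NULL)
 *		free(freepath, M_TEMP);
 */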
1023
1024 /*
1025 * This function is similar to vn_fullpath, but it attempts to look up the
1026 * pathname relative to the global root mount point. This is required for the
1027 * auditing sub-system, as audited pathnames must be absolute, relative to the
1028 * global root mount point.
1029 */
1030 int
1031 vn_fullpath_global(struct thread *td, struct vnode *vn,
1032 char **retbuf, char **freebuf)
1033 {
1034 char *buf;
1035 int error;
1036
1037 if (disablefullpath)
1038 return (ENODEV);
1039 if (vn == NULL)
1040 return (EINVAL);
1041 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1042 error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN);
1043 if (!error)
1044 *freebuf = buf;
1045 else
1046 free(buf, M_TEMP);
1047 return (error);
1048 }
1049
1050 /*
1051 * The magic behind kern___getcwd() and vn_fullpath().
1052 */
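/*
 * The walk relies entirely on namecache state: a non-directory vnode is
 * first resolved to its parent through its v_cache_dst list, then each
 * directory is mapped to a "/name" component by the non-dotdot entry that
 * names it in its parent, prepending into the tail of the caller's buffer.
 * Mount points are crossed by hopping from a VV_ROOT vnode to the vnode it
 * covers, and the loop stops at the supplied root directory (or the global
 * rootvnode).  A gap in the cache means the path cannot be reconstructed
 * and ENOENT is returned; running out of buffer yields ENOMEM.
 */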
1053 static int
1054 vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
1055 char *buf, char **retbuf, u_int buflen)
1056 {
1057 char *bp, *nc_name;
1058 int error, i, slash_prefixed;
1059 struct namecache *ncp;
1060
1061 bp = buf + buflen - 1;
1062 *bp = '\0';
1063 error = 0;
1064 slash_prefixed = 0;
1065
1066 CACHE_LOCK();
1067 numfullpathcalls++;
1068 if (vp->v_type != VDIR) {
1069 ncp = TAILQ_FIRST(&vp->v_cache_dst);
1070 if (!ncp) {
1071 numfullpathfail2++;
1072 CACHE_UNLOCK();
1073 return (ENOENT);
1074 }
1075 nc_name = nc_get_name(ncp);
1076 for (i = ncp->nc_nlen - 1; i >= 0 && bp > buf; i--)
1077 *--bp = nc_name[i];
1078 if (bp == buf) {
1079 numfullpathfail4++;
1080 CACHE_UNLOCK();
1081 return (ENOMEM);
1082 }
1083 *--bp = '/';
1084 slash_prefixed = 1;
1085 vp = ncp->nc_dvp;
1086 }
1087 while (vp != rdir && vp != rootvnode) {
1088 if (vp->v_vflag & VV_ROOT) {
1089 if (vp->v_iflag & VI_DOOMED) { /* forced unmount */
1090 error = ENOENT;
1091 break;
1092 }
1093 vp = vp->v_mount->mnt_vnodecovered;
1094 continue;
1095 }
1096 if (vp->v_type != VDIR) {
1097 numfullpathfail1++;
1098 error = ENOTDIR;
1099 break;
1100 }
1101 TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst)
1102 if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
1103 break;
1104 if (!ncp) {
1105 numfullpathfail2++;
1106 error = ENOENT;
1107 break;
1108 }
1109 nc_name = nc_get_name(ncp);
1110 for (i = ncp->nc_nlen - 1; i >= 0 && bp != buf; i--)
1111 *--bp = nc_name[i];
1112 if (bp == buf) {
1113 numfullpathfail4++;
1114 error = ENOMEM;
1115 break;
1116 }
1117 *--bp = '/';
1118 slash_prefixed = 1;
1119 vp = ncp->nc_dvp;
1120 }
1121 if (error) {
1122 CACHE_UNLOCK();
1123 return (error);
1124 }
1125 if (!slash_prefixed) {
1126 if (bp == buf) {
1127 numfullpathfail4++;
1128 CACHE_UNLOCK();
1129 return (ENOMEM);
1130 } else {
1131 *--bp = '/';
1132 }
1133 }
1134 numfullpathfound++;
1135 CACHE_UNLOCK();
1136
1137 *retbuf = bp;
1138 return (0);
1139 }
1140
1141 /* ABI compat shims for old kernel modules. */
1142 #undef cache_enter
1143 #undef cache_lookup
1144
1145 void cache_enter(struct vnode *dvp, struct vnode *vp,
1146 struct componentname *cnp);
1147 int cache_lookup(struct vnode *dvp, struct vnode **vpp,
1148 struct componentname *cnp);
1149
1150 void
1151 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
1152 {
1153
1154 cache_enter_time(dvp, vp, cnp, NULL, NULL);
1155 }
1156
1157 int
1158 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
1159 {
1160
1161 return (cache_lookup_times(dvp, vpp, cnp, NULL, NULL));
1162 }