1 /*-
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39
40 #include "opt_ffs_broken_fixme.h"
41 #include "opt_ufs.h"
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/namei.h>
47 #include <sys/bio.h>
48 #include <sys/buf.h>
49 #include <sys/proc.h>
50 #include <sys/stat.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
53 #include <sys/sysctl.h>
54
55 #include <vm/vm.h>
56 #include <vm/vm_extern.h>
57
58 #include <ufs/ufs/extattr.h>
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/dir.h>
62 #ifdef UFS_DIRHASH
63 #include <ufs/ufs/dirhash.h>
64 #endif
65 #include <ufs/ufs/ufsmount.h>
66 #include <ufs/ufs/ufs_extern.h>
67
/*
 * Run-time switch for full directory-entry validation during lookup.
 * It defaults to 1 when the kernel is built with DIAGNOSTIC and to 0
 * otherwise; ufs_lookup() tests it before calling ufs_dirbadentry().
 */
#ifdef DIAGNOSTIC
static int dirchk = 1;
#else
static int dirchk = 0;
#endif

/* Export dirchk as the read-write integer sysctl "dircheck" under _debug. */
SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, "");

/*
 * True if the vnode's filesystem uses the old directory format, which is
 * recognised by the mount's mnt_maxsymlinklen being <= 0.
 */
#define OFSFMT(vp)	((vp)->v_mount->mnt_maxsymlinklen <= 0)
78
/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the filesystem is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
 * on whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be ".", but the caller must check to ensure it does a vrele and vput
 * instead of two vputs.
 *
 * This routine is actually used as VOP_CACHEDLOOKUP method, and the
 * filesystem employs the generic vfs_cache_lookup() as VOP_LOOKUP
 * method.
 *
 * vfs_cache_lookup() performs the following for us:
 *	check that it is a directory
 *	check accessibility of directory
 *	check for modification attempts on read-only mounts
 *	if name found in cache
 *	    if at end of path and deleting or creating
 *		drop it
 *	     else
 *		return name.
 *	return VOP_CACHEDLOOKUP()
 *
 * Overall outline of ufs_lookup:
 *
 *	search for name in directory, to found or notfound
 * notfound:
 *	if creating, return locked directory, leaving info on available slots
 *	else return error
 * found:
 *	if at end of path and deleting, return information to allow delete
 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 *	  inode and return info to allow rewrite
 *	if not at end, add name to cache; if at end and neither creating
 *	  nor deleting, add name to cache
 *
 * Arguments (fields of ap):
 *	a_dvp	directory vnode to search
 *	a_vpp	where the resulting vnode is stored; set to NULL on entry
 *	a_cnp	the component name, operation and flags
 *
 * Return values visible in this function:
 *	0		entry found; *a_vpp holds the result vnode
 *	EJUSTRETURN	entry not found, but the operation (CREATE, RENAME,
 *			or whiteout DELETE) is on the last component and the
 *			slot information has been left in the directory inode
 *	ENOENT		entry not found
 *	EPERM		DELETE refused by the sticky-directory check
 *	EISDIR		RENAME whose target is the directory itself
 *	other		error returned by UFS_BLKATOFF, VOP_ACCESS or VFS_VGET
 *
 * PDIRUNLOCK is cleared in cnp->cn_flags on entry and set again on every
 * path below that unlocks the directory before returning.
 */
int
ufs_lookup(ap)
	struct vop_cachedlookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct vnode *vdp;		/* vnode for directory being searched */
	struct inode *dp;		/* inode for directory being searched */
	struct buf *bp;			/* a buffer of directory entries */
	struct direct *ep;		/* the current directory entry */
	int entryoffsetinblock;		/* offset of ep in bp's buffer */
	/*
	 * NONE: still looking for space; COMPACT: enough space exists if
	 * entries are compacted; FOUND: one slot is big enough (also the
	 * initial value when no slot is being looked for).
	 */
	enum {NONE, COMPACT, FOUND} slotstatus;
	doff_t slotoffset;		/* offset of area with free space */
	int slotsize;			/* size of area at slotoffset */
	int slotfreespace;		/* amount of space free in slot */
	int slotneeded;			/* size of the entry we're seeking */
	int numdirpasses;		/* strategy for directory search */
	doff_t endsearch;		/* offset to end directory search */
	doff_t prevoff;			/* prev entry dp->i_offset */
	struct vnode *pdp;		/* saved dp during symlink work */
	struct vnode *tdp;		/* returned by VFS_VGET */
	doff_t enduseful;		/* pointer past last used dir slot */
	u_long bmask;			/* block offset mask */
	int lockparent;			/* nonzero => lockparent flag is set */
	int wantparent;			/* nonzero => wantparent or lockparent flag */
	int namlen, error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	int nameiop = cnp->cn_nameiop;
	struct thread *td = cnp->cn_thread;

	bp = NULL;
	slotoffset = -1;
	cnp->cn_flags &= ~PDIRUNLOCK;
	/*
	 *  XXX there was a soft-update diff about this I couldn't merge.
	 * I think this was the equiv.
	 */
	*vpp = NULL;

	vdp = ap->a_dvp;
	dp = VTOI(vdp);
	lockparent = flags & LOCKPARENT;
	wantparent = flags & (LOCKPARENT|WANTPARENT);

	/*
	 * We now have a segment name to search for, and a directory to search.
	 *
	 * Suppress search for slots unless creating
	 * file and at end of pathname, in which case
	 * we watch for a place to put the new file in
	 * case it doesn't already exist.
	 */
	slotstatus = FOUND;
	slotfreespace = slotsize = slotneeded = 0;
	if ((nameiop == CREATE || nameiop == RENAME) &&
	    (flags & ISLASTCN)) {
		slotstatus = NONE;
		slotneeded = DIRECTSIZ(cnp->cn_namelen);
	}
	/*
	 * Mask used below to extract an offset within one f_iosize-sized
	 * block (presumably f_iosize is a power of two -- confirm).
	 */
	bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;

#ifdef UFS_DIRHASH
	/*
	 * Use dirhash for fast operations on large directories. The logic
	 * to determine whether to hash the directory is contained within
	 * ufsdirhash_build(); a zero return means that it decided to hash
	 * this directory and it successfully built up the hash table.
	 */
	if (ufsdirhash_build(dp) == 0) {
		/* Look for a free slot if needed. */
		enduseful = dp->i_size;
		if (slotstatus != FOUND) {
			slotoffset = ufsdirhash_findfree(dp, slotneeded,
			    &slotsize);
			if (slotoffset >= 0) {
				slotstatus = COMPACT;
				enduseful = ufsdirhash_enduseful(dp);
				if (enduseful < 0)
					enduseful = dp->i_size;
			}
		}
		/* Look up the component. */
		numdirpasses = 1;
		entryoffsetinblock = 0; /* silence compiler warning */
		switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
		    &dp->i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) {
		case 0:
			/* Hit: jump into the common "name matched" code. */
			ep = (struct direct *)((char *)bp->b_data +
			    (dp->i_offset & bmask));
			goto foundentry;
		case ENOENT:
			dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
			goto notfound;
		default:
			/* Something failed; just do a linear search. */
			break;
		}
	}
#endif /* UFS_DIRHASH */
	/*
	 * If there is cached information on a previous search of
	 * this directory, pick up where we last left off.
	 * We cache only lookups as these are the most common
	 * and have the greatest payoff. Caching CREATE has little
	 * benefit as it usually must search the entire directory
	 * to determine that the entry does not exist. Caching the
	 * location of the last DELETE or RENAME has not reduced
	 * profiling time and hence has been removed in the interest
	 * of simplicity.
	 */
	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
	    dp->i_diroff >= dp->i_size) {
		entryoffsetinblock = 0;
		dp->i_offset = 0;
		numdirpasses = 1;
	} else {
		dp->i_offset = dp->i_diroff;
		/*
		 * Starting mid-block: the search loop only reads a block
		 * at a block boundary, so fetch this one here.
		 */
		if ((entryoffsetinblock = dp->i_offset & bmask) &&
		    (error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)))
			return (error);
		numdirpasses = 2;
		nchstats.ncs_2passes++;
	}
	prevoff = dp->i_offset;
	endsearch = roundup2(dp->i_size, DIRBLKSIZ);
	enduseful = 0;

searchloop:
	while (dp->i_offset < endsearch) {
		/*
		 * If necessary, get the next directory block.
		 */
		if ((dp->i_offset & bmask) == 0) {
			if (bp != NULL)
				brelse(bp);
			error =
			    UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp);
			if (error)
				return (error);
			entryoffsetinblock = 0;
		}
		/*
		 * If still looking for a slot, and at a DIRBLKSIZE
		 * boundary, have to start looking for free space again.
		 */
		if (slotstatus == NONE &&
		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
			slotoffset = -1;
			slotfreespace = 0;
		}
		/*
		 * Get pointer to next entry.
		 * Full validation checks are slow, so we only check
		 * enough to insure forward progress through the
		 * directory. Complete checks can be run by patching
		 * "dirchk" to be true.
		 */
		ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock);
		if (ep->d_reclen == 0 || ep->d_reclen >
		    DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
		    (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) {
			int i;

			/*
			 * Report the damage, then skip ahead to the start
			 * of the next DIRBLKSIZ chunk.
			 */
			ufs_dirbad(dp, dp->i_offset, "mangled entry");
			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
			dp->i_offset += i;
			entryoffsetinblock += i;
			continue;
		}

		/*
		 * If an appropriate sized slot has not yet been found,
		 * check to see if one is available. Also accumulate space
		 * in the current block so that we can determine if
		 * compaction is viable.
		 */
		if (slotstatus != FOUND) {
			int size = ep->d_reclen;

			/* For an in-use entry only the slack is free. */
			if (ep->d_ino != 0)
				size -= DIRSIZ(OFSFMT(vdp), ep);
			if (size > 0) {
				if (size >= slotneeded) {
					slotstatus = FOUND;
					slotoffset = dp->i_offset;
					slotsize = ep->d_reclen;
				} else if (slotstatus == NONE) {
					slotfreespace += size;
					if (slotoffset == -1)
						slotoffset = dp->i_offset;
					if (slotfreespace >= slotneeded) {
						slotstatus = COMPACT;
						slotsize = dp->i_offset +
						    ep->d_reclen - slotoffset;
					}
				}
			}
		}

		/*
		 * Check for a name match.
		 */
		if (ep->d_ino) {
			/*
			 * On a little-endian host with the old directory
			 * format the name length is read from d_type.
			 */
#			if (BYTE_ORDER == LITTLE_ENDIAN)
				if (OFSFMT(vdp))
					namlen = ep->d_type;
				else
					namlen = ep->d_namlen;
#			else
				namlen = ep->d_namlen;
#			endif
			if (namlen == cnp->cn_namelen &&
			    (cnp->cn_nameptr[0] == ep->d_name[0]) &&
			    !bcmp(cnp->cn_nameptr, ep->d_name,
			    (unsigned)namlen)) {
#ifdef UFS_DIRHASH
foundentry:
#endif
				/*
				 * Save directory entry's inode number and
				 * reclen in ndp->ni_ufs area, and release
				 * directory buffer.
				 */
				/*
				 * A matching whiteout (new format only) is
				 * handled as "not found": its slot is
				 * recorded, ISWHITEOUT is set, and
				 * numdirpasses is decremented so that the
				 * notfound code does not start a second
				 * pass.
				 */
				if (vdp->v_mount->mnt_maxsymlinklen > 0 &&
				    ep->d_type == DT_WHT) {
					slotstatus = FOUND;
					slotoffset = dp->i_offset;
					slotsize = ep->d_reclen;
					dp->i_reclen = slotsize;
					enduseful = dp->i_size;
					ap->a_cnp->cn_flags |= ISWHITEOUT;
					numdirpasses--;
					goto notfound;
				}
				dp->i_ino = ep->d_ino;
				dp->i_reclen = ep->d_reclen;
				goto found;
			}
		}
		/* No match: advance to the next entry. */
		prevoff = dp->i_offset;
		dp->i_offset += ep->d_reclen;
		entryoffsetinblock += ep->d_reclen;
		if (ep->d_ino)
			enduseful = dp->i_offset;
	}
notfound:
	/*
	 * If we started in the middle of the directory and failed
	 * to find our target, we must check the beginning as well.
	 */
	if (numdirpasses == 2) {
		numdirpasses--;
		dp->i_offset = 0;
		endsearch = dp->i_diroff;
		goto searchloop;
	}
	if (bp != NULL)
		brelse(bp);
	/*
	 * If creating, and at end of pathname and current
	 * directory has not been removed, then can consider
	 * allowing file to be created.
	 */
	if ((nameiop == CREATE || nameiop == RENAME ||
	     (nameiop == DELETE &&
	      (ap->a_cnp->cn_flags & DOWHITEOUT) &&
	      (ap->a_cnp->cn_flags & ISWHITEOUT))) &&
	    (flags & ISLASTCN) && dp->i_effnlink != 0) {
		/*
		 * Access for write is interpreted as allowing
		 * creation of files in the directory.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread);
		if (error)
			return (error);
		/*
		 * Return an indication of where the new directory
		 * entry should be put.  If we didn't find a slot,
		 * then set dp->i_count to 0 indicating
		 * that the new slot belongs at the end of the
		 * directory. If we found a slot, then the new entry
		 * can be put in the range from dp->i_offset to
		 * dp->i_offset + dp->i_count.
		 */
		if (slotstatus == NONE) {
			dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
			dp->i_count = 0;
			enduseful = dp->i_offset;
		} else if (nameiop == DELETE) {
			/*
			 * Whiteout delete: i_count is set the same way
			 * as in the ordinary DELETE case further down.
			 */
			dp->i_offset = slotoffset;
			if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
				dp->i_count = 0;
			else
				dp->i_count = dp->i_offset - prevoff;
		} else {
			dp->i_offset = slotoffset;
			dp->i_count = slotsize;
			if (enduseful < slotoffset + slotsize)
				enduseful = slotoffset + slotsize;
		}
		dp->i_endoff = roundup2(enduseful, DIRBLKSIZ);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		/*
		 * We return with the directory locked, so that
		 * the parameters we set up above will still be
		 * valid if we actually decide to do a direnter().
		 * We return ni_vp == NULL to indicate that the entry
		 * does not currently exist; we leave a pointer to
		 * the (locked) directory inode in ndp->ni_dvp.
		 * The pathname buffer is saved so that the name
		 * can be obtained later.
		 *
		 * NB - if the directory is unlocked, then this
		 * information cannot be used.
		 */
		cnp->cn_flags |= SAVENAME;
		if (!lockparent) {
			VOP_UNLOCK(vdp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (EJUSTRETURN);
	}
	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 * (*vpp is still NULL on this path.)
	 */
	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
		cache_enter(vdp, *vpp, cnp);
	return (ENOENT);

found:
	if (numdirpasses == 2)
		nchstats.ncs_pass2++;
	/*
	 * Check that directory length properly reflects presence
	 * of this entry.
	 */
	if (dp->i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) {
		ufs_dirbad(dp, dp->i_offset, "i_size too small");
		dp->i_size = dp->i_offset + DIRSIZ(OFSFMT(vdp), ep);
		DIP_SET(dp, i_size, dp->i_size);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/* ep points into bp's data and is not used after this release. */
	brelse(bp);

	/*
	 * Found component in pathname.
	 * If the final component of path name, save information
	 * in the cache as to where the entry was found.
	 */
	if ((flags & ISLASTCN) && nameiop == LOOKUP)
		dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);

	/*
	 * If deleting, and at end of pathname, return
	 * parameters which can be used to remove file.
	 * If the wantparent flag isn't set, we return only
	 * the directory (in ndp->ni_dvp), otherwise we go
	 * on and lock the inode, being careful with ".".
	 */
	if (nameiop == DELETE && (flags & ISLASTCN)) {
		/*
		 * Write access to directory required to delete files.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread);
		if (error)
			return (error);
		/*
		 * Return pointer to current entry in dp->i_offset,
		 * and distance past previous entry (if there
		 * is a previous entry in this block) in dp->i_count.
		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
		 */
		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
			dp->i_count = 0;
		else
			dp->i_count = dp->i_offset - prevoff;
		/* Target is the directory itself ("."): take a reference only. */
		if (dp->i_number == dp->i_ino) {
			VREF(vdp);
			*vpp = vdp;
			return (0);
		}
		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino,
		    LK_EXCLUSIVE, &tdp)) != 0)
			return (error);
		/*
		 * If directory is "sticky", then user must own
		 * the directory, or the file in it, else she
		 * may not delete it (unless she's root). This
		 * implements append-only directories.
		 */
		if ((dp->i_mode & ISVTX) &&
		    VOP_ACCESS(vdp, VADMIN, cred, cnp->cn_thread) &&
		    VOP_ACCESS(tdp, VADMIN, cred, cnp->cn_thread)) {
			vput(tdp);
			return (EPERM);
		}
		*vpp = tdp;
		if (!lockparent) {
			VOP_UNLOCK(vdp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (0);
	}

	/*
	 * If rewriting (RENAME), return the inode and the
	 * information required to rewrite the present directory
	 * Must get inode of directory entry to verify it's a
	 * regular file, or empty directory.
	 */
	if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)))
			return (error);
		/*
		 * Careful about locking second inode.
		 * This can only occur if the target is ".".
		 */
		if (dp->i_number == dp->i_ino)
			return (EISDIR);
		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino,
		    LK_EXCLUSIVE, &tdp)) != 0)
			return (error);
		*vpp = tdp;
		cnp->cn_flags |= SAVENAME;
		if (!lockparent) {
			VOP_UNLOCK(vdp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (0);
	}

	/*
	 * Step through the translation in the name.  We do not `vput' the
	 * directory because we may need it again if a symbolic link
	 * is relative to the current directory.  Instead we save it
	 * unlocked as "pdp".  We must get the target inode before unlocking
	 * the directory to insure that the inode will not be removed
	 * before we get it.  We prevent deadlock by always fetching
	 * inodes from the root, moving down the directory tree. Thus
	 * when following backward pointers ".." we must unlock the
	 * parent directory before getting the requested directory.
	 * There is a potential race condition here if both the current
	 * and parent directories are removed before the VFS_VGET for the
	 * inode associated with ".." returns.  We hope that this occurs
	 * infrequently since we cannot avoid this race condition without
	 * implementing a sophisticated deadlock detection algorithm.
	 * Note also that this simple deadlock detection scheme will not
	 * work if the filesystem has any hard links other than ".."
	 * that point backwards in the directory structure.
	 */
	pdp = vdp;
	if (flags & ISDOTDOT) {
		/*
		 * Try a non-blocking get of the ".." inode first.  If that
		 * fails, unlock this directory, do a blocking get, and
		 * then re-lock this directory.
		 */
		if ((VFS_VGET(pdp->v_mount, dp->i_ino, LK_NOWAIT | LK_EXCLUSIVE,
		    &tdp)) != 0) {
			VOP_UNLOCK(pdp, 0, td);	/* race to get the inode */
			error = VFS_VGET(pdp->v_mount, dp->i_ino,
			    LK_EXCLUSIVE, &tdp);
			vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, td);
			if (error)
				return (error);
		}
		if (!lockparent || !(flags & ISLASTCN)) {
			VOP_UNLOCK(pdp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		*vpp = tdp;
	} else if (dp->i_number == dp->i_ino) {
		VREF(vdp);	/* we want ourself, ie "." */
		*vpp = vdp;
	} else {
		error = VFS_VGET(pdp->v_mount, dp->i_ino, LK_EXCLUSIVE, &tdp);
		if (error)
			return (error);
		if (!lockparent || !(flags & ISLASTCN)) {
			VOP_UNLOCK(pdp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		*vpp = tdp;
	}

	/*
	 * Insert name into cache if appropriate.
	 */
	if (cnp->cn_flags & MAKEENTRY)
		cache_enter(vdp, *vpp, cnp);
	return (0);
}
616
617 void
618 ufs_dirbad(ip, offset, how)
619 struct inode *ip;
620 doff_t offset;
621 char *how;
622 {
623 struct mount *mp;
624
625 mp = ITOV(ip)->v_mount;
626 (void)printf("%s: bad dir ino %lu at offset %ld: %s\n",
627 mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how);
628 if ((mp->mnt_flag & MNT_RDONLY) == 0)
629 panic("ufs_dirbad: bad dir");
630 }
631
632 /*
633 * Do consistency checking on a directory entry:
634 * record length must be multiple of 4
635 * entry must fit in rest of its DIRBLKSIZ block
636 * record must be large enough to contain entry
637 * name is not longer than MAXNAMLEN
638 * name must be as long as advertised, and null terminated
639 */
640 int
641 ufs_dirbadentry(dp, ep, entryoffsetinblock)
642 struct vnode *dp;
643 struct direct *ep;
644 int entryoffsetinblock;
645 {
646 int i, namlen;
647
648 # if (BYTE_ORDER == LITTLE_ENDIAN)
649 if (OFSFMT(dp))
650 namlen = ep->d_type;
651 else
652 namlen = ep->d_namlen;
653 # else
654 namlen = ep->d_namlen;
655 # endif
656 if ((ep->d_reclen & 0x3) != 0 ||
657 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
658 ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) {
659 /*return (1); */
660 printf("First bad\n");
661 goto bad;
662 }
663 if (ep->d_ino == 0)
664 return (0);
665 for (i = 0; i < namlen; i++)
666 if (ep->d_name[i] == '\0') {
667 /*return (1); */
668 printf("Second bad\n");
669 goto bad;
670 }
671 if (ep->d_name[i])
672 goto bad;
673 return (0);
674 bad:
675 return (1);
676 }
677
678 /*
679 * Construct a new directory entry after a call to namei, using the
680 * parameters that it left in the componentname argument cnp. The
681 * argument ip is the inode to which the new directory entry will refer.
682 */
683 void
684 ufs_makedirentry(ip, cnp, newdirp)
685 struct inode *ip;
686 struct componentname *cnp;
687 struct direct *newdirp;
688 {
689
690 #ifdef DIAGNOSTIC
691 if ((cnp->cn_flags & SAVENAME) == 0)
692 panic("ufs_makedirentry: missing name");
693 #endif
694 newdirp->d_ino = ip->i_number;
695 newdirp->d_namlen = cnp->cn_namelen;
696 bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1);
697 if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0)
698 newdirp->d_type = IFTODT(ip->i_mode);
699 else {
700 newdirp->d_type = 0;
701 # if (BYTE_ORDER == LITTLE_ENDIAN)
702 { u_char tmp = newdirp->d_namlen;
703 newdirp->d_namlen = newdirp->d_type;
704 newdirp->d_type = tmp; }
705 # endif
706 }
707 }
708
/*
 * Write a directory entry after a call to namei, using the parameters
 * that it left in nameidata.  The argument dirp is the new directory
 * entry contents.  Dvp is a pointer to the directory to be written,
 * which was left locked by namei.  Remaining parameters (dp->i_offset,
 * dp->i_count) indicate how the space for the new entry is to be obtained.
 * Non-null newdirbp indicates that a directory is being created (for the
 * soft dependency code).
 *
 * tvp, when not NULL, is unlocked around the VOP_FSYNC() and
 * UFS_TRUNCATE() calls below and locked again afterwards.  cnp is not
 * referenced in this function.
 *
 * Returns 0 on success, otherwise the error from UFS_BALLOC, UFS_BLKATOFF,
 * bwrite, VOP_FSYNC or UFS_UPDATE.  Panics ("newblk", "compact1",
 * "compact2") if the slot information in the directory inode is
 * inconsistent with the directory contents.
 */
int
ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
	struct vnode *dvp;
	struct vnode *tvp;
	struct direct *dirp;
	struct componentname *cnp;
	struct buf *newdirbp;
{
	struct ucred *cr;
	struct thread *td;
	int newentrysize;
	struct inode *dp;
	struct buf *bp;
	u_int dsize;
	struct direct *ep, *nep;
	int error, ret, blkoff, loc, spacefree, flags;
	char *dirbuf;

	td = curthread;	/* XXX */
	cr = td->td_ucred;

	dp = VTOI(dvp);
	newentrysize = DIRSIZ(OFSFMT(dvp), dirp);

	if (dp->i_count == 0) {
		/*
		 * If dp->i_count is 0, then namei could find no
		 * space in the directory. Here, dp->i_offset will
		 * be on a directory block boundary and we will write the
		 * new entry into a fresh block.
		 */
		if (dp->i_offset & (DIRBLKSIZ - 1))
			panic("ufs_direnter: newblk");
		flags = BA_CLRBUF;
		if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp))
			flags |= IO_SYNC;
		if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ,
		    cr, flags, &bp)) != 0) {
			/* On failure the new directory's buffer is still queued for write. */
			if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
				bdwrite(newdirbp);
			return (error);
		}
		dp->i_size = dp->i_offset + DIRBLKSIZ;
		DIP_SET(dp, i_size, dp->i_size);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		vnode_pager_setsize(dvp, (u_long)dp->i_size);
		/* The new entry owns the whole fresh DIRBLKSIZ chunk. */
		dirp->d_reclen = DIRBLKSIZ;
		blkoff = dp->i_offset &
		    (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
		bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize);
#ifdef UFS_DIRHASH
		if (dp->i_dirhash != NULL) {
			ufsdirhash_newblk(dp, dp->i_offset);
			ufsdirhash_add(dp, dirp, dp->i_offset);
			ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff,
			    dp->i_offset);
		}
#endif
		if (DOINGSOFTDEP(dvp)) {
			/*
			 * Ensure that the entire newly allocated block is a
			 * valid directory so that future growth within the
			 * block does not have to ensure that the block is
			 * written before the inode.
			 */
			blkoff += DIRBLKSIZ;
			while (blkoff < bp->b_bcount) {
				((struct direct *)
				   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
				blkoff += DIRBLKSIZ;
			}
			if (softdep_setup_directory_add(bp, dp, dp->i_offset,
			    dirp->d_ino, newdirbp, 1) == 0) {
				bdwrite(bp);
				return (UFS_UPDATE(dvp, 0));
			}
			/* We have just allocated a directory block in an
			 * indirect block. Rather than tracking when it gets
			 * claimed by the inode, we simply do a VOP_FSYNC
			 * now to ensure that it is there (in case the user
			 * does a future fsync). Note that we have to unlock
			 * the inode for the entry that we just entered, as
			 * the VOP_FSYNC may need to lock other inodes which
			 * can lead to deadlock if we also hold a lock on
			 * the newly entered node.
			 */
			if ((error = bwrite(bp)))
				return (error);
			if (tvp != NULL)
				VOP_UNLOCK(tvp, 0, td);
			error = VOP_FSYNC(dvp, td->td_ucred, MNT_WAIT, td);
			if (tvp != NULL)
				vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td);
			return (error);
		}
		if (DOINGASYNC(dvp)) {
			bdwrite(bp);
			return (UFS_UPDATE(dvp, 0));
		}
		/*
		 * Synchronous case: write the block, then update the inode.
		 * A write error takes precedence over the update result.
		 */
		error = bwrite(bp);
		ret = UFS_UPDATE(dvp, 1);
		if (error == 0)
			return (ret);
		return (error);
	}

	/*
	 * If dp->i_count is non-zero, then namei found space for the new
	 * entry in the range dp->i_offset to dp->i_offset + dp->i_count
	 * in the directory. To use this space, we may have to compact
	 * the entries located there, by copying them together towards the
	 * beginning of the block, leaving the free space in one usable
	 * chunk at the end.
	 */

	/*
	 * Increase size of directory if entry eats into new space.
	 * This should never push the size past a new multiple of
	 * DIRBLKSIZE.
	 *
	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
	 */
	if (dp->i_offset + dp->i_count > dp->i_size) {
		dp->i_size = dp->i_offset + dp->i_count;
		DIP_SET(dp, i_size, dp->i_size);
	}
	/*
	 * Get the block containing the space for the new directory entry.
	 */
	error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp);
	if (error) {
		if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
			bdwrite(newdirbp);
		return (error);
	}
	/*
	 * Find space for the new entry. In the simple case, the entry at
	 * offset base will have the space. If it does not, then namei
	 * arranged that compacting the region dp->i_offset to
	 * dp->i_offset + dp->i_count would yield the space.
	 */
	ep = (struct direct *)dirbuf;
	dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0;
	spacefree = ep->d_reclen - dsize;
	/*
	 * Walk the entries in the region.  `ep' is the destination slot,
	 * `nep' the next source entry, `loc' the offset just past `nep'.
	 */
	for (loc = ep->d_reclen; loc < dp->i_count; ) {
		nep = (struct direct *)(dirbuf + loc);

		/* Trim the existing slot (NB: dsize may be zero). */
		ep->d_reclen = dsize;
		ep = (struct direct *)((char *)ep + dsize);

		/* Read nep->d_reclen now as the bcopy() may clobber it. */
		loc += nep->d_reclen;
		if (nep->d_ino == 0) {
			/*
			 * A mid-block unused entry. Such entries are
			 * never created by the kernel, but fsck_ffs
			 * can create them (and it doesn't fix them).
			 *
			 * Add up the free space, and initialise the
			 * relocated entry since we don't bcopy it.
			 */
			spacefree += nep->d_reclen;
			ep->d_ino = 0;
			dsize = 0;
			continue;
		}
		dsize = DIRSIZ(OFSFMT(dvp), nep);
		spacefree += nep->d_reclen - dsize;
#ifdef UFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ufsdirhash_move(dp, nep,
			    dp->i_offset + ((char *)nep - dirbuf),
			    dp->i_offset + ((char *)ep - dirbuf));
#endif
		/* With soft updates the move goes through the softdep code. */
		if (DOINGSOFTDEP(dvp))
			softdep_change_directoryentry_offset(dp, dirbuf,
			    (caddr_t)nep, (caddr_t)ep, dsize);
		else
			bcopy((caddr_t)nep, (caddr_t)ep, dsize);
	}
	/*
	 * Here, `ep' points to a directory entry containing `dsize' in-use
	 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0,
	 * then the entry is completely unused (dsize == 0). The value
	 * of ep->d_reclen is always indeterminate.
	 *
	 * Update the pointer fields in the previous entry (if any),
	 * copy in the new entry, and write out the block.
	 */
	if (ep->d_ino == 0 ||
	    (ep->d_ino == WINO &&
	     bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) {
		/*
		 * The slot is unused, or is a whiteout whose name matches
		 * the new name: the new entry overwrites it in place.
		 */
		if (spacefree + dsize < newentrysize)
			panic("ufs_direnter: compact1");
		dirp->d_reclen = spacefree + dsize;
	} else {
		/* Keep the in-use entry and place the new one after it. */
		if (spacefree < newentrysize)
			panic("ufs_direnter: compact2");
		dirp->d_reclen = spacefree;
		ep->d_reclen = dsize;
		ep = (struct direct *)((char *)ep + dsize);
	}
#ifdef UFS_DIRHASH
	if (dp->i_dirhash != NULL && (ep->d_ino == 0 ||
	    dirp->d_reclen == spacefree))
		ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf));
#endif
	bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize);
#ifdef UFS_DIRHASH
	if (dp->i_dirhash != NULL)
		ufsdirhash_checkblock(dp, dirbuf -
		    (dp->i_offset & (DIRBLKSIZ - 1)),
		    dp->i_offset & ~(DIRBLKSIZ - 1));
#endif

	/*
	 * Write the block out: delayed under soft updates or async
	 * mounts, synchronously otherwise.  `error' is still 0 from the
	 * successful UFS_BLKATOFF() when a delayed write is used.
	 */
	if (DOINGSOFTDEP(dvp)) {
		(void) softdep_setup_directory_add(bp, dp,
		    dp->i_offset + (caddr_t)ep - dirbuf,
		    dirp->d_ino, newdirbp, 0);
		bdwrite(bp);
	} else {
		if (DOINGASYNC(dvp)) {
			bdwrite(bp);
			error = 0;
		} else {
			error = bwrite(bp);
		}
	}
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * If all went well, and the directory can be shortened, proceed
	 * with the truncation. Note that we have to unlock the inode for
	 * the entry that we just entered, as the truncation may need to
	 * lock other inodes which can lead to deadlock if we also hold a
	 * lock on the newly entered node.
	 */
	if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) {
		if (tvp != NULL)
			VOP_UNLOCK(tvp, 0, td);
#ifdef UFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ufsdirhash_dirtrunc(dp, dp->i_endoff);
#endif
		/* The result of the truncation is deliberately ignored. */
		(void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff,
		    IO_NORMAL | IO_SYNC, cr, td);
		if (tvp != NULL)
			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td);
	}
	return (error);
}
969
970 /*
971 * Remove a directory entry after a call to namei, using
972 * the parameters which it left in nameidata. The entry
973 * dp->i_offset contains the offset into the directory of the
974 * entry to be eliminated. The dp->i_count field contains the
975 * size of the previous record in the directory. If this
976 * is 0, the first entry is being deleted, so we need only
977 * zero the inode number to mark the entry as free. If the
978 * entry is not the first in the directory, we must reclaim
979 * the space of the now empty record by adding the record size
980 * to the size of the previous entry.
981 */
982 int
983 ufs_dirremove(dvp, ip, flags, isrmdir)
984 struct vnode *dvp;
985 struct inode *ip;
986 int flags;
987 int isrmdir;
988 {
989 struct inode *dp;
990 struct direct *ep;
991 struct buf *bp;
992 int error;
993
994 dp = VTOI(dvp);
995
996 if (flags & DOWHITEOUT) {
997 /*
998 * Whiteout entry: set d_ino to WINO.
999 */
1000 if ((error =
1001 UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0)
1002 return (error);
1003 ep->d_ino = WINO;
1004 ep->d_type = DT_WHT;
1005 goto out;
1006 }
1007
1008 if ((error = UFS_BLKATOFF(dvp,
1009 (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0)
1010 return (error);
1011 #ifdef UFS_DIRHASH
1012 /*
1013 * Remove the dirhash entry. This is complicated by the fact
1014 * that `ep' is the previous entry when dp->i_count != 0.
1015 */
1016 if (dp->i_dirhash != NULL)
1017 ufsdirhash_remove(dp, (dp->i_count == 0) ? ep :
1018 (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset);
1019 #endif
1020 if (dp->i_count == 0) {
1021 /*
1022 * First entry in block: set d_ino to zero.
1023 */
1024 ep->d_ino = 0;
1025 } else {
1026 /*
1027 * Collapse new free space into previous entry.
1028 */
1029 ep->d_reclen += dp->i_reclen;
1030 }
1031 #ifdef UFS_DIRHASH
1032 if (dp->i_dirhash != NULL)
1033 ufsdirhash_checkblock(dp, (char *)ep -
1034 ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)),
1035 dp->i_offset & ~(DIRBLKSIZ - 1));
1036 #endif
1037 out:
1038 if (DOINGSOFTDEP(dvp)) {
1039 if (ip) {
1040 ip->i_effnlink--;
1041 softdep_change_linkcnt(ip);
1042 softdep_setup_remove(bp, dp, ip, isrmdir);
1043 }
1044 if (softdep_slowdown(dvp)) {
1045 error = bwrite(bp);
1046 } else {
1047 bdwrite(bp);
1048 error = 0;
1049 }
1050 } else {
1051 if (ip) {
1052 ip->i_effnlink--;
1053 ip->i_nlink--;
1054 DIP_SET(ip, i_nlink, ip->i_nlink);
1055 ip->i_flag |= IN_CHANGE;
1056 }
1057 if (flags & DOWHITEOUT)
1058 error = bwrite(bp);
1059 else if (DOINGASYNC(dvp) && dp->i_count != 0) {
1060 bdwrite(bp);
1061 error = 0;
1062 } else
1063 error = bwrite(bp);
1064 }
1065 dp->i_flag |= IN_CHANGE | IN_UPDATE;
1066 /*
1067 * If the last named reference to a snapshot goes away,
1068 * drop its snapshot reference so that it will be reclaimed
1069 * when last open reference goes away.
1070 */
1071 #if defined(FFS) || defined(IFS)
1072 if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && ip->i_effnlink == 0)
1073 ffs_snapgone(ip);
1074 #endif
1075 return (error);
1076 }
1077
1078 /*
1079 * Rewrite an existing directory entry to point at the inode
1080 * supplied. The parameters describing the directory entry are
1081 * set up by a call to namei.
1082 */
1083 int
1084 ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir)
1085 struct inode *dp, *oip;
1086 ino_t newinum;
1087 int newtype;
1088 int isrmdir;
1089 {
1090 struct buf *bp;
1091 struct direct *ep;
1092 struct vnode *vdp = ITOV(dp);
1093 int error;
1094
1095 error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp);
1096 if (error)
1097 return (error);
1098 ep->d_ino = newinum;
1099 if (!OFSFMT(vdp))
1100 ep->d_type = newtype;
1101 oip->i_effnlink--;
1102 if (DOINGSOFTDEP(vdp)) {
1103 softdep_change_linkcnt(oip);
1104 softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir);
1105 bdwrite(bp);
1106 } else {
1107 oip->i_nlink--;
1108 DIP_SET(oip, i_nlink, oip->i_nlink);
1109 oip->i_flag |= IN_CHANGE;
1110 if (DOINGASYNC(vdp)) {
1111 bdwrite(bp);
1112 error = 0;
1113 } else {
1114 error = bwrite(bp);
1115 }
1116 }
1117 dp->i_flag |= IN_CHANGE | IN_UPDATE;
1118 /*
1119 * If the last named reference to a snapshot goes away,
1120 * drop its snapshot reference so that it will be reclaimed
1121 * when last open reference goes away.
1122 */
1123 #if defined(FFS) || defined(IFS)
1124 if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_effnlink == 0)
1125 ffs_snapgone(oip);
1126 #endif
1127 return (error);
1128 }
1129
1130 /*
1131 * Check if a directory is empty or not.
1132 * Inode supplied must be locked.
1133 *
1134 * Using a struct dirtemplate here is not precisely
1135 * what we want, but better than using a struct direct.
1136 *
1137 * NB: does not handle corrupted directories.
1138 */
1139 int
1140 ufs_dirempty(ip, parentino, cred)
1141 struct inode *ip;
1142 ino_t parentino;
1143 struct ucred *cred;
1144 {
1145 doff_t off;
1146 struct dirtemplate dbuf;
1147 struct direct *dp = (struct direct *)&dbuf;
1148 int error, count, namlen;
1149 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2)
1150
1151 for (off = 0; off < ip->i_size; off += dp->d_reclen) {
1152 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ,
1153 off, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred,
1154 NOCRED, &count, (struct thread *)0);
1155 /*
1156 * Since we read MINDIRSIZ, residual must
1157 * be 0 unless we're at end of file.
1158 */
1159 if (error || count != 0)
1160 return (0);
1161 /* avoid infinite loops */
1162 if (dp->d_reclen == 0)
1163 return (0);
1164 /* skip empty entries */
1165 if (dp->d_ino == 0 || dp->d_ino == WINO)
1166 continue;
1167 /* accept only "." and ".." */
1168 # if (BYTE_ORDER == LITTLE_ENDIAN)
1169 if (OFSFMT(ITOV(ip)))
1170 namlen = dp->d_type;
1171 else
1172 namlen = dp->d_namlen;
1173 # else
1174 namlen = dp->d_namlen;
1175 # endif
1176 if (namlen > 2)
1177 return (0);
1178 if (dp->d_name[0] != '.')
1179 return (0);
1180 /*
1181 * At this point namlen must be 1 or 2.
1182 * 1 implies ".", 2 implies ".." if second
1183 * char is also "."
1184 */
1185 if (namlen == 1 && dp->d_ino == ip->i_number)
1186 continue;
1187 if (dp->d_name[1] == '.' && dp->d_ino == parentino)
1188 continue;
1189 return (0);
1190 }
1191 return (1);
1192 }
1193
1194 /*
1195 * Check if source directory is in the path of the target directory.
1196 * Target is supplied locked, source is unlocked.
1197 * The target is always vput before returning.
1198 */
1199 int
1200 ufs_checkpath(source, target, cred)
1201 struct inode *source, *target;
1202 struct ucred *cred;
1203 {
1204 struct vnode *vp;
1205 int error, namlen;
1206 ino_t rootino;
1207 struct dirtemplate dirbuf;
1208
1209 vp = ITOV(target);
1210 if (target->i_number == source->i_number) {
1211 error = EEXIST;
1212 goto out;
1213 }
1214 rootino = ROOTINO;
1215 error = 0;
1216 if (target->i_number == rootino)
1217 goto out;
1218
1219 for (;;) {
1220 if (vp->v_type != VDIR) {
1221 error = ENOTDIR;
1222 break;
1223 }
1224 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
1225 sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
1226 IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, (int *)0,
1227 (struct thread *)0);
1228 if (error != 0)
1229 break;
1230 # if (BYTE_ORDER == LITTLE_ENDIAN)
1231 if (OFSFMT(vp))
1232 namlen = dirbuf.dotdot_type;
1233 else
1234 namlen = dirbuf.dotdot_namlen;
1235 # else
1236 namlen = dirbuf.dotdot_namlen;
1237 # endif
1238 if (namlen != 2 ||
1239 dirbuf.dotdot_name[0] != '.' ||
1240 dirbuf.dotdot_name[1] != '.') {
1241 error = ENOTDIR;
1242 break;
1243 }
1244 if (dirbuf.dotdot_ino == source->i_number) {
1245 error = EINVAL;
1246 break;
1247 }
1248 if (dirbuf.dotdot_ino == rootino)
1249 break;
1250 vput(vp);
1251 error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino,
1252 LK_EXCLUSIVE, &vp);
1253 if (error) {
1254 vp = NULL;
1255 break;
1256 }
1257 }
1258
1259 out:
1260 if (error == ENOTDIR)
1261 printf("checkpath: .. not a directory\n");
1262 if (vp != NULL)
1263 vput(vp);
1264 return (error);
1265 }
Cache object: 60433c89fb2177f99a557a16eddc8ec0
|