1 /*-
2 * Copyright (c) 1994 Jan-Simon Pendry
3 * Copyright (c) 1994
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Jan-Simon Pendry.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95
34 * $FreeBSD: releng/6.1/sys/fs/unionfs/union_subr.c 145585 2005-04-27 09:06:06Z jeff $
35 */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/fcntl.h>
40 #include <sys/file.h>
41 #include <sys/filedesc.h>
42 #include <sys/kernel.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/module.h>
46 #include <sys/mount.h>
47 #include <sys/mutex.h>
48 #include <sys/namei.h>
49 #include <sys/stat.h>
50 #include <sys/vnode.h>
51
52 #include <vm/vm.h>
53 #include <vm/vm_extern.h> /* for vnode_pager_setsize */
54 #include <vm/vm_object.h> /* for vm cache coherency */
55 #include <vm/uma.h>
56
57 #include <fs/unionfs/union.h>
58
59 #include <sys/proc.h>
60
61 extern int union_init(void);
62
/*
 * Number of buckets in the union_node hash table.
 * Must be a power of two, otherwise change UNION_HASH().
 */
#define NHASH 32

/*
 * Map an (upper vnode, lower vnode) pointer pair to a bucket index in
 * [0, NHASH).  The two pointer values are summed as integers, the low
 * 8 bits are discarded, and the result is masked with NHASH-1.  Both
 * arguments are fully parenthesized so that expression arguments
 * (e.g. "base + n") are evaluated before the cast is applied.
 */
#define UNION_HASH(u, l) \
	(((((uintptr_t)(u)) + ((uintptr_t)(l))) >> 8) & (NHASH - 1))
69
/* Malloc type used for the saved path component strings (un_path). */
static MALLOC_DEFINE(M_UNPATH, "unpath", "UNION path component");
/* Malloc type used for the vnode arrays built by union_dircache_get(). */
static MALLOC_DEFINE(M_UNDCACHE, "undcac", "UNION directory cache");

/* Hash table of cached union_nodes: NHASH buckets linked via un_cache. */
static LIST_HEAD(unhead, union_node) unhead[NHASH];
/* One lock word per bucket, holding the UNVP_LOCKED / UNVP_WANT bits. */
static int unvplock[NHASH];
75
76 static void union_dircache_r(struct vnode *vp, struct vnode ***vppp,
77 int *cntp);
78 static int union_list_lock(int ix);
79 static void union_list_unlock(int ix);
80 static int union_relookup(struct union_mount *um, struct vnode *dvp,
81 struct vnode **vpp,
82 struct componentname *cnp,
83 struct componentname *cn, char *path,
84 int pathlen);
85 static void union_updatevp(struct union_node *un,
86 struct vnode *uppervp,
87 struct vnode *lowervp);
88 static void union_newlower(struct union_node *, struct vnode *);
89 static void union_newupper(struct union_node *, struct vnode *);
90 static int union_copyfile(struct vnode *, struct vnode *,
91 struct ucred *, struct thread *);
92 static int union_vn_create(struct vnode **, struct union_node *,
93 struct thread *);
94 static int union_vn_close(struct vnode *, int, struct ucred *,
95 struct thread *);
96
97 int
98 union_init()
99 {
100 int i;
101
102 for (i = 0; i < NHASH; i++)
103 LIST_INIT(&unhead[i]);
104 bzero((caddr_t)unvplock, sizeof(unvplock));
105 return (0);
106 }
107
108 static int
109 union_list_lock(ix)
110 int ix;
111 {
112 if (unvplock[ix] & UNVP_LOCKED) {
113 unvplock[ix] |= UNVP_WANT;
114 (void) tsleep( &unvplock[ix], PINOD, "unllck", 0);
115 return (1);
116 }
117 unvplock[ix] |= UNVP_LOCKED;
118 return (0);
119 }
120
121 static void
122 union_list_unlock(ix)
123 int ix;
124 {
125 unvplock[ix] &= ~UNVP_LOCKED;
126
127 if (unvplock[ix] & UNVP_WANT) {
128 unvplock[ix] &= ~UNVP_WANT;
129 wakeup( &unvplock[ix]);
130 }
131 }
132
133 /*
134 * union_updatevp:
135 *
136 * The uppervp, if not NULL, must be referenced and not locked by us
137 * The lowervp, if not NULL, must be referenced.
138 *
139 * If uppervp and lowervp match pointers already installed, then
140 * nothing happens. The passed vp's (when matching) are not adjusted.
141 *
142 * This routine may only be called by union_newupper() and
143 * union_newlower().
144 */
145
146 static void
147 union_updatevp(un, uppervp, lowervp)
148 struct union_node *un;
149 struct vnode *uppervp;
150 struct vnode *lowervp;
151 {
152 int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
153 int nhash = UNION_HASH(uppervp, lowervp);
154 int docache = (lowervp != NULLVP || uppervp != NULLVP);
155 int lhash, uhash;
156
157 /*
158 * Ensure locking is ordered from lower to higher
159 * to avoid deadlocks.
160 */
161 if (nhash < ohash) {
162 lhash = nhash;
163 uhash = ohash;
164 } else {
165 lhash = ohash;
166 uhash = nhash;
167 }
168
169 if (lhash != uhash) {
170 while (union_list_lock(lhash))
171 continue;
172 }
173
174 while (union_list_lock(uhash))
175 continue;
176
177 if (ohash != nhash || !docache) {
178 if (un->un_flags & UN_CACHED) {
179 un->un_flags &= ~UN_CACHED;
180 LIST_REMOVE(un, un_cache);
181 }
182 }
183
184 if (ohash != nhash)
185 union_list_unlock(ohash);
186
187 if (un->un_lowervp != lowervp) {
188 if (un->un_lowervp) {
189 vrele(un->un_lowervp);
190 if (un->un_path) {
191 free(un->un_path, M_UNPATH);
192 un->un_path = 0;
193 }
194 }
195 un->un_lowervp = lowervp;
196 un->un_lowersz = VNOVAL;
197 }
198
199 if (un->un_uppervp != uppervp) {
200 if (un->un_uppervp)
201 vrele(un->un_uppervp);
202 un->un_uppervp = uppervp;
203 un->un_uppersz = VNOVAL;
204 }
205
206 if (docache && (ohash != nhash)) {
207 LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
208 un->un_flags |= UN_CACHED;
209 }
210
211 union_list_unlock(nhash);
212 }
213
214 /*
215 * Set a new lowervp. The passed lowervp must be referenced and will be
216 * stored in the vp in a referenced state.
217 */
218
219 static void
220 union_newlower(un, lowervp)
221 struct union_node *un;
222 struct vnode *lowervp;
223 {
224 union_updatevp(un, un->un_uppervp, lowervp);
225 }
226
227 /*
228 * Set a new uppervp. The passed uppervp must be locked and will be
229 * stored in the vp in a locked state. The caller should not unlock
230 * uppervp.
231 */
232
233 static void
234 union_newupper(un, uppervp)
235 struct union_node *un;
236 struct vnode *uppervp;
237 {
238 union_updatevp(un, uppervp, un->un_lowervp);
239 }
240
241 /*
242 * Keep track of size changes in the underlying vnodes.
243 * If the size changes, then callback to the vm layer
244 * giving priority to the upper layer size.
245 */
246 void
247 union_newsize(vp, uppersz, lowersz)
248 struct vnode *vp;
249 off_t uppersz, lowersz;
250 {
251 struct union_node *un;
252 off_t sz;
253
254 /* only interested in regular files */
255 if (vp->v_type != VREG)
256 return;
257
258 un = VTOUNION(vp);
259 sz = VNOVAL;
260
261 if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
262 un->un_uppersz = uppersz;
263 if (sz == VNOVAL)
264 sz = un->un_uppersz;
265 }
266
267 if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
268 un->un_lowersz = lowersz;
269 if (sz == VNOVAL)
270 sz = un->un_lowersz;
271 }
272
273 if (sz != VNOVAL) {
274 UDEBUG(("union: %s size now %ld\n",
275 (uppersz != VNOVAL ? "upper" : "lower"), (long)sz));
276 /*
277 * There is no need to change size of non-existent object.
278 */
279 /* vnode_pager_setsize(vp, sz); */
280 }
281 }
282
/*
 * union_allocvp:	allocate a union_node and associate it with a
 *			parent union_node and one or two vnodes.
 *
 *	vpp	Holds the returned vnode locked and referenced if no
 *		error occurs.
 *
 *	mp	Holds the mount point.  mp may or may not be busied.
 *		allocvp() makes no changes to mp.
 *
 *	dvp	Holds the parent union_node to the one we wish to create.
 *		XXX may only be used to traverse an uncopied lowervp-based
 *		tree?  XXX
 *
 *		dvp may or may not be locked.  allocvp() makes no changes
 *		to dvp.
 *
 *	upperdvp Holds the parent vnode to uppervp, generally used along
 *		with path component information to create a shadow of
 *		lowervp when uppervp does not exist.
 *
 *		upperdvp is referenced but unlocked on entry, and will be
 *		dereferenced on return.
 *
 *	uppervp	Holds the new uppervp vnode to be stored in the
 *		union_node we are allocating.  uppervp is referenced but
 *		not locked, and will be dereferenced on return.
 *
 *	lowervp	Holds the new lowervp vnode to be stored in the
 *		union_node we are allocating.  lowervp is referenced but
 *		not locked, and will be dereferenced on return.
 *
 *	cnp	Holds path component information to be coupled with
 *		lowervp and upperdvp to allow unionfs to create an uppervp
 *		later on.  Only used if lowervp is valid.  The contents
 *		of cnp are only valid for the duration of the call.
 *
 *	docache	Determine whether this node should be entered in the
 *		cache or whether it should be destroyed as soon as possible.
 *
 * Returns 0 on success, or the error from getnewvnode() (in which case
 * *vpp is set to NULL and the passed vnode references are released).
 * Panics if both uppervp and lowervp are NULLVP.
 *
 * Cached union_nodes are kept on the hashed lists in unhead[].
 * New nodes are only allocated when they cannot
 * be found on these lists.  Entries on the lists are
 * removed when the vfs reclaim entry is called.
 *
 * A lock is kept for each hash bucket.  This is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode.  This lock is only taken if we are going to
 * call getnewvnode(), since the kernel itself is single-threaded.
 *
 * If an entry is found on the list, then call vget() to
 * take a reference.  This is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */

int
union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *dvp;		/* parent union vnode */
	struct vnode *upperdvp;		/* parent vnode of uppervp */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un = 0;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	struct thread *td = (cnp) ? cnp->cn_thread : curthread;
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	/*
	 * The two layers must agree on the vnode type; if they do not,
	 * the lower vnode is dropped and only the upper one is used.
	 */
	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		vrele(lowervp);
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VV_ROOT;
	}

loop:
	/*
	 * Cache lookup.  Up to three buckets are probed, in this order:
	 * (uppervp, lowervp), (uppervp, NULL) and (NULL, lowervp).  A
	 * probe is skipped when the vnode it depends on is NULL.
	 */
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		/*
		 * A node matches when each of its layers is either the
		 * vnode we were given or not yet set, and it belongs to
		 * this mount.
		 */
		LIST_FOREACH(un, &unhead[hash], un_cache) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				/* vget() failed: start the search over. */
				if (vget(UNIONTOV(un), 0,
				    cnp ? cnp->cn_thread : NULL)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.  Everything is unlocked
		 * except for dvp, so check that case.  If they match, our
		 * new un is already locked.  Otherwise we have to lock our
		 * new un.
		 *
		 * A potential deadlock situation occurs when we are holding
		 * one lock while trying to get another.  We must follow
		 * strict ordering rules to avoid it.  We try to locate dvp
		 * by scanning up from un_vnode, since the most likely
		 * scenario is un being under dvp.
		 */

		if (dvp && un->un_vnode != dvp) {
			struct vnode *scan = un->un_vnode;

			do {
				scan = VTOUNION(scan)->un_pvp;
			} while (scan && scan->v_op == &union_vnodeops &&
				 scan != dvp);
			if (scan != dvp) {
				/*
				 * our new un is above dvp (we never saw dvp
				 * while moving up the tree).
				 */
				VREF(dvp);
				VOP_UNLOCK(dvp, 0, td);
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
				vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
				vrele(dvp);
			} else {
				/*
				 * our new un is under dvp
				 */
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
			}
		} else if (dvp == NULLVP) {
			/*
			 * dvp is NULL, we need to lock un.
			 */
			error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
		} else {
			/*
			 * dvp == un->un_vnode, we are already locked.
			 */
			error = 0;
		}

		/* Could not lock the node we found: search again. */
		if (error)
			goto loop;

		/*
		 * At this point, the union_node is locked and referenced.
		 *
		 * uppervp is locked and referenced or NULL, lowervp is
		 * referenced or NULL.
		 */
		UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n",
			un, un->un_vnode, un->un_uppervp,
			(un->un_uppervp ? vrefcnt(un->un_uppervp) : -99),
			uppervp,
			(uppervp ? vrefcnt(uppervp) : -99)
		));

		/*
		 * Install the new upper vnode, or drop the caller's extra
		 * reference when the node already holds the same vnode.
		 */
		if (uppervp != un->un_uppervp) {
			KASSERT(uppervp == NULL || vrefcnt(uppervp) > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", vrefcnt(uppervp)));
			union_newupper(un, uppervp);
		} else if (uppervp) {
			KASSERT(vrefcnt(uppervp) > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", vrefcnt(uppervp)));
			vrele(uppervp);
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create()
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_path = malloc(cnp->cn_namelen+1,
						M_UNPATH, M_WAITOK);
				bcopy(cnp->cn_nameptr, un->un_path,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
			}
		} else if (lowervp) {
			vrele(lowervp);
		}

		/*
		 * and upperdvp
		 */
		if (upperdvp != un->un_dirvp) {
			if (un->un_dirvp)
				vrele(un->un_dirvp);
			un->un_dirvp = upperdvp;
		} else if (upperdvp) {
			vrele(upperdvp);
		}

		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * Otherwise lock the vp list while we call getnewvnode()
		 * since that can block.
		 *
		 * A non-zero return means we slept without getting the
		 * lock, so the cache may have changed: search again.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	/*
	 * Create new node rather than replace old node.
	 */

	error = getnewvnode("union", mp, &union_vnodeops, vpp);
	if (error) {
		/*
		 * If an error occurs, clear out vnodes.
		 */
		if (lowervp)
			vrele(lowervp);
		if (uppervp)
			vrele(uppervp);
		if (upperdvp)
			vrele(upperdvp);
		*vpp = NULL;
		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
		M_TEMP, M_WAITOK);

	/* The union vnode takes its type from the upper layer if present. */
	(*vpp)->v_vflag |= vflag;
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;

	un = VTOUNION(*vpp);
	bzero(un, sizeof(*un));

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);

	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_dirvp = upperdvp;
	un->un_pvp = dvp;		/* only parent dir in new allocation */
	if (dvp != NULLVP)
		VREF(dvp);
	un->un_dircache = NULL;
	un->un_openl = 0;

	if (cnp && (lowervp != NULLVP)) {
		un->un_path = malloc(cnp->cn_namelen+1, M_UNPATH, M_WAITOK);
		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
	} else {
		/*
		 * NOTE(review): un_dirvp is cleared here without a vrele(),
		 * so a non-NULL upperdvp reference passed in would not be
		 * released on this path -- confirm that callers pass NULL
		 * in this case.
		 */
		un->un_path = NULL;
		un->un_dirvp = NULL;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

out:
	/* The bucket lock was only taken when caching was requested. */
	if (docache)
		union_list_unlock(hash);

	return (error);
}
611
612 int
613 union_freevp(vp)
614 struct vnode *vp;
615 {
616 struct union_node *un = VTOUNION(vp);
617
618 if (un->un_flags & UN_CACHED) {
619 un->un_flags &= ~UN_CACHED;
620 LIST_REMOVE(un, un_cache);
621 }
622
623 if (un->un_pvp != NULLVP) {
624 vrele(un->un_pvp);
625 un->un_pvp = NULL;
626 }
627 if (un->un_uppervp != NULLVP) {
628 vrele(un->un_uppervp);
629 un->un_uppervp = NULL;
630 }
631 if (un->un_lowervp != NULLVP) {
632 vrele(un->un_lowervp);
633 un->un_lowervp = NULL;
634 }
635 if (un->un_dirvp != NULLVP) {
636 vrele(un->un_dirvp);
637 un->un_dirvp = NULL;
638 }
639 if (un->un_path) {
640 free(un->un_path, M_UNPATH);
641 un->un_path = NULL;
642 }
643
644 FREE(vp->v_data, M_TEMP);
645 vp->v_data = 0;
646 vp->v_object = NULL;
647
648 return (0);
649 }
650
651 /*
652 * copyfile. Copy the vnode (fvp) to the vnode (tvp)
653 * using a sequence of reads and writes. Both (fvp)
654 * and (tvp) are locked on entry and exit.
655 *
656 * fvp and tvp are both exclusive locked on call, but their refcount's
657 * haven't been bumped at all.
658 */
659 static int
660 union_copyfile(fvp, tvp, cred, td)
661 struct vnode *fvp;
662 struct vnode *tvp;
663 struct ucred *cred;
664 struct thread *td;
665 {
666 char *buf;
667 struct uio uio;
668 struct iovec iov;
669 int error = 0;
670
671 /*
672 * strategy:
673 * Allocate a buffer of size MAXBSIZE.
674 * Loop doing reads and writes, keeping track
675 * of the current uio offset.
676 * Give up at the first sign of trouble.
677 */
678
679 bzero(&uio, sizeof(uio));
680
681 uio.uio_td = td;
682 uio.uio_segflg = UIO_SYSSPACE;
683 uio.uio_offset = 0;
684
685 VOP_LEASE(fvp, td, cred, LEASE_READ);
686 VOP_LEASE(tvp, td, cred, LEASE_WRITE);
687
688 buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
689
690 /* ugly loop follows... */
691 do {
692 off_t offset = uio.uio_offset;
693 int count;
694 int bufoffset;
695
696 /*
697 * Setup for big read.
698 */
699 uio.uio_iov = &iov;
700 uio.uio_iovcnt = 1;
701 iov.iov_base = buf;
702 iov.iov_len = MAXBSIZE;
703 uio.uio_resid = iov.iov_len;
704 uio.uio_rw = UIO_READ;
705
706 if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0)
707 break;
708
709 /*
710 * Get bytes read, handle read eof case and setup for
711 * write loop.
712 */
713 if ((count = MAXBSIZE - uio.uio_resid) == 0)
714 break;
715 bufoffset = 0;
716
717 /*
718 * Write until an error occurs or our buffer has been
719 * exhausted, then update the offset for the next read.
720 */
721 while (bufoffset < count) {
722 uio.uio_iov = &iov;
723 uio.uio_iovcnt = 1;
724 iov.iov_base = buf + bufoffset;
725 iov.iov_len = count - bufoffset;
726 uio.uio_offset = offset + bufoffset;
727 uio.uio_rw = UIO_WRITE;
728 uio.uio_resid = iov.iov_len;
729
730 if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0)
731 break;
732 bufoffset += (count - bufoffset) - uio.uio_resid;
733 }
734 uio.uio_offset = offset + bufoffset;
735 } while (error == 0);
736
737 free(buf, M_TEMP);
738 return (error);
739 }
740
741 /*
742 *
743 * un's vnode is assumed to be locked on entry and remains locked on exit.
744 */
745
746 int
747 union_copyup(un, docopy, cred, td)
748 struct union_node *un;
749 int docopy;
750 struct ucred *cred;
751 struct thread *td;
752 {
753 int error;
754 struct mount *mp;
755 struct vnode *lvp, *uvp;
756
757 /*
758 * If the user does not have read permission, the vnode should not
759 * be copied to upper layer.
760 */
761 vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, td);
762 error = VOP_ACCESS(un->un_lowervp, VREAD, cred, td);
763 VOP_UNLOCK(un->un_lowervp, 0, td);
764 if (error)
765 return (error);
766
767 if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
768 return (error);
769 if ((error = union_vn_create(&uvp, un, td)) != 0) {
770 vn_finished_write(mp);
771 return (error);
772 }
773
774 lvp = un->un_lowervp;
775
776 KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
777 if (docopy) {
778 /*
779 * XX - should not ignore errors
780 * from VOP_CLOSE()
781 */
782 vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td);
783 error = VOP_OPEN(lvp, FREAD, cred, td, -1);
784 if (error == 0) {
785 error = union_copyfile(lvp, uvp, cred, td);
786 VOP_UNLOCK(lvp, 0, td);
787 (void) VOP_CLOSE(lvp, FREAD, cred, td);
788 }
789 if (error == 0)
790 UDEBUG(("union: copied up %s\n", un->un_path));
791
792 }
793 VOP_UNLOCK(uvp, 0, td);
794 vn_finished_write(mp);
795 union_newupper(un, uvp);
796 KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
797 union_vn_close(uvp, FWRITE, cred, td);
798 KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
799 /*
800 * Subsequent IOs will go to the top layer, so
801 * call close on the lower vnode and open on the
802 * upper vnode to ensure that the filesystem keeps
803 * its references counts right. This doesn't do
804 * the right thing with (cred) and (FREAD) though.
805 * Ignoring error returns is not right, either.
806 */
807 if (error == 0) {
808 int i;
809
810 for (i = 0; i < un->un_openl; i++) {
811 (void) VOP_CLOSE(lvp, FREAD, cred, td);
812 (void) VOP_OPEN(uvp, FREAD, cred, td, -1);
813 }
814 un->un_openl = 0;
815 }
816
817 return (error);
818
819 }
820
821 /*
822 * union_relookup:
823 *
824 * dvp should be locked on entry and will be locked on return. No
825 * net change in the ref count will occur.
826 *
827 * If an error is returned, *vpp will be invalid, otherwise it
828 * will hold a locked, referenced vnode. If *vpp == dvp then
829 * remember that only one exclusive lock is held.
830 */
831
832 static int
833 union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
834 struct union_mount *um;
835 struct vnode *dvp;
836 struct vnode **vpp;
837 struct componentname *cnp;
838 struct componentname *cn;
839 char *path;
840 int pathlen;
841 {
842 int error;
843
844 /*
845 * A new componentname structure must be faked up because
846 * there is no way to know where the upper level cnp came
847 * from or what it is being used for. This must duplicate
848 * some of the work done by NDINIT(), some of the work done
849 * by namei(), some of the work done by lookup() and some of
850 * the work done by VOP_LOOKUP() when given a CREATE flag.
851 * Conclusion: Horrible.
852 */
853 cn->cn_namelen = pathlen;
854 cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
855 bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
856 cn->cn_pnbuf[cn->cn_namelen] = '\0';
857
858 cn->cn_nameiop = CREATE;
859 cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
860 cn->cn_thread = cnp->cn_thread;
861 if (um->um_op == UNMNT_ABOVE)
862 cn->cn_cred = cnp->cn_cred;
863 else
864 cn->cn_cred = um->um_cred;
865 cn->cn_nameptr = cn->cn_pnbuf;
866 cn->cn_consume = cnp->cn_consume;
867
868 VREF(dvp);
869 VOP_UNLOCK(dvp, 0, cnp->cn_thread);
870
871 /*
872 * Pass dvp unlocked and referenced on call to relookup().
873 *
874 * If an error occurs, dvp will be returned unlocked and dereferenced.
875 */
876
877 if ((error = relookup(dvp, vpp, cn)) != 0) {
878 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_thread);
879 return(error);
880 }
881
882 /*
883 * If no error occurs, dvp will be returned locked with the reference
884 * left as before, and vpp will be returned referenced and locked.
885 *
886 * We want to return with dvp as it was passed to us, so we get
887 * rid of our reference.
888 */
889 vrele(dvp);
890 return (0);
891 }
892
893 /*
894 * Create a shadow directory in the upper layer.
895 * The new vnode is returned locked.
896 *
897 * (um) points to the union mount structure for access to the
898 * the mounting process's credentials.
899 * (dvp) is the directory in which to create the shadow directory,
900 * It is locked (but not ref'd) on entry and return.
901 * (cnp) is the component name to be created.
902 * (vpp) is the returned newly created shadow directory, which
903 * is returned locked and ref'd
904 */
905 int
906 union_mkshadow(um, dvp, cnp, vpp)
907 struct union_mount *um;
908 struct vnode *dvp;
909 struct componentname *cnp;
910 struct vnode **vpp;
911 {
912 int error;
913 struct vattr va;
914 struct thread *td = cnp->cn_thread;
915 struct componentname cn;
916 struct mount *mp;
917
918 if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
919 return (error);
920 if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
921 cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
922 vn_finished_write(mp);
923 return (error);
924 }
925
926 if (*vpp) {
927 if (cn.cn_flags & HASBUF) {
928 uma_zfree(namei_zone, cn.cn_pnbuf);
929 cn.cn_flags &= ~HASBUF;
930 }
931 if (dvp == *vpp)
932 vrele(*vpp);
933 else
934 vput(*vpp);
935 vn_finished_write(mp);
936 *vpp = NULLVP;
937 return (EEXIST);
938 }
939
940 /*
941 * Policy: when creating the shadow directory in the
942 * upper layer, create it owned by the user who did
943 * the mount, group from parent directory, and mode
944 * 777 modified by umask (ie mostly identical to the
945 * mkdir syscall). (jsp, kb)
946 */
947
948 VATTR_NULL(&va);
949 va.va_type = VDIR;
950 va.va_mode = um->um_cmode;
951
952 /* VOP_LEASE: dvp is locked */
953 VOP_LEASE(dvp, td, cn.cn_cred, LEASE_WRITE);
954
955 error = VOP_MKDIR(dvp, vpp, &cn, &va);
956 if (cn.cn_flags & HASBUF) {
957 uma_zfree(namei_zone, cn.cn_pnbuf);
958 cn.cn_flags &= ~HASBUF;
959 }
960 /*vput(dvp);*/
961 vn_finished_write(mp);
962 return (error);
963 }
964
965 /*
966 * Create a whiteout entry in the upper layer.
967 *
968 * (um) points to the union mount structure for access to the
969 * the mounting process's credentials.
970 * (dvp) is the directory in which to create the whiteout.
971 * It is locked on entry and return.
972 * (cnp) is the component name to be created.
973 */
974 int
975 union_mkwhiteout(um, dvp, cnp, path)
976 struct union_mount *um;
977 struct vnode *dvp;
978 struct componentname *cnp;
979 char *path;
980 {
981 int error;
982 struct thread *td = cnp->cn_thread;
983 struct vnode *wvp;
984 struct componentname cn;
985 struct mount *mp;
986
987 if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
988 return (error);
989 error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
990 if (error) {
991 vn_finished_write(mp);
992 return (error);
993 }
994
995 if (wvp) {
996 if (cn.cn_flags & HASBUF) {
997 uma_zfree(namei_zone, cn.cn_pnbuf);
998 cn.cn_flags &= ~HASBUF;
999 }
1000 if (wvp == dvp)
1001 vrele(wvp);
1002 else
1003 vput(wvp);
1004 vn_finished_write(mp);
1005 return (EEXIST);
1006 }
1007
1008 /* VOP_LEASE: dvp is locked */
1009 VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE);
1010
1011 error = VOP_WHITEOUT(dvp, &cn, CREATE);
1012 if (cn.cn_flags & HASBUF) {
1013 uma_zfree(namei_zone, cn.cn_pnbuf);
1014 cn.cn_flags &= ~HASBUF;
1015 }
1016 vn_finished_write(mp);
1017 return (error);
1018 }
1019
1020 /*
1021 * union_vn_create: creates and opens a new shadow file
1022 * on the upper union layer. This function is similar
1023 * in spirit to calling vn_open() but it avoids calling namei().
1024 * The problem with calling namei() is that a) it locks too many
1025 * things, and b) it doesn't start at the "right" directory,
1026 * whereas relookup() is told where to start.
1027 *
1028 * On entry, the vnode associated with un is locked. It remains locked
1029 * on return.
1030 *
1031 * If no error occurs, *vpp contains a locked referenced vnode for your
1032 * use. If an error occurs *vpp iis undefined.
1033 */
1034 static int
1035 union_vn_create(vpp, un, td)
1036 struct vnode **vpp;
1037 struct union_node *un;
1038 struct thread *td;
1039 {
1040 struct vnode *vp;
1041 struct ucred *cred = td->td_ucred;
1042 struct vattr vat;
1043 struct vattr *vap = &vat;
1044 int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
1045 int error;
1046 int cmode;
1047 struct componentname cn;
1048
1049 *vpp = NULLVP;
1050 FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1051 cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask;
1052 FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1053
1054 /*
1055 * Build a new componentname structure (for the same
1056 * reasons outlines in union_mkshadow()).
1057 * The difference here is that the file is owned by
1058 * the current user, rather than by the person who
1059 * did the mount, since the current user needs to be
1060 * able to write the file (that's why it is being
1061 * copied in the first place).
1062 */
1063 cn.cn_namelen = strlen(un->un_path);
1064 cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
1065 bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
1066 cn.cn_nameiop = CREATE;
1067 cn.cn_flags = ISOPEN|LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN;
1068 cn.cn_thread = td;
1069 cn.cn_cred = td->td_ucred;
1070 cn.cn_nameptr = cn.cn_pnbuf;
1071 cn.cn_consume = 0;
1072
1073 /*
1074 * Pass dvp unlocked and referenced on call to relookup().
1075 *
1076 * If an error occurs, dvp will be returned unlocked and dereferenced.
1077 */
1078 VREF(un->un_dirvp);
1079 error = relookup(un->un_dirvp, &vp, &cn);
1080 if (error)
1081 return (error);
1082
1083 /*
1084 * If no error occurs, dvp will be returned locked with the reference
1085 * left as before, and vpp will be returned referenced and locked.
1086 */
1087 if (vp) {
1088 vput(un->un_dirvp);
1089 if (cn.cn_flags & HASBUF) {
1090 uma_zfree(namei_zone, cn.cn_pnbuf);
1091 cn.cn_flags &= ~HASBUF;
1092 }
1093 if (vp == un->un_dirvp)
1094 vrele(vp);
1095 else
1096 vput(vp);
1097 return (EEXIST);
1098 }
1099
1100 /*
1101 * Good - there was no race to create the file
1102 * so go ahead and create it. The permissions
1103 * on the file will be 0666 modified by the
1104 * current user's umask. Access to the file, while
1105 * it is unioned, will require access to the top *and*
1106 * bottom files. Access when not unioned will simply
1107 * require access to the top-level file.
1108 * TODO: confirm choice of access permissions.
1109 */
1110 VATTR_NULL(vap);
1111 vap->va_type = VREG;
1112 vap->va_mode = cmode;
1113 VOP_LEASE(un->un_dirvp, td, cred, LEASE_WRITE);
1114 error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap);
1115 if (cn.cn_flags & HASBUF) {
1116 uma_zfree(namei_zone, cn.cn_pnbuf);
1117 cn.cn_flags &= ~HASBUF;
1118 }
1119 vput(un->un_dirvp);
1120 if (error)
1121 return (error);
1122
1123 error = VOP_OPEN(vp, fmode, cred, td, -1);
1124 if (error) {
1125 vput(vp);
1126 return (error);
1127 }
1128 vp->v_writecount++;
1129 *vpp = vp;
1130 return (0);
1131 }
1132
1133 static int
1134 union_vn_close(vp, fmode, cred, td)
1135 struct vnode *vp;
1136 int fmode;
1137 struct ucred *cred;
1138 struct thread *td;
1139 {
1140
1141 if (fmode & FWRITE)
1142 --vp->v_writecount;
1143 return (VOP_CLOSE(vp, fmode, cred, td));
1144 }
1145
1146 /*
1147 * union_removed_upper:
1148 *
1149 * An upper-only file/directory has been removed; un-cache it so
1150 * that unionfs vnode gets reclaimed and the last uppervp reference
1151 * disappears.
1152 *
1153 * Called with union_node unlocked.
1154 */
1155
1156 void
1157 union_removed_upper(un)
1158 struct union_node *un;
1159 {
1160 if (un->un_flags & UN_CACHED) {
1161 int hash = UNION_HASH(un->un_uppervp, un->un_lowervp);
1162
1163 while (union_list_lock(hash))
1164 continue;
1165 un->un_flags &= ~UN_CACHED;
1166 LIST_REMOVE(un, un_cache);
1167 union_list_unlock(hash);
1168 }
1169 }
1170
1171 /*
1172 * Determine whether a whiteout is needed
1173 * during a remove/rmdir operation.
1174 */
1175 int
1176 union_dowhiteout(un, cred, td)
1177 struct union_node *un;
1178 struct ucred *cred;
1179 struct thread *td;
1180 {
1181 struct vattr va;
1182
1183 if (un->un_lowervp != NULLVP)
1184 return (1);
1185
1186 if (VOP_GETATTR(un->un_uppervp, &va, cred, td) == 0 &&
1187 (va.va_flags & OPAQUE))
1188 return (1);
1189
1190 return (0);
1191 }
1192
1193 static void
1194 union_dircache_r(vp, vppp, cntp)
1195 struct vnode *vp;
1196 struct vnode ***vppp;
1197 int *cntp;
1198 {
1199 struct union_node *un;
1200
1201 if (vp->v_op != &union_vnodeops) {
1202 if (vppp) {
1203 VREF(vp);
1204 *(*vppp)++ = vp;
1205 if (--(*cntp) == 0)
1206 panic("union: dircache table too small");
1207 } else {
1208 (*cntp)++;
1209 }
1210 } else {
1211 un = VTOUNION(vp);
1212 if (un->un_uppervp != NULLVP)
1213 union_dircache_r(un->un_uppervp, vppp, cntp);
1214 if (un->un_lowervp != NULLVP)
1215 union_dircache_r(un->un_lowervp, vppp, cntp);
1216 }
1217 }
1218
1219 struct vnode *
1220 union_dircache_get(vp, td)
1221 struct vnode *vp;
1222 struct thread *td;
1223 {
1224 int cnt;
1225 struct vnode *nvp;
1226 struct vnode **vpp;
1227 struct vnode **dircache, **newdircache;
1228 struct union_node *un;
1229 int error;
1230
1231 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1232 un = VTOUNION(vp);
1233 dircache = un->un_dircache;
1234 newdircache = NULL;
1235
1236 nvp = NULLVP;
1237
1238 if (dircache == NULL) {
1239 cnt = 0;
1240 union_dircache_r(vp, 0, &cnt);
1241 cnt++;
1242 newdircache = dircache = malloc(cnt * sizeof(struct vnode *),
1243 M_UNDCACHE, M_WAITOK);
1244 vpp = dircache;
1245 union_dircache_r(vp, &vpp, &cnt);
1246 *vpp = NULLVP;
1247 vpp = dircache + 1;
1248 } else {
1249 vpp = dircache;
1250 do {
1251 if (*vpp++ == un->un_uppervp)
1252 break;
1253 } while (*vpp != NULLVP);
1254 }
1255
1256 if (*vpp == NULLVP)
1257 goto out;
1258
1259 /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);*/
1260 UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? vrefcnt(*vpp) : -99)));
1261 VREF(*vpp);
1262 error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0);
1263 UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? vrefcnt(*vpp) : -99)));
1264 if (error)
1265 goto out;
1266
1267 un->un_dircache = NULL;
1268 VTOUNION(nvp)->un_dircache = dircache;
1269 newdircache = NULL;
1270
1271 out:
1272 /*
1273 * If we allocated a new dircache and couldn't attach
1274 * it to a new vp, free the resources we allocated.
1275 */
1276 if (newdircache) {
1277 for (vpp = newdircache; *vpp != NULLVP; vpp++)
1278 vrele(*vpp);
1279 free(newdircache, M_UNDCACHE);
1280 }
1281
1282 VOP_UNLOCK(vp, 0, td);
1283 return (nvp);
1284 }
1285
1286 void
1287 union_dircache_free(struct union_node *un)
1288 {
1289 struct vnode **vpp;
1290
1291 for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1292 vrele(*vpp);
1293 free(un->un_dircache, M_UNDCACHE);
1294 un->un_dircache = NULL;
1295 }
1296
1297 /*
1298 * Module glue to remove #ifdef UNION from vfs_syscalls.c
1299 */
1300 static int
1301 union_dircheck(struct thread *td, struct vnode **vp, struct file *fp)
1302 {
1303 int error = 0;
1304
1305 if ((*vp)->v_op == &union_vnodeops) {
1306 struct vnode *lvp;
1307
1308 lvp = union_dircache_get(*vp, td);
1309 if (lvp != NULLVP) {
1310 struct vattr va;
1311
1312 /*
1313 * If the directory is opaque,
1314 * then don't show lower entries
1315 */
1316 error = VOP_GETATTR(*vp, &va, fp->f_cred, td);
1317 if (va.va_flags & OPAQUE) {
1318 vput(lvp);
1319 lvp = NULLVP;
1320 }
1321 }
1322
1323 if (lvp != NULLVP) {
1324 error = VOP_OPEN(lvp, FREAD, fp->f_cred, td, -1);
1325 if (error) {
1326 vput(lvp);
1327 return (error);
1328 }
1329 VOP_UNLOCK(lvp, 0, td);
1330 FILE_LOCK(fp);
1331 fp->f_vnode = lvp;
1332 fp->f_data = lvp;
1333 fp->f_offset = 0;
1334 FILE_UNLOCK(fp);
1335 error = vn_close(*vp, FREAD, fp->f_cred, td);
1336 if (error)
1337 return (error);
1338 *vp = lvp;
1339 return -1; /* goto unionread */
1340 }
1341 }
1342 return error;
1343 }
1344
1345 static int
1346 union_modevent(module_t mod, int type, void *data)
1347 {
1348 switch (type) {
1349 case MOD_LOAD:
1350 union_dircheckp = union_dircheck;
1351 break;
1352 case MOD_UNLOAD:
1353 union_dircheckp = NULL;
1354 break;
1355 default:
1356 return EOPNOTSUPP;
1357 break;
1358 }
1359 return 0;
1360 }
1361
/*
 * Module descriptor: the module is named "union_dircheck" and its
 * events are handled by union_modevent().  The final initializer is
 * NULL (presumably the module's private data pointer -- confirm against
 * the definition of moduledata_t).
 */
static moduledata_t union_mod = {
	"union_dircheck",
	union_modevent,
	NULL
};

/* Declare the module with subsystem SI_SUB_VFS and order SI_ORDER_ANY. */
DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);
Cache object: 7aaeae0f4669a2fb9c6c9473d01f545e
|