/*-
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>	/* for vnode_pager_setsize */
#include <vm/vm_object.h>	/* for vm cache coherency */
#include <vm/uma.h>

#include <fs/unionfs/union.h>

#include <sys/proc.h>

extern int	union_init(void);

/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/*
 * Hash the (uppervp, lowervp) pair into one of NHASH buckets: the two
 * pointer values are summed, shifted right to discard the low-order
 * allocation-alignment bits, and masked into the table, yielding an
 * unsigned index in the range [0, NHASH-1].
 */
#define UNION_HASH(u, l) \
	(((((uintptr_t) (u)) + ((uintptr_t) (l))) >> 8) & (NHASH-1))
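/*
 * Worked example with hypothetical (purely illustrative) pointer values:
 * for u == 0xc0a01200 and l == 0xc0a01300,
 * (u + l) >> 8 == 0x1814025, and 0x1814025 & (NHASH-1) == 5,
 * so a node carrying that pair of layers hashes to bucket 5.
 */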

static MALLOC_DEFINE(M_UNPATH, "unpath", "UNION path component");
static MALLOC_DEFINE(M_UNDCACHE, "undcac", "UNION directory cache");

static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

static void	union_dircache_r(struct vnode *vp, struct vnode ***vppp,
		    int *cntp);
static int	union_list_lock(int ix);
static void	union_list_unlock(int ix);
static int	union_relookup(struct union_mount *um, struct vnode *dvp,
		    struct vnode **vpp,
		    struct componentname *cnp,
		    struct componentname *cn, char *path,
		    int pathlen);
static void	union_updatevp(struct union_node *un,
		    struct vnode *uppervp,
		    struct vnode *lowervp);
static void	union_newlower(struct union_node *, struct vnode *);
static void	union_newupper(struct union_node *, struct vnode *);
static int	union_copyfile(struct vnode *, struct vnode *,
		    struct ucred *, struct thread *);
static int	union_vn_create(struct vnode **, struct union_node *,
		    struct thread *);
static int	union_vn_close(struct vnode *, int, struct ucred *,
		    struct thread *);

int
union_init()
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	bzero((caddr_t)unvplock, sizeof(unvplock));
	return (0);
}

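/*
 * Lock the hash chain (ix).  Returns 1 if the caller had to sleep
 * waiting for the lock, in which case the lock was NOT acquired and
 * the caller must retry; returns 0 if the lock was acquired outright.
 */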
static int
union_list_lock(ix)
	int ix;
{
	if (unvplock[ix] & UNVP_LOCKED) {
		unvplock[ix] |= UNVP_WANT;
		(void) tsleep(&unvplock[ix], PINOD, "unllck", 0);
		return (1);
	}
	unvplock[ix] |= UNVP_LOCKED;
	return (0);
}

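/*
 * Unlock the hash chain (ix) and wake up anyone sleeping in
 * union_list_lock() waiting for it.
 */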
static void
union_list_unlock(ix)
	int ix;
{
	unvplock[ix] &= ~UNVP_LOCKED;

	if (unvplock[ix] & UNVP_WANT) {
		unvplock[ix] &= ~UNVP_WANT;
		wakeup(&unvplock[ix]);
	}
}

/*
 * union_updatevp:
 *
 *	The uppervp, if not NULL, must be referenced and not locked by us.
 *	The lowervp, if not NULL, must be referenced.
 *
 *	If uppervp and lowervp match pointers already installed, then
 *	nothing happens.  The passed vp's (when matching) are not adjusted.
 *
 *	This routine may only be called by union_newupper() and
 *	union_newlower().
 */

static void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash) {
		while (union_list_lock(lhash))
			continue;
	}

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			if (un->un_path) {
				free(un->un_path, M_UNPATH);
				un->un_path = NULL;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);
		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}

/*
 * Set a new lowervp.  The passed lowervp must be referenced and will be
 * stored in the vp in a referenced state.
 */

static void
union_newlower(un, lowervp)
	struct union_node *un;
	struct vnode *lowervp;
{
	union_updatevp(un, un->un_uppervp, lowervp);
}

/*
 * Set a new uppervp.  The passed uppervp must be locked and will be
 * stored in the vp in a locked state.  The caller should not unlock
 * uppervp.
 */

static void
union_newupper(un, uppervp)
	struct union_node *un;
	struct vnode *uppervp;
{
	union_updatevp(un, uppervp, un->un_lowervp);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then callback to the vm layer
 * giving priority to the upper layer size.
 */
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
		UDEBUG(("union: %s size now %ld\n",
		    (uppersz != VNOVAL ? "upper" : "lower"), (long)sz));
		/*
		 * There is no need to change size of non-existent object.
		 */
		/* vnode_pager_setsize(vp, sz); */
	}
}

/*
 * union_allocvp:	allocate a union_node and associate it with a
 *			parent union_node and one or two vnodes.
 *
 *	vpp	Holds the returned vnode locked and referenced if no
 *		error occurs.
 *
 *	mp	Holds the mount point.  mp may or may not be busied.
 *		allocvp() makes no changes to mp.
 *
 *	dvp	Holds the parent union_node to the one we wish to create.
 *		XXX may only be used to traverse an uncopied lowervp-based
 *		tree?  XXX
 *
 *		dvp may or may not be locked.  allocvp() makes no changes
 *		to dvp.
 *
 *	upperdvp Holds the parent vnode to uppervp, generally used along
 *		with path component information to create a shadow of
 *		lowervp when uppervp does not exist.
 *
 *		upperdvp is referenced but unlocked on entry, and will be
 *		dereferenced on return.
 *
 *	uppervp	Holds the new uppervp vnode to be stored in the
 *		union_node we are allocating.  uppervp is referenced but
 *		not locked, and will be dereferenced on return.
 *
 *	lowervp	Holds the new lowervp vnode to be stored in the
 *		union_node we are allocating.  lowervp is referenced but
 *		not locked, and will be dereferenced on return.
 *
 *	cnp	Holds path component information to be coupled with
 *		lowervp and upperdvp to allow unionfs to create an uppervp
 *		later on.  Only used if lowervp is valid.  The contents
 *		of cnp are only valid for the duration of the call.
 *
 *	docache	Determines whether this node should be entered in the
 *		cache or whether it should be destroyed as soon as possible.
 *
 * All union_nodes are maintained on a singly-linked
 * list.  New nodes are only allocated when they cannot
 * be found on this list.  Entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * A single lock is kept for the entire list.  This is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode.  This lock is only taken if we are going to
 * call getnewvnode(), since the kernel itself is single-threaded.
 *
 * If an entry is found on the list, then call vget() to
 * take a reference.  This is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */

int
union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *dvp;		/* parent union vnode */
	struct vnode *upperdvp;		/* parent vnode of uppervp */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un = NULL;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	struct thread *td = (cnp) ? cnp->cn_thread : curthread;
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		vrele(lowervp);
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VV_ROOT;
	}

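	/*
	 * Look for an existing union_node.  A cached node may have been
	 * entered under any of three hashes, depending on which layers
	 * were present at the time: (uppervp, lowervp), (uppervp, NULL)
	 * or (NULL, lowervp), so each candidate chain is probed in turn.
	 */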
loop:
	if (!docache) {
		un = NULL;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		LIST_FOREACH(un, &unhead[hash], un_cache) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vget(UNIONTOV(un), 0,
				    cnp ? cnp->cn_thread : NULL)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.  Everything is unlocked
		 * except for dvp, so check that case.  If they match, our
		 * new un is already locked.  Otherwise we have to lock our
		 * new un.
		 *
		 * A potential deadlock situation occurs when we are holding
		 * one lock while trying to get another.  We must follow
		 * strict ordering rules to avoid it.  We try to locate dvp
		 * by scanning up from un_vnode, since the most likely
		 * scenario is un being under dvp.
		 */

		if (dvp && un->un_vnode != dvp) {
			struct vnode *scan = un->un_vnode;

			do {
				scan = VTOUNION(scan)->un_pvp;
			} while (scan && scan->v_op == union_vnodeop_p &&
				 scan != dvp);
			if (scan != dvp) {
				/*
				 * our new un is above dvp (we never saw dvp
				 * while moving up the tree).
				 */
				VREF(dvp);
				VOP_UNLOCK(dvp, 0, td);
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
				vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
				vrele(dvp);
			} else {
				/*
				 * our new un is under dvp
				 */
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
			}
		} else if (dvp == NULLVP) {
			/*
			 * dvp is NULL, we need to lock un.
			 */
			error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
		} else {
			/*
			 * dvp == un->un_vnode, we are already locked.
			 */
			error = 0;
		}

		if (error)
			goto loop;

		/*
		 * At this point, the union_node is locked and referenced.
		 *
		 * uppervp is locked and referenced or NULL, lowervp is
		 * referenced or NULL.
		 */
		UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n",
		    un, un->un_vnode, un->un_uppervp,
		    (un->un_uppervp ? vrefcnt(un->un_uppervp) : -99),
		    uppervp,
		    (uppervp ? vrefcnt(uppervp) : -99)));

		if (uppervp != un->un_uppervp) {
			KASSERT(uppervp == NULL || vrefcnt(uppervp) > 0,
			    ("union_allocvp: too few refs %d "
			    "(at least 1 required) on uppervp",
			    vrefcnt(uppervp)));
			union_newupper(un, uppervp);
		} else if (uppervp) {
			KASSERT(vrefcnt(uppervp) > 1,
			    ("union_allocvp: too few refs %d "
			    "(at least 2 required) on uppervp",
			    vrefcnt(uppervp)));
			vrele(uppervp);
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create()
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_path = malloc(cnp->cn_namelen+1,
				    M_UNPATH, M_WAITOK);
				bcopy(cnp->cn_nameptr, un->un_path,
				    cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
			}
		} else if (lowervp) {
			vrele(lowervp);
		}

		/*
		 * and upperdvp
		 */
		if (upperdvp != un->un_dirvp) {
			if (un->un_dirvp)
				vrele(un->un_dirvp);
			un->un_dirvp = upperdvp;
		} else if (upperdvp) {
			vrele(upperdvp);
		}

		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * Otherwise lock the vp list while we call getnewvnode()
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	/*
	 * Create new node rather than replace old node.
	 */

	error = getnewvnode("union", mp, union_vnodeop_p, vpp);
	if (error) {
		/*
		 * If an error occurs, clear out vnodes.
		 */
		if (lowervp)
			vrele(lowervp);
		if (uppervp)
			vrele(uppervp);
		if (upperdvp)
			vrele(upperdvp);
		*vpp = NULL;
		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
	    M_TEMP, M_WAITOK);

	ASSERT_VOP_LOCKED(*vpp, "union_allocvp");
	(*vpp)->v_vflag |= vflag;
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;

	un = VTOUNION(*vpp);
	bzero(un, sizeof(*un));

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);

	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_dirvp = upperdvp;
	un->un_pvp = dvp;		/* only parent dir in new allocation */
	if (dvp != NULLVP)
		VREF(dvp);
	un->un_dircache = NULL;
	un->un_openl = 0;

	if (cnp && (lowervp != NULLVP)) {
		un->un_path = malloc(cnp->cn_namelen+1, M_UNPATH, M_WAITOK);
		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
	} else {
		un->un_path = NULL;
		un->un_dirvp = NULL;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}

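/*
 * union_freevp: release all references held by the union_node and
 * detach it from its vnode.  Called when the vfs reclaim entry is
 * invoked on the union vnode.
 */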
int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP) {
		vrele(un->un_pvp);
		un->un_pvp = NULL;
	}
	if (un->un_uppervp != NULLVP) {
		vrele(un->un_uppervp);
		un->un_uppervp = NULL;
	}
	if (un->un_lowervp != NULLVP) {
		vrele(un->un_lowervp);
		un->un_lowervp = NULL;
	}
	if (un->un_dirvp != NULLVP) {
		vrele(un->un_dirvp);
		un->un_dirvp = NULL;
	}
	if (un->un_path) {
		free(un->un_path, M_UNPATH);
		un->un_path = NULL;
	}

	FREE(vp->v_data, M_TEMP);
	vp->v_data = NULL;

	return (0);
}

/*
 * copyfile.  Copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  Both (fvp)
 * and (tvp) are locked on entry and exit.
 *
 * fvp and tvp are both exclusively locked on call, but their reference
 * counts haven't been bumped at all.
 */
static int
union_copyfile(fvp, tvp, cred, td)
	struct vnode *fvp;
	struct vnode *tvp;
	struct ucred *cred;
	struct thread *td;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * Allocate a buffer of size MAXBSIZE.
	 * Loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * Give up at the first sign of trouble.
	 */

	bzero(&uio, sizeof(uio));

	uio.uio_td = td;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	VOP_LEASE(fvp, td, cred, LEASE_READ);
	VOP_LEASE(tvp, td, cred, LEASE_WRITE);

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;
		int count;
		int bufoffset;

		/*
		 * Setup for big read.
		 */
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;

		if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0)
			break;

		/*
		 * Get bytes read, handle read eof case and setup for
		 * write loop.
		 */
		if ((count = MAXBSIZE - uio.uio_resid) == 0)
			break;
		bufoffset = 0;

		/*
		 * Write until an error occurs or our buffer has been
		 * exhausted, then update the offset for the next read.
		 */
		while (bufoffset < count) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf + bufoffset;
			iov.iov_len = count - bufoffset;
			uio.uio_offset = offset + bufoffset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0)
				break;
			bufoffset += (count - bufoffset) - uio.uio_resid;
		}
		uio.uio_offset = offset + bufoffset;
	} while (error == 0);

	free(buf, M_TEMP);
	return (error);
}

/*
 * union_copyup: copy un's lower vnode to a newly created upper vnode.
 *
 * un's vnode is assumed to be locked on entry and remains locked on exit.
 */

int
union_copyup(un, docopy, cred, td)
	struct union_node *un;
	int docopy;
	struct ucred *cred;
	struct thread *td;
{
	int error;
	struct mount *mp;
	struct vnode *lvp, *uvp;

	/*
	 * If the user does not have read permission, the vnode should not
	 * be copied to upper layer.
	 */
	vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, td);
	error = VOP_ACCESS(un->un_lowervp, VREAD, cred, td);
	VOP_UNLOCK(un->un_lowervp, 0, td);
	if (error)
		return (error);

	if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	if ((error = union_vn_create(&uvp, un, td)) != 0) {
		vn_finished_write(mp);
		return (error);
	}

	lvp = un->un_lowervp;

	KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
	if (docopy) {
		/*
		 * XXX - should not ignore errors
		 * from VOP_CLOSE().
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td);
		error = VOP_OPEN(lvp, FREAD, cred, td, -1);
		if (error == 0 && vn_canvmio(lvp) == TRUE)
			error = vfs_object_create(lvp, td, cred);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, td);
			VOP_UNLOCK(lvp, 0, td);
			(void) VOP_CLOSE(lvp, FREAD, cred, td);
		}
		if (error == 0)
			UDEBUG(("union: copied up %s\n", un->un_path));
	}
	VOP_UNLOCK(uvp, 0, td);
	vn_finished_write(mp);
	union_newupper(un, uvp);
	KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
	union_vn_close(uvp, FWRITE, cred, td);
	KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp)));
	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its reference counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred, td);
			(void) VOP_OPEN(uvp, FREAD, cred, td, -1);
		}
		if (un->un_openl) {
			if (vn_canvmio(uvp) == TRUE)
				error = vfs_object_create(uvp, td, cred);
		}
		un->un_openl = 0;
	}

	return (error);
}

/*
 * union_relookup:
 *
 *	dvp should be locked on entry and will be locked on return.  No
 *	net change in the ref count will occur.
 *
 *	If an error is returned, *vpp will be invalid, otherwise it
 *	will hold a locked, referenced vnode.  If *vpp == dvp then
 *	remember that only one exclusive lock is held.
 */

static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT(), some of the work done
	 * by namei(), some of the work done by lookup() and some of
	 * the work done by VOP_LOOKUP() when given a CREATE flag.
	 * Conclusion: Horrible.
	 */
	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
	cn->cn_thread = cnp->cn_thread;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_consume = cnp->cn_consume;

	VREF(dvp);
	VOP_UNLOCK(dvp, 0, cnp->cn_thread);

	/*
	 * Pass dvp unlocked and referenced on call to relookup().
	 *
	 * If an error occurs, dvp will be returned unlocked and dereferenced.
	 */

	if ((error = relookup(dvp, vpp, cn)) != 0) {
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_thread);
		return (error);
	}

	/*
	 * If no error occurs, dvp will be returned locked with the reference
	 * left as before, and vpp will be returned referenced and locked.
	 *
	 * We want to return with dvp as it was passed to us, so we get
	 * rid of our reference.
	 */
	vrele(dvp);
	return (0);
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to the
 * mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * It is locked (but not ref'd) on entry and return.
 * (cnp) is the component name to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked and ref'd.
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct thread *td = cnp->cn_thread;
	struct componentname cn;
	struct mount *mp;

	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
	    cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
		vn_finished_write(mp);
		return (error);
	}

	if (*vpp) {
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (dvp == *vpp)
			vrele(*vpp);
		else
			vput(*vpp);
		vn_finished_write(mp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * Policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (i.e., mostly identical to the
	 * mkdir syscall).  (jsp, kb)
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, td, cn.cn_cred, LEASE_WRITE);

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}
	/*vput(dvp);*/
	vn_finished_write(mp);
	return (error);
}

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to the
 * mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * It is locked on entry and return.
 * (cnp) is the component name to be created.
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct thread *td = cnp->cn_thread;
	struct vnode *wvp;
	struct componentname cn;
	struct mount *mp;

	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		vn_finished_write(mp);
		return (error);
	}

	if (wvp) {
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (wvp == dvp)
			vrele(wvp);
		else
			vput(wvp);
		vn_finished_write(mp);
		return (EEXIST);
	}

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE);

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}
	vn_finished_write(mp);
	return (error);
}

/*
 * union_vn_create:	creates and opens a new shadow file
 * on the upper union layer.  This function is similar
 * in spirit to calling vn_open() but it avoids calling namei().
 * The problem with calling namei() is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup() is told where to start.
 *
 * On entry, the vnode associated with un is locked.  It remains locked
 * on return.
 *
 * If no error occurs, *vpp contains a locked referenced vnode for your
 * use.  If an error occurs, *vpp is undefined.
 */
static int
union_vn_create(vpp, un, td)
	struct vnode **vpp;
	struct union_node *un;
	struct thread *td;
{
	struct vnode *vp;
	struct ucred *cred = td->td_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode;
	struct componentname cn;

	*vpp = NULLVP;
	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
	cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask;
	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow()).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
	cn.cn_thread = td;
	cn.cn_cred = td->td_ucred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_consume = 0;

	/*
	 * Pass dvp unlocked and referenced on call to relookup().
	 *
	 * If an error occurs, dvp will be returned unlocked and dereferenced.
	 */
	VREF(un->un_dirvp);
	error = relookup(un->un_dirvp, &vp, &cn);
	if (error)
		return (error);

	/*
	 * If no error occurs, dvp will be returned locked with the reference
	 * left as before, and vpp will be returned referenced and locked.
	 */
	if (vp) {
		vput(un->un_dirvp);
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (vp == un->un_dirvp)
			vrele(vp);
		else
			vput(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it.  The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask.  Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files.  Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	VOP_LEASE(un->un_dirvp, td, cred, LEASE_WRITE);
	error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap);
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}
	vput(un->un_dirvp);
	if (error)
		return (error);

	error = VOP_OPEN(vp, fmode, cred, td, -1);
	if (error == 0 && vn_canvmio(vp) == TRUE)
		error = vfs_object_create(vp, td, cred);
	if (error) {
		vput(vp);
		return (error);
	}
	vp->v_writecount++;
	*vpp = vp;
	return (0);
}

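/*
 * union_vn_close: undo the work done by union_vn_create(), dropping
 * the writecount taken at create time before closing the vnode.
 */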
static int
union_vn_close(vp, fmode, cred, td)
	struct vnode *vp;
	int fmode;
	struct ucred *cred;
	struct thread *td;
{

	if (fmode & FWRITE)
		--vp->v_writecount;
	return (VOP_CLOSE(vp, fmode, cred, td));
}

/*
 * union_removed_upper:
 *
 *	An upper-only file/directory has been removed; un-cache it so
 *	that the unionfs vnode gets reclaimed and the last uppervp
 *	reference disappears.
 *
 *	Called with the union_node unlocked.
 */

void
union_removed_upper(un)
	struct union_node *un;
{
	if (un->un_flags & UN_CACHED) {
		int hash = UNION_HASH(un->un_uppervp, un->un_lowervp);

		while (union_list_lock(hash))
			continue;
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
		union_list_unlock(hash);
	}
}

/*
 * Determine whether a whiteout is needed
 * during a remove/rmdir operation.
 */
int
union_dowhiteout(un, cred, td)
	struct union_node *un;
	struct ucred *cred;
	struct thread *td;
{
	struct vattr va;

	if (un->un_lowervp != NULLVP)
		return (1);

	if (VOP_GETATTR(un->un_uppervp, &va, cred, td) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);

	return (0);
}

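/*
 * union_dircache_r: recursively collect the real (non-union) vnodes
 * backing the directory (vp).  Called twice by union_dircache_get():
 * once with (vppp) == NULL just to count the vnodes, and once more to
 * reference each vnode and store it in the dircache table.
 */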
static void
union_dircache_r(vp, vppp, cntp)
	struct vnode *vp;
	struct vnode ***vppp;
	int *cntp;
{
	struct union_node *un;

	if (vp->v_op != union_vnodeop_p) {
		if (vppp) {
			VREF(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}
	} else {
		un = VTOUNION(vp);
		if (un->un_uppervp != NULLVP)
			union_dircache_r(un->un_uppervp, vppp, cntp);
		if (un->un_lowervp != NULLVP)
			union_dircache_r(un->un_lowervp, vppp, cntp);
	}
}

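/*
 * union_dircache_get: return a union vnode stacked on the next layer
 * of (vp)'s directory cache, building the cache on first use.  Used
 * by union_dircheck() below to fall through to lower-layer directory
 * entries during a directory read.  Returns NULLVP when no further
 * layer exists.
 */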
struct vnode *
union_dircache_get(vp, td)
	struct vnode *vp;
	struct thread *td;
{
	int cnt;
	struct vnode *nvp;
	struct vnode **vpp;
	struct vnode **dircache, **newdircache;
	struct union_node *un;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	un = VTOUNION(vp);
	dircache = un->un_dircache;
	newdircache = NULL;

	nvp = NULLVP;

	if (dircache == NULL) {
		cnt = 0;
		union_dircache_r(vp, NULL, &cnt);
		cnt++;
		newdircache = dircache = malloc(cnt * sizeof(struct vnode *),
		    M_UNDCACHE, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		vpp = dircache;
		do {
			if (*vpp++ == un->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	/*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);*/
	UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? vrefcnt(*vpp) : -99)));
	VREF(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL,
	    *vpp, NULLVP, 0);
	UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? vrefcnt(*vpp) : -99)));
	if (error)
		goto out;

	un->un_dircache = NULL;
	VTOUNION(nvp)->un_dircache = dircache;
	newdircache = NULL;

out:
	/*
	 * If we allocated a new dircache and couldn't attach
	 * it to a new vp, free the resources we allocated.
	 */
	if (newdircache) {
		for (vpp = newdircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(newdircache, M_UNDCACHE);
	}

	VOP_UNLOCK(vp, 0, td);
	return (nvp);
}

void
union_dircache_free(struct union_node *un)
{
	struct vnode **vpp;

	for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
		vrele(*vpp);
	free(un->un_dircache, M_UNDCACHE);
	un->un_dircache = NULL;
}

/*
 * Module glue to remove #ifdef UNION from vfs_syscalls.c.
 */
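/*
 * union_dircheck: called from the directory read path through the
 * union_dircheckp hook.  If (*vp) is a union directory, switch the
 * open file over to the next lower layer from the dircache and return
 * -1 to ask the caller to restart the read (the "goto unionread"
 * convention in vfs_syscalls.c); otherwise return 0 or an error.
 */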
static int
union_dircheck(struct thread *td, struct vnode **vp, struct file *fp)
{
	int error = 0;

	if ((*vp)->v_op == union_vnodeop_p) {
		struct vnode *lvp;

		lvp = union_dircache_get(*vp, td);
		if (lvp != NULLVP) {
			struct vattr va;

			/*
			 * If the directory is opaque,
			 * then don't show lower entries.
			 */
			error = VOP_GETATTR(*vp, &va, fp->f_cred, td);
			if (va.va_flags & OPAQUE) {
				vput(lvp);
				lvp = NULLVP;
			}
		}

		if (lvp != NULLVP) {
			error = VOP_OPEN(lvp, FREAD, fp->f_cred, td, -1);
			if (error == 0 && vn_canvmio(lvp) == TRUE)
				error = vfs_object_create(lvp, td, fp->f_cred);
			if (error) {
				vput(lvp);
				return (error);
			}
			VOP_UNLOCK(lvp, 0, td);
			FILE_LOCK(fp);
			fp->f_vnode = lvp;
			fp->f_data = lvp;
			fp->f_offset = 0;
			FILE_UNLOCK(fp);
			error = vn_close(*vp, FREAD, fp->f_cred, td);
			if (error)
				return (error);
			*vp = lvp;
			return (-1);	/* goto unionread */
		}
	}
	return (error);
}

static int
union_modevent(module_t mod, int type, void *data)
{
	switch (type) {
	case MOD_LOAD:
		union_dircheckp = union_dircheck;
		break;
	case MOD_UNLOAD:
		union_dircheckp = NULL;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

static moduledata_t union_mod = {
	"union_dircheck",
	union_modevent,
	NULL
};

DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);