1 /* $NetBSD: union_subr.c,v 1.21.2.1 2007/02/17 23:27:46 tron Exp $ */
2
3 /*
4 * Copyright (c) 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95
35 */
36
37 /*
38 * Copyright (c) 1994 Jan-Simon Pendry
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the University of
54 * California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 * may be used to endorse or promote products derived from this software
57 * without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 * @(#)union_subr.c 8.20 (Berkeley) 5/20/95
72 */
73
74 #include <sys/cdefs.h>
75 __KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.21.2.1 2007/02/17 23:27:46 tron Exp $");
76
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/time.h>
81 #include <sys/kernel.h>
82 #include <sys/vnode.h>
83 #include <sys/namei.h>
84 #include <sys/malloc.h>
85 #include <sys/file.h>
86 #include <sys/filedesc.h>
87 #include <sys/queue.h>
88 #include <sys/mount.h>
89 #include <sys/stat.h>
90 #include <sys/kauth.h>
91
92 #include <uvm/uvm_extern.h>
93
94 #include <fs/union/union.h>
95
96 #ifdef DIAGNOSTIC
97 #include <sys/proc.h>
98 #endif
99
/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/*
 * Map an (upper vnode, lower vnode) pair to a hash chain index in
 * [0, NHASH).  Both arguments are converted to unsigned long, summed,
 * shifted right by 8 bits and masked with NHASH-1.  Every macro argument
 * is parenthesized so that arbitrary expressions may be passed safely.
 */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) (l))) >> 8) & (NHASH-1))
106
107 static LIST_HEAD(unhead, union_node) unhead[NHASH];
108 static int unvplock[NHASH];
109
110 static int union_list_lock(int);
111 static void union_list_unlock(int);
112 void union_updatevp(struct union_node *, struct vnode *, struct vnode *);
113 static int union_relookup(struct union_mount *, struct vnode *,
114 struct vnode **, struct componentname *,
115 struct componentname *, const char *, int);
116 int union_vn_close(struct vnode *, int, kauth_cred_t, struct lwp *);
117 static void union_dircache_r(struct vnode *, struct vnode ***, int *);
118 struct vnode *union_dircache(struct vnode *, struct lwp *);
119
120 void
121 union_init()
122 {
123 int i;
124
125 for (i = 0; i < NHASH; i++)
126 LIST_INIT(&unhead[i]);
127 memset(unvplock, 0, sizeof(unvplock));
128 }
129
130 /*
131 * Free global unionfs resources.
132 */
133 void
134 union_done()
135 {
136
137 /* Make sure to unset the readdir hook. */
138 vn_union_readdir_hook = NULL;
139 }
140
141 static int
142 union_list_lock(ix)
143 int ix;
144 {
145
146 if (unvplock[ix] & UN_LOCKED) {
147 unvplock[ix] |= UN_WANTED;
148 (void) tsleep(&unvplock[ix], PINOD, "unionlk", 0);
149 return (1);
150 }
151
152 unvplock[ix] |= UN_LOCKED;
153
154 return (0);
155 }
156
157 static void
158 union_list_unlock(ix)
159 int ix;
160 {
161
162 unvplock[ix] &= ~UN_LOCKED;
163
164 if (unvplock[ix] & UN_WANTED) {
165 unvplock[ix] &= ~UN_WANTED;
166 wakeup(&unvplock[ix]);
167 }
168 }
169
170 void
171 union_updatevp(un, uppervp, lowervp)
172 struct union_node *un;
173 struct vnode *uppervp;
174 struct vnode *lowervp;
175 {
176 int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
177 int nhash = UNION_HASH(uppervp, lowervp);
178 int docache = (lowervp != NULLVP || uppervp != NULLVP);
179 int lhash, uhash;
180
181 /*
182 * Ensure locking is ordered from lower to higher
183 * to avoid deadlocks.
184 */
185 if (nhash < ohash) {
186 lhash = nhash;
187 uhash = ohash;
188 } else {
189 lhash = ohash;
190 uhash = nhash;
191 }
192
193 if (lhash != uhash)
194 while (union_list_lock(lhash))
195 continue;
196
197 while (union_list_lock(uhash))
198 continue;
199
200 if (ohash != nhash || !docache) {
201 if (un->un_flags & UN_CACHED) {
202 un->un_flags &= ~UN_CACHED;
203 LIST_REMOVE(un, un_cache);
204 }
205 }
206
207 if (ohash != nhash)
208 union_list_unlock(ohash);
209
210 if (un->un_lowervp != lowervp) {
211 if (un->un_lowervp) {
212 vrele(un->un_lowervp);
213 if (un->un_path) {
214 free(un->un_path, M_TEMP);
215 un->un_path = 0;
216 }
217 if (un->un_dirvp) {
218 vrele(un->un_dirvp);
219 un->un_dirvp = NULLVP;
220 }
221 }
222 un->un_lowervp = lowervp;
223 un->un_lowersz = VNOVAL;
224 }
225
226 if (un->un_uppervp != uppervp) {
227 if (un->un_uppervp)
228 vrele(un->un_uppervp);
229
230 un->un_uppervp = uppervp;
231 un->un_uppersz = VNOVAL;
232 }
233
234 if (docache && (ohash != nhash)) {
235 LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
236 un->un_flags |= UN_CACHED;
237 }
238
239 union_list_unlock(nhash);
240 }
241
242 void
243 union_newlower(un, lowervp)
244 struct union_node *un;
245 struct vnode *lowervp;
246 {
247
248 union_updatevp(un, un->un_uppervp, lowervp);
249 }
250
251 void
252 union_newupper(un, uppervp)
253 struct union_node *un;
254 struct vnode *uppervp;
255 {
256
257 union_updatevp(un, uppervp, un->un_lowervp);
258 }
259
260 /*
261 * Keep track of size changes in the underlying vnodes.
262 * If the size changes, then callback to the vm layer
263 * giving priority to the upper layer size.
264 */
265 void
266 union_newsize(vp, uppersz, lowersz)
267 struct vnode *vp;
268 off_t uppersz, lowersz;
269 {
270 struct union_node *un;
271 off_t sz;
272
273 /* only interested in regular files */
274 if (vp->v_type != VREG)
275 return;
276
277 un = VTOUNION(vp);
278 sz = VNOVAL;
279
280 if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
281 un->un_uppersz = uppersz;
282 if (sz == VNOVAL)
283 sz = un->un_uppersz;
284 }
285
286 if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
287 un->un_lowersz = lowersz;
288 if (sz == VNOVAL)
289 sz = un->un_lowersz;
290 }
291
292 if (sz != VNOVAL) {
293 #ifdef UNION_DIAGNOSTIC
294 printf("union: %s size now %qd\n",
295 uppersz != VNOVAL ? "upper" : "lower", sz);
296 #endif
297 uvm_vnp_setsize(vp, sz);
298 }
299 }
300
301 /*
302 * allocate a union_node/vnode pair. the vnode is
303 * referenced and locked. the new vnode is returned
304 * via (vpp). (mp) is the mountpoint of the union filesystem,
305 * (dvp) is the parent directory where the upper layer object
306 * should exist (but doesn't) and (cnp) is the componentname
307 * information which is partially copied to allow the upper
308 * layer object to be created at a later time. (uppervp)
309 * and (lowervp) reference the upper and lower layer objects
310 * being mapped. either, but not both, can be nil.
311 * if supplied, (uppervp) is locked.
312 * the reference is either maintained in the new union_node
313 * object which is allocated, or they are vrele'd.
314 *
315 * all union_nodes are maintained on a singly-linked
316 * list. new nodes are only allocated when they cannot
317 * be found on this list. entries on the list are
318 * removed when the vfs reclaim entry is called.
319 *
320 * a single lock is kept for the entire list. this is
321 * needed because the getnewvnode() function can block
322 * waiting for a vnode to become free, in which case there
323 * may be more than one process trying to get the same
324 * vnode. this lock is only taken if we are going to
325 * call getnewvnode, since the kernel itself is single-threaded.
326 *
327 * if an entry is found on the list, then call vget() to
328 * take a reference. this is done because there may be
329 * zero references to it and so it needs to removed from
330 * the vnode free list.
331 */
332 int
333 union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
334 struct vnode **vpp;
335 struct mount *mp;
336 struct vnode *undvp; /* parent union vnode */
337 struct vnode *dvp; /* may be null */
338 struct componentname *cnp; /* may be null */
339 struct vnode *uppervp; /* may be null */
340 struct vnode *lowervp; /* may be null */
341 int docache;
342 {
343 int error;
344 struct union_node *un = NULL;
345 struct vnode *xlowervp = NULLVP;
346 struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
347 int hash = 0;
348 int vflag;
349 int try;
350
351 if (uppervp == NULLVP && lowervp == NULLVP)
352 panic("union: unidentifiable allocation");
353
354 if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
355 xlowervp = lowervp;
356 lowervp = NULLVP;
357 }
358
359 /* detect the root vnode (and aliases) */
360 vflag = VLAYER;
361 if ((uppervp == um->um_uppervp) &&
362 ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
363 if (lowervp == NULLVP) {
364 lowervp = um->um_lowervp;
365 if (lowervp != NULLVP)
366 VREF(lowervp);
367 }
368 vflag = VROOT;
369 }
370
371 loop:
372 if (!docache) {
373 un = 0;
374 } else for (try = 0; try < 3; try++) {
375 switch (try) {
376 case 0:
377 if (lowervp == NULLVP)
378 continue;
379 hash = UNION_HASH(uppervp, lowervp);
380 break;
381
382 case 1:
383 if (uppervp == NULLVP)
384 continue;
385 hash = UNION_HASH(uppervp, NULLVP);
386 break;
387
388 case 2:
389 if (lowervp == NULLVP)
390 continue;
391 hash = UNION_HASH(NULLVP, lowervp);
392 break;
393 }
394
395 while (union_list_lock(hash))
396 continue;
397
398 for (un = unhead[hash].lh_first; un != 0;
399 un = un->un_cache.le_next) {
400 if ((un->un_lowervp == lowervp ||
401 un->un_lowervp == NULLVP) &&
402 (un->un_uppervp == uppervp ||
403 un->un_uppervp == NULLVP) &&
404 (UNIONTOV(un)->v_mount == mp)) {
405 if (vget(UNIONTOV(un), 0)) {
406 union_list_unlock(hash);
407 goto loop;
408 }
409 break;
410 }
411 }
412
413 union_list_unlock(hash);
414
415 if (un)
416 break;
417 }
418
419 if (un) {
420 /*
421 * Obtain a lock on the union_node.
422 * uppervp is locked, though un->un_uppervp
423 * may not be. this doesn't break the locking
424 * hierarchy since in the case that un->un_uppervp
425 * is not yet locked it will be vrele'd and replaced
426 * with uppervp.
427 */
428
429 if ((dvp != NULLVP) && (uppervp == dvp)) {
430 /*
431 * Access ``.'', so (un) will already
432 * be locked. Since this process has
433 * the lock on (uppervp) no other
434 * process can hold the lock on (un).
435 */
436 #ifdef DIAGNOSTIC
437 if ((un->un_flags & UN_LOCKED) == 0)
438 panic("union: . not locked");
439 else if (curproc && un->un_pid != curproc->p_pid &&
440 un->un_pid > -1 && curproc->p_pid > -1)
441 panic("union: allocvp not lock owner");
442 #endif
443 } else {
444 if (un->un_flags & UN_LOCKED) {
445 vrele(UNIONTOV(un));
446 un->un_flags |= UN_WANTED;
447 (void) tsleep(&un->un_flags, PINOD,
448 "unionalloc", 0);
449 goto loop;
450 }
451 un->un_flags |= UN_LOCKED;
452
453 #ifdef DIAGNOSTIC
454 if (curproc)
455 un->un_pid = curproc->p_pid;
456 else
457 un->un_pid = -1;
458 #endif
459 }
460
461 /*
462 * At this point, the union_node is locked,
463 * un->un_uppervp may not be locked, and uppervp
464 * is locked or nil.
465 */
466
467 /*
468 * Save information about the upper layer.
469 */
470 if (uppervp != un->un_uppervp) {
471 union_newupper(un, uppervp);
472 } else if (uppervp) {
473 vrele(uppervp);
474 }
475
476 if (un->un_uppervp) {
477 un->un_flags |= UN_ULOCK;
478 un->un_flags &= ~UN_KLOCK;
479 }
480
481 /*
482 * Save information about the lower layer.
483 * This needs to keep track of pathname
484 * and directory information which union_vn_create
485 * might need.
486 */
487 if (lowervp != un->un_lowervp) {
488 union_newlower(un, lowervp);
489 if (cnp && (lowervp != NULLVP)) {
490 un->un_hash = cnp->cn_hash;
491 un->un_path = malloc(cnp->cn_namelen+1,
492 M_TEMP, M_WAITOK);
493 memcpy(un->un_path, cnp->cn_nameptr,
494 cnp->cn_namelen);
495 un->un_path[cnp->cn_namelen] = '\0';
496 VREF(dvp);
497 un->un_dirvp = dvp;
498 }
499 } else if (lowervp) {
500 vrele(lowervp);
501 }
502 *vpp = UNIONTOV(un);
503 return (0);
504 }
505
506 if (docache) {
507 /*
508 * otherwise lock the vp list while we call getnewvnode
509 * since that can block.
510 */
511 hash = UNION_HASH(uppervp, lowervp);
512
513 if (union_list_lock(hash))
514 goto loop;
515 }
516
517 error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
518 if (error) {
519 if (uppervp) {
520 if (dvp == uppervp)
521 vrele(uppervp);
522 else
523 vput(uppervp);
524 }
525 if (lowervp)
526 vrele(lowervp);
527
528 goto out;
529 }
530
531 MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
532 M_TEMP, M_WAITOK);
533
534 (*vpp)->v_flag |= vflag;
535 (*vpp)->v_vnlock = NULL; /* Make upper layers call VOP_LOCK */
536 if (uppervp)
537 (*vpp)->v_type = uppervp->v_type;
538 else
539 (*vpp)->v_type = lowervp->v_type;
540 un = VTOUNION(*vpp);
541 un->un_vnode = *vpp;
542 un->un_uppervp = uppervp;
543 un->un_uppersz = VNOVAL;
544 un->un_lowervp = lowervp;
545 un->un_lowersz = VNOVAL;
546 un->un_pvp = undvp;
547 if (undvp != NULLVP)
548 VREF(undvp);
549 un->un_dircache = 0;
550 un->un_openl = 0;
551 un->un_flags = UN_LOCKED;
552 if (un->un_uppervp)
553 un->un_flags |= UN_ULOCK;
554 #ifdef DIAGNOSTIC
555 if (curproc)
556 un->un_pid = curproc->p_pid;
557 else
558 un->un_pid = -1;
559 #endif
560 if (dvp && cnp && (lowervp != NULLVP)) {
561 un->un_hash = cnp->cn_hash;
562 un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
563 memcpy(un->un_path, cnp->cn_nameptr, cnp->cn_namelen);
564 un->un_path[cnp->cn_namelen] = '\0';
565 VREF(dvp);
566 un->un_dirvp = dvp;
567 } else {
568 un->un_hash = 0;
569 un->un_path = 0;
570 un->un_dirvp = 0;
571 }
572
573 if (docache) {
574 LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
575 un->un_flags |= UN_CACHED;
576 }
577
578 if (xlowervp)
579 vrele(xlowervp);
580
581 out:
582 if (docache)
583 union_list_unlock(hash);
584
585 return (error);
586 }
587
588 int
589 union_freevp(vp)
590 struct vnode *vp;
591 {
592 struct union_node *un = VTOUNION(vp);
593
594 if (un->un_flags & UN_CACHED) {
595 un->un_flags &= ~UN_CACHED;
596 LIST_REMOVE(un, un_cache);
597 }
598
599 if (un->un_pvp != NULLVP)
600 vrele(un->un_pvp);
601 if (un->un_uppervp != NULLVP)
602 vrele(un->un_uppervp);
603 if (un->un_lowervp != NULLVP)
604 vrele(un->un_lowervp);
605 if (un->un_dirvp != NULLVP)
606 vrele(un->un_dirvp);
607 if (un->un_path)
608 free(un->un_path, M_TEMP);
609
610 FREE(vp->v_data, M_TEMP);
611 vp->v_data = 0;
612
613 return (0);
614 }
615
616 /*
617 * copyfile. copy the vnode (fvp) to the vnode (tvp)
618 * using a sequence of reads and writes. both (fvp)
619 * and (tvp) are locked on entry and exit.
620 */
621 int
622 union_copyfile(fvp, tvp, cred, l)
623 struct vnode *fvp;
624 struct vnode *tvp;
625 kauth_cred_t cred;
626 struct lwp *l;
627 {
628 char *tbuf;
629 struct uio uio;
630 struct iovec iov;
631 int error = 0;
632
633 /*
634 * strategy:
635 * allocate a buffer of size MAXBSIZE.
636 * loop doing reads and writes, keeping track
637 * of the current uio offset.
638 * give up at the first sign of trouble.
639 */
640
641 uio.uio_offset = 0;
642 UIO_SETUP_SYSSPACE(&uio);
643
644 VOP_UNLOCK(fvp, 0); /* XXX */
645 VOP_LEASE(fvp, l, cred, LEASE_READ);
646 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
647 VOP_UNLOCK(tvp, 0); /* XXX */
648 VOP_LEASE(tvp, l, cred, LEASE_WRITE);
649 vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
650
651 tbuf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
652
653 /* ugly loop follows... */
654 do {
655 off_t offset = uio.uio_offset;
656
657 uio.uio_iov = &iov;
658 uio.uio_iovcnt = 1;
659 iov.iov_base = tbuf;
660 iov.iov_len = MAXBSIZE;
661 uio.uio_resid = iov.iov_len;
662 uio.uio_rw = UIO_READ;
663 error = VOP_READ(fvp, &uio, 0, cred);
664
665 if (error == 0) {
666 uio.uio_iov = &iov;
667 uio.uio_iovcnt = 1;
668 iov.iov_base = tbuf;
669 iov.iov_len = MAXBSIZE - uio.uio_resid;
670 uio.uio_offset = offset;
671 uio.uio_rw = UIO_WRITE;
672 uio.uio_resid = iov.iov_len;
673
674 if (uio.uio_resid == 0)
675 break;
676
677 do {
678 error = VOP_WRITE(tvp, &uio, 0, cred);
679 } while ((uio.uio_resid > 0) && (error == 0));
680 }
681
682 } while (error == 0);
683
684 free(tbuf, M_TEMP);
685 return (error);
686 }
687
688 /*
689 * (un) is assumed to be locked on entry and remains
690 * locked on exit.
691 */
692 int
693 union_copyup(un, docopy, cred, l)
694 struct union_node *un;
695 int docopy;
696 kauth_cred_t cred;
697 struct lwp *l;
698 {
699 int error;
700 struct mount *mp;
701 struct vnode *lvp, *uvp;
702 struct vattr lvattr, uvattr;
703
704 if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | V_PCATCH)) != 0)
705 return (error);
706 error = union_vn_create(&uvp, un, l);
707 if (error) {
708 vn_finished_write(mp, 0);
709 return (error);
710 }
711
712 /* at this point, uppervp is locked */
713 union_newupper(un, uvp);
714 un->un_flags |= UN_ULOCK;
715
716 lvp = un->un_lowervp;
717
718 if (docopy) {
719 /*
720 * XX - should not ignore errors
721 * from VOP_CLOSE
722 */
723 vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
724
725 error = VOP_GETATTR(lvp, &lvattr, cred, l);
726 if (error == 0)
727 error = VOP_OPEN(lvp, FREAD, cred, l);
728 if (error == 0) {
729 error = union_copyfile(lvp, uvp, cred, l);
730 (void) VOP_CLOSE(lvp, FREAD, cred, l);
731 }
732 if (error == 0) {
733 /* Copy permissions up too */
734 VATTR_NULL(&uvattr);
735 uvattr.va_mode = lvattr.va_mode;
736 uvattr.va_flags = lvattr.va_flags;
737 error = VOP_SETATTR(uvp, &uvattr, cred, l);
738 }
739 VOP_UNLOCK(lvp, 0);
740 #ifdef UNION_DIAGNOSTIC
741 if (error == 0)
742 uprintf("union: copied up %s\n", un->un_path);
743 #endif
744
745 }
746 vn_finished_write(mp, 0);
747 union_vn_close(uvp, FWRITE, cred, l);
748
749 /*
750 * Subsequent IOs will go to the top layer, so
751 * call close on the lower vnode and open on the
752 * upper vnode to ensure that the filesystem keeps
753 * its references counts right. This doesn't do
754 * the right thing with (cred) and (FREAD) though.
755 * Ignoring error returns is not right, either.
756 */
757 if (error == 0) {
758 int i;
759
760 vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
761 for (i = 0; i < un->un_openl; i++) {
762 (void) VOP_CLOSE(lvp, FREAD, cred, l);
763 (void) VOP_OPEN(uvp, FREAD, cred, l);
764 }
765 un->un_openl = 0;
766 VOP_UNLOCK(lvp, 0);
767 }
768
769 return (error);
770
771 }
772
773 static int
774 union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
775 struct union_mount *um;
776 struct vnode *dvp;
777 struct vnode **vpp;
778 struct componentname *cnp;
779 struct componentname *cn;
780 const char *path;
781 int pathlen;
782 {
783 int error;
784
785 /*
786 * A new componentname structure must be faked up because
787 * there is no way to know where the upper level cnp came
788 * from or what it is being used for. This must duplicate
789 * some of the work done by NDINIT, some of the work done
790 * by namei, some of the work done by lookup and some of
791 * the work done by VOP_LOOKUP when given a CREATE flag.
792 * Conclusion: Horrible.
793 *
794 * The pathname buffer will be PNBUF_PUT'd by VOP_MKDIR.
795 */
796 cn->cn_namelen = pathlen;
797 if ((cn->cn_namelen + 1) > MAXPATHLEN)
798 return (ENAMETOOLONG);
799 cn->cn_pnbuf = PNBUF_GET();
800 memcpy(cn->cn_pnbuf, path, cn->cn_namelen);
801 cn->cn_pnbuf[cn->cn_namelen] = '\0';
802
803 cn->cn_nameiop = CREATE;
804 cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|ISLASTCN);
805 cn->cn_lwp = cnp->cn_lwp;
806 if (um->um_op == UNMNT_ABOVE)
807 cn->cn_cred = cnp->cn_cred;
808 else
809 cn->cn_cred = um->um_cred;
810 cn->cn_nameptr = cn->cn_pnbuf;
811 cn->cn_hash = cnp->cn_hash;
812 cn->cn_consume = cnp->cn_consume;
813
814 error = relookup(dvp, vpp, cn);
815 if (error) {
816 PNBUF_PUT(cn->cn_pnbuf);
817 cn->cn_pnbuf = 0;
818 }
819
820 return (error);
821 }
822
823 /*
824 * Create a shadow directory in the upper layer.
825 * The new vnode is returned locked.
826 *
827 * (um) points to the union mount structure for access to the
828 * the mounting process's credentials.
829 * (dvp) is the directory in which to create the shadow directory.
830 * it is unlocked on entry and exit.
831 * (cnp) is the componentname to be created.
832 * (vpp) is the returned newly created shadow directory, which
833 * is returned locked.
834 *
835 * N.B. We still attempt to create shadow directories even if the union
836 * is mounted read-only, which is a little nonintuitive.
837 */
838 int
839 union_mkshadow(um, dvp, cnp, vpp)
840 struct union_mount *um;
841 struct vnode *dvp;
842 struct componentname *cnp;
843 struct vnode **vpp;
844 {
845 int error;
846 struct vattr va;
847 struct lwp *l = cnp->cn_lwp;
848 struct componentname cn;
849 struct mount *mp;
850
851 if ((error = vn_start_write(dvp, &mp, V_WAIT | V_PCATCH)) != 0)
852 return (error);
853 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
854 error = union_relookup(um, dvp, vpp, cnp, &cn,
855 cnp->cn_nameptr, cnp->cn_namelen);
856 if (error) {
857 VOP_UNLOCK(dvp, 0);
858 vn_finished_write(mp, 0);
859 return (error);
860 }
861
862 if (*vpp) {
863 VOP_ABORTOP(dvp, &cn);
864 if (dvp != *vpp)
865 VOP_UNLOCK(dvp, 0);
866 vput(*vpp);
867 vn_finished_write(mp, 0);
868 *vpp = NULLVP;
869 return (EEXIST);
870 }
871
872 /*
873 * policy: when creating the shadow directory in the
874 * upper layer, create it owned by the user who did
875 * the mount, group from parent directory, and mode
876 * 777 modified by umask (ie mostly identical to the
877 * mkdir syscall). (jsp, kb)
878 */
879
880 VATTR_NULL(&va);
881 va.va_type = VDIR;
882 va.va_mode = um->um_cmode;
883
884 /* VOP_LEASE: dvp is locked */
885 VOP_LEASE(dvp, l, cn.cn_cred, LEASE_WRITE);
886
887 vref(dvp);
888 error = VOP_MKDIR(dvp, vpp, &cn, &va);
889 vn_finished_write(mp, 0);
890 return (error);
891 }
892
893 /*
894 * Create a whiteout entry in the upper layer.
895 *
896 * (um) points to the union mount structure for access to the
897 * the mounting process's credentials.
898 * (dvp) is the directory in which to create the whiteout.
899 * it is locked on entry and exit.
900 * (cnp) is the componentname to be created.
901 */
902 int
903 union_mkwhiteout(um, dvp, cnp, path)
904 struct union_mount *um;
905 struct vnode *dvp;
906 struct componentname *cnp;
907 char *path;
908 {
909 int error;
910 struct lwp *l = cnp->cn_lwp;
911 struct vnode *wvp;
912 struct componentname cn;
913 struct mount *mp;
914
915 VOP_UNLOCK(dvp, 0);
916 if ((error = vn_start_write(dvp, &mp, V_WAIT | V_PCATCH)) != 0)
917 return (error);
918 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
919 error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
920 if (error) {
921 vn_finished_write(mp, 0);
922 return (error);
923 }
924
925 if (wvp) {
926 VOP_ABORTOP(dvp, &cn);
927 if (dvp != wvp)
928 VOP_UNLOCK(dvp, 0);
929 vput(wvp);
930 vn_finished_write(mp, 0);
931 return (EEXIST);
932 }
933
934 /* VOP_LEASE: dvp is locked */
935 VOP_LEASE(dvp, l, l->l_cred, LEASE_WRITE);
936
937 error = VOP_WHITEOUT(dvp, &cn, CREATE);
938 if (error)
939 VOP_ABORTOP(dvp, &cn);
940
941 vn_finished_write(mp, 0);
942
943 return (error);
944 }
945
946 /*
947 * union_vn_create: creates and opens a new shadow file
948 * on the upper union layer. this function is similar
949 * in spirit to calling vn_open but it avoids calling namei().
950 * the problem with calling namei is that a) it locks too many
951 * things, and b) it doesn't start at the "right" directory,
952 * whereas relookup is told where to start.
953 */
954 int
955 union_vn_create(vpp, un, l)
956 struct vnode **vpp;
957 struct union_node *un;
958 struct lwp *l;
959 {
960 struct vnode *vp;
961 kauth_cred_t cred = l->l_cred;
962 struct vattr vat;
963 struct vattr *vap = &vat;
964 int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
965 int error;
966 int cmode = UN_FILEMODE & ~l->l_proc->p_cwdi->cwdi_cmask;
967 struct componentname cn;
968
969 *vpp = NULLVP;
970
971 /*
972 * Build a new componentname structure (for the same
973 * reasons outlines in union_mkshadow).
974 * The difference here is that the file is owned by
975 * the current user, rather than by the person who
976 * did the mount, since the current user needs to be
977 * able to write the file (that's why it is being
978 * copied in the first place).
979 */
980 cn.cn_namelen = strlen(un->un_path);
981 if ((cn.cn_namelen + 1) > MAXPATHLEN)
982 return (ENAMETOOLONG);
983 cn.cn_pnbuf = PNBUF_GET();
984 memcpy(cn.cn_pnbuf, un->un_path, cn.cn_namelen+1);
985 cn.cn_nameiop = CREATE;
986 cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|ISLASTCN);
987 cn.cn_lwp = l;
988 cn.cn_cred = l->l_cred;
989 cn.cn_nameptr = cn.cn_pnbuf;
990 cn.cn_hash = un->un_hash;
991 cn.cn_consume = 0;
992
993 vn_lock(un->un_dirvp, LK_EXCLUSIVE | LK_RETRY);
994 error = relookup(un->un_dirvp, &vp, &cn);
995 if (error) {
996 VOP_UNLOCK(un->un_dirvp, 0);
997 return (error);
998 }
999
1000 if (vp) {
1001 VOP_ABORTOP(un->un_dirvp, &cn);
1002 if (un->un_dirvp != vp)
1003 VOP_UNLOCK(un->un_dirvp, 0);
1004 vput(vp);
1005 return (EEXIST);
1006 }
1007
1008 /*
1009 * Good - there was no race to create the file
1010 * so go ahead and create it. The permissions
1011 * on the file will be 0666 modified by the
1012 * current user's umask. Access to the file, while
1013 * it is unioned, will require access to the top *and*
1014 * bottom files. Access when not unioned will simply
1015 * require access to the top-level file.
1016 * TODO: confirm choice of access permissions.
1017 */
1018 VATTR_NULL(vap);
1019 vap->va_type = VREG;
1020 vap->va_mode = cmode;
1021 VOP_LEASE(un->un_dirvp, l, cred, LEASE_WRITE);
1022 vref(un->un_dirvp);
1023 if ((error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap)) != 0)
1024 return (error);
1025
1026 if ((error = VOP_OPEN(vp, fmode, cred, l)) != 0) {
1027 vput(vp);
1028 return (error);
1029 }
1030
1031 vp->v_writecount++;
1032 *vpp = vp;
1033 return (0);
1034 }
1035
1036 int
1037 union_vn_close(vp, fmode, cred, l)
1038 struct vnode *vp;
1039 int fmode;
1040 kauth_cred_t cred;
1041 struct lwp *l;
1042 {
1043
1044 if (fmode & FWRITE)
1045 --vp->v_writecount;
1046 return (VOP_CLOSE(vp, fmode, cred, l));
1047 }
1048
1049 void
1050 union_removed_upper(un)
1051 struct union_node *un;
1052 {
1053 #if 1
1054 /*
1055 * We do not set the uppervp to NULLVP here, because lowervp
1056 * may also be NULLVP, so this routine would end up creating
1057 * a bogus union node with no upper or lower VP (that causes
1058 * pain in many places that assume at least one VP exists).
1059 * Since we've removed this node from the cache hash chains,
1060 * it won't be found again. When all current holders
1061 * release it, union_inactive() will vgone() it.
1062 */
1063 union_diruncache(un);
1064 #else
1065 union_newupper(un, NULLVP);
1066 #endif
1067
1068 if (un->un_flags & UN_CACHED) {
1069 un->un_flags &= ~UN_CACHED;
1070 LIST_REMOVE(un, un_cache);
1071 }
1072
1073 if (un->un_flags & UN_ULOCK) {
1074 un->un_flags &= ~UN_ULOCK;
1075 VOP_UNLOCK(un->un_uppervp, 0);
1076 }
1077 }
1078
#if 0
/*
 * (Disabled.)  Return the lower vnode of (vp) with a reference taken by
 * vget(), provided it exists and has the same type as (vp); otherwise
 * return NULLVP.
 */
struct vnode *
union_lowervp(struct vnode *vp)
{
	struct union_node *un = VTOUNION(vp);
	struct vnode *lower = un->un_lowervp;

	if (lower == NULLVP)
		return (NULLVP);
	if (vp->v_type != lower->v_type)
		return (NULLVP);
	if (vget(lower, 0) != 0)
		return (NULLVP);

	return (un->un_lowervp);
}
#endif
1095
1096 /*
1097 * determine whether a whiteout is needed
1098 * during a remove/rmdir operation.
1099 */
1100 int
1101 union_dowhiteout(un, cred, l)
1102 struct union_node *un;
1103 kauth_cred_t cred;
1104 struct lwp *l;
1105 {
1106 struct vattr va;
1107
1108 if (un->un_lowervp != NULLVP)
1109 return (1);
1110
1111 if (VOP_GETATTR(un->un_uppervp, &va, cred, l) == 0 &&
1112 (va.va_flags & OPAQUE))
1113 return (1);
1114
1115 return (0);
1116 }
1117
1118 static void
1119 union_dircache_r(vp, vppp, cntp)
1120 struct vnode *vp;
1121 struct vnode ***vppp;
1122 int *cntp;
1123 {
1124 struct union_node *un;
1125
1126 if (vp->v_op != union_vnodeop_p) {
1127 if (vppp) {
1128 VREF(vp);
1129 *(*vppp)++ = vp;
1130 if (--(*cntp) == 0)
1131 panic("union: dircache table too small");
1132 } else {
1133 (*cntp)++;
1134 }
1135
1136 return;
1137 }
1138
1139 un = VTOUNION(vp);
1140 if (un->un_uppervp != NULLVP)
1141 union_dircache_r(un->un_uppervp, vppp, cntp);
1142 if (un->un_lowervp != NULLVP)
1143 union_dircache_r(un->un_lowervp, vppp, cntp);
1144 }
1145
1146 struct vnode *
1147 union_dircache(struct vnode *vp, struct lwp *l)
1148 {
1149 int cnt;
1150 struct vnode *nvp = NULLVP;
1151 struct vnode **vpp;
1152 struct vnode **dircache;
1153 int error;
1154
1155 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1156 dircache = VTOUNION(vp)->un_dircache;
1157
1158 nvp = NULLVP;
1159
1160 if (dircache == 0) {
1161 cnt = 0;
1162 union_dircache_r(vp, 0, &cnt);
1163 cnt++;
1164 dircache = (struct vnode **)
1165 malloc(cnt * sizeof(struct vnode *),
1166 M_TEMP, M_WAITOK);
1167 vpp = dircache;
1168 union_dircache_r(vp, &vpp, &cnt);
1169 VTOUNION(vp)->un_dircache = dircache;
1170 *vpp = NULLVP;
1171 vpp = dircache + 1;
1172 } else {
1173 vpp = dircache;
1174 do {
1175 if (*vpp++ == VTOUNION(vp)->un_uppervp)
1176 break;
1177 } while (*vpp != NULLVP);
1178 }
1179
1180 if (*vpp == NULLVP)
1181 goto out;
1182
1183 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
1184 VREF(*vpp);
1185 error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
1186 if (!error) {
1187 VTOUNION(vp)->un_dircache = 0;
1188 VTOUNION(nvp)->un_dircache = dircache;
1189 }
1190
1191 out:
1192 VOP_UNLOCK(vp, 0);
1193 return (nvp);
1194 }
1195
1196 void
1197 union_diruncache(un)
1198 struct union_node *un;
1199 {
1200 struct vnode **vpp;
1201
1202 if (un->un_dircache != 0) {
1203 for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1204 vrele(*vpp);
1205 free(un->un_dircache, M_TEMP);
1206 un->un_dircache = 0;
1207 }
1208 }
1209
1210 /*
1211 * This hook is called from vn_readdir() to switch to lower directory
1212 * entry after the upper directory is read.
1213 */
1214 int
1215 union_readdirhook(struct vnode **vpp, struct file *fp, struct lwp *l)
1216 {
1217 struct vnode *vp = *vpp, *lvp;
1218 struct vattr va;
1219 int error;
1220
1221 if (vp->v_op != union_vnodeop_p)
1222 return (0);
1223
1224 if ((lvp = union_dircache(vp, l)) == NULLVP)
1225 return (0);
1226
1227 /*
1228 * If the directory is opaque,
1229 * then don't show lower entries
1230 */
1231 error = VOP_GETATTR(vp, &va, fp->f_cred, l);
1232 if (error || (va.va_flags & OPAQUE)) {
1233 vput(lvp);
1234 return (error);
1235 }
1236
1237 error = VOP_OPEN(lvp, FREAD, fp->f_cred, l);
1238 if (error) {
1239 vput(lvp);
1240 return (error);
1241 }
1242 VOP_UNLOCK(lvp, 0);
1243 fp->f_data = lvp;
1244 fp->f_offset = 0;
1245 error = vn_close(vp, FREAD, fp->f_cred, l);
1246 if (error)
1247 return (error);
1248 *vpp = lvp;
1249 return (0);
1250 }
Cache object: 719065918f068e4aa647ea959032ac37
|