1 /* $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
5 *
6 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to The NetBSD Foundation
10 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11 * 2005 program.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 /*
36 * tmpfs vnode interface.
37 */
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/dirent.h>
44 #include <sys/fcntl.h>
45 #include <sys/file.h>
46 #include <sys/filio.h>
47 #include <sys/limits.h>
48 #include <sys/lockf.h>
49 #include <sys/lock.h>
50 #include <sys/mount.h>
51 #include <sys/namei.h>
52 #include <sys/priv.h>
53 #include <sys/proc.h>
54 #include <sys/rwlock.h>
55 #include <sys/sched.h>
56 #include <sys/smr.h>
57 #include <sys/stat.h>
58 #include <sys/sysctl.h>
59 #include <sys/unistd.h>
60 #include <sys/vnode.h>
61 #include <security/audit/audit.h>
62 #include <security/mac/mac_framework.h>
63
64 #include <vm/vm.h>
65 #include <vm/vm_param.h>
66 #include <vm/vm_object.h>
67 #include <vm/vm_page.h>
68 #include <vm/vm_pager.h>
69 #include <vm/swap_pager.h>
70
71 #include <fs/tmpfs/tmpfs_vnops.h>
72 #include <fs/tmpfs/tmpfs.h>
73
SYSCTL_DECL(_vfs_tmpfs);
VFS_SMR_DECLARE;

/*
 * Running total of the lock-acquisition passes made by
 * tmpfs_rename_relock() (each call adds its local pass count on exit).
 * Exported read-only as "rename_restarts" under the _vfs_tmpfs sysctl
 * node.
 */
static volatile int tmpfs_rename_restarts;
SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
    __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
    "Times rename had to restart due to lock contention");
81
/*
 * Allocation callback handed to vn_vget_ino_gen() by tmpfs_lookup1().
 * The opaque argument is forwarded unchanged to tmpfs_alloc_vp(), whose
 * result is returned as is.
 */
static int
tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
    struct vnode **rvp)
{
	int error;

	error = tmpfs_alloc_vp(mp, arg, lkflags, rvp);
	return (error);
}
89
/*
 * Common lookup worker shared by tmpfs_lookup() and tmpfs_cached_lookup().
 *
 * Resolves the path component described by cnp inside the directory dvp
 * and stores the resulting vnode in *vpp (NULLVP when nothing is
 * returned).  The caller is responsible for the VEXEC check on dvp.
 *
 * Returns:
 *   0            *vpp holds the vnode for the component.
 *   EJUSTRETURN  the entry is missing (or is a whiteout) but the request
 *                is a CREATE/RENAME, or a DELETE of a whiteout with
 *                DOWHITEOUT, on the last component and VWRITE on dvp is
 *                granted.
 *   ENOENT       the entry does not exist, or dvp has no parent pointer.
 *   ENOTDIR      a non-directory, non-symlink entry was found before the
 *                last component.
 *   EPERM        S_ISTXT is set on the directory and VADMIN is granted
 *                on neither the directory nor the entry.
 *   other        whatever VOP_ACCESS(), tmpfs_alloc_vp() or
 *                vn_vget_ino_gen() reported.
 */
static int
tmpfs_lookup1(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct tmpfs_dirent *de;
	struct tmpfs_node *dnode, *pnode;
	struct tmpfs_mount *tm;
	int error;

	/* Caller assumes responsibility for ensuring access (VEXEC). */
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULLVP;

	/* We cannot be requesting the parent directory of the root node. */
	MPASS(IMPLIES(dnode->tn_type == VDIR &&
	    dnode->tn_dir.tn_parent == dnode,
	    !(cnp->cn_flags & ISDOTDOT)));

	TMPFS_ASSERT_LOCKED(dnode);
	/* A directory without a parent pointer cannot be searched. */
	if (dnode->tn_dir.tn_parent == NULL) {
		error = ENOENT;
		goto out;
	}
	if (cnp->cn_flags & ISDOTDOT) {
		/*
		 * "..": keep the parent node referenced across
		 * vn_vget_ino_gen(), then drop that reference again.
		 */
		tm = VFS_TO_TMPFS(dvp->v_mount);
		pnode = dnode->tn_dir.tn_parent;
		tmpfs_ref_node(pnode);
		error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc,
		    pnode, cnp->cn_lkflags, vpp);
		tmpfs_free_node(tm, pnode);
		if (error != 0)
			goto out;
	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		/* ".": return the directory itself with an extra reference. */
		VREF(dvp);
		*vpp = dvp;
		error = 0;
	} else {
		de = tmpfs_dir_lookup(dnode, NULL, cnp);
		/* An entry without a node is a whiteout. */
		if (de != NULL && de->td_node == NULL)
			cnp->cn_flags |= ISWHITEOUT;
		if (de == NULL || de->td_node == NULL) {
			/*
			 * The entry was not found in the directory.
			 * This is OK if we are creating or renaming an
			 * entry and are working on the last component of
			 * the path name.
			 */
			if ((cnp->cn_flags & ISLASTCN) &&
			    (cnp->cn_nameiop == CREATE || \
			    cnp->cn_nameiop == RENAME ||
			    (cnp->cn_nameiop == DELETE &&
			    cnp->cn_flags & DOWHITEOUT &&
			    cnp->cn_flags & ISWHITEOUT))) {
				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
				    curthread);
				if (error != 0)
					goto out;

				error = EJUSTRETURN;
			} else
				error = ENOENT;
		} else {
			struct tmpfs_node *tnode;

			/*
			 * The entry was found, so get its associated
			 * tmpfs_node.
			 */
			tnode = de->td_node;

			/*
			 * If we are not at the last path component and
			 * found a non-directory or non-link entry (which
			 * may itself be pointing to a directory), raise
			 * an error.
			 */
			if ((tnode->tn_type != VDIR &&
			    tnode->tn_type != VLNK) &&
			    !(cnp->cn_flags & ISLASTCN)) {
				error = ENOTDIR;
				goto out;
			}

			/*
			 * If we are deleting or renaming the entry, keep
			 * track of its tmpfs_dirent so that it can be
			 * easily deleted later.
			 */
			if ((cnp->cn_flags & ISLASTCN) &&
			    (cnp->cn_nameiop == DELETE ||
			    cnp->cn_nameiop == RENAME)) {
				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
				    curthread);
				if (error != 0)
					goto out;

				/* Allocate a new vnode on the matching entry. */
				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
				    cnp->cn_lkflags, vpp);
				if (error != 0)
					goto out;

				/*
				 * With S_ISTXT set on the directory, refuse
				 * unless VADMIN is granted on the directory
				 * or on the entry itself.
				 */
				if ((dnode->tn_mode & S_ISTXT) &&
				    VOP_ACCESS(dvp, VADMIN, cnp->cn_cred,
				    curthread) && VOP_ACCESS(*vpp, VADMIN,
				    cnp->cn_cred, curthread)) {
					error = EPERM;
					vput(*vpp);
					*vpp = NULL;
					goto out;
				}
			} else {
				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
				    cnp->cn_lkflags, vpp);
				if (error != 0)
					goto out;
			}
		}
	}

	/*
	 * Store the result of this lookup in the cache.  Avoid this if the
	 * request was for creation, as it does not improve timings on
	 * empirical tests.
	 */
	if ((cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
		cache_enter(dvp, *vpp, cnp);

out:
	/*
	 * If there were no errors, *vpp cannot be null and it must be
	 * locked.
	 */
	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp)));

	return (error);
}
226
227 static int
228 tmpfs_cached_lookup(struct vop_cachedlookup_args *v)
229 {
230
231 return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
232 }
233
234 static int
235 tmpfs_lookup(struct vop_lookup_args *v)
236 {
237 struct vnode *dvp = v->a_dvp;
238 struct vnode **vpp = v->a_vpp;
239 struct componentname *cnp = v->a_cnp;
240 int error;
241
242 /* Check accessibility of requested node as a first step. */
243 error = vn_dir_check_exec(dvp, cnp);
244 if (error != 0)
245 return (error);
246
247 return (tmpfs_lookup1(dvp, vpp, cnp));
248 }
249
250 static int
251 tmpfs_create(struct vop_create_args *v)
252 {
253 struct vnode *dvp = v->a_dvp;
254 struct vnode **vpp = v->a_vpp;
255 struct componentname *cnp = v->a_cnp;
256 struct vattr *vap = v->a_vap;
257 int error;
258
259 MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
260
261 error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
262 if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
263 cache_enter(dvp, *vpp, cnp);
264 return (error);
265 }
266
267 static int
268 tmpfs_mknod(struct vop_mknod_args *v)
269 {
270 struct vnode *dvp = v->a_dvp;
271 struct vnode **vpp = v->a_vpp;
272 struct componentname *cnp = v->a_cnp;
273 struct vattr *vap = v->a_vap;
274
275 if (vap->va_type != VBLK && vap->va_type != VCHR &&
276 vap->va_type != VFIFO)
277 return (EINVAL);
278
279 return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL));
280 }
281
/*
 * File operations vector that tmpfs_open() installs on regular files
 * through finit_vnode().
 */
struct fileops tmpfs_fnops;
283
284 static int
285 tmpfs_open(struct vop_open_args *v)
286 {
287 struct vnode *vp;
288 struct tmpfs_node *node;
289 struct file *fp;
290 int error, mode;
291
292 vp = v->a_vp;
293 mode = v->a_mode;
294 node = VP_TO_TMPFS_NODE(vp);
295
296 /*
297 * The file is still active but all its names have been removed
298 * (e.g. by a "rmdir $(pwd)"). It cannot be opened any more as
299 * it is about to die.
300 */
301 if (node->tn_links < 1)
302 return (ENOENT);
303
304 /* If the file is marked append-only, deny write requests. */
305 if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
306 error = EPERM;
307 else {
308 error = 0;
309 /* For regular files, the call below is nop. */
310 KASSERT(vp->v_type != VREG || (node->tn_reg.tn_aobj->flags &
311 OBJ_DEAD) == 0, ("dead object"));
312 vnode_create_vobject(vp, node->tn_size, v->a_td);
313 }
314
315 fp = v->a_fp;
316 MPASS(fp == NULL || fp->f_data == NULL);
317 if (error == 0 && fp != NULL && vp->v_type == VREG) {
318 tmpfs_ref_node(node);
319 finit_vnode(fp, mode, node, &tmpfs_fnops);
320 }
321
322 return (error);
323 }
324
325 static int
326 tmpfs_close(struct vop_close_args *v)
327 {
328 struct vnode *vp = v->a_vp;
329
330 /* Update node times. */
331 tmpfs_update(vp);
332
333 return (0);
334 }
335
336 int
337 tmpfs_fo_close(struct file *fp, struct thread *td)
338 {
339 struct tmpfs_node *node;
340
341 node = fp->f_data;
342 if (node != NULL) {
343 MPASS(node->tn_type == VREG);
344 tmpfs_free_node(node->tn_reg.tn_tmp, node);
345 }
346 return (vnops.fo_close(fp, td));
347 }
348
349 /*
350 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
351 * the comment above cache_fplookup for details.
352 */
353 int
354 tmpfs_fplookup_vexec(struct vop_fplookup_vexec_args *v)
355 {
356 struct vnode *vp;
357 struct tmpfs_node *node;
358 struct ucred *cred;
359 mode_t all_x, mode;
360
361 vp = v->a_vp;
362 node = VP_TO_TMPFS_NODE_SMR(vp);
363 if (__predict_false(node == NULL))
364 return (EAGAIN);
365
366 all_x = S_IXUSR | S_IXGRP | S_IXOTH;
367 mode = atomic_load_short(&node->tn_mode);
368 if (__predict_true((mode & all_x) == all_x))
369 return (0);
370
371 cred = v->a_cred;
372 return (vaccess_vexec_smr(mode, node->tn_uid, node->tn_gid, cred));
373 }
374
375 int
376 tmpfs_access(struct vop_access_args *v)
377 {
378 struct vnode *vp = v->a_vp;
379 accmode_t accmode = v->a_accmode;
380 struct ucred *cred = v->a_cred;
381 mode_t all_x = S_IXUSR | S_IXGRP | S_IXOTH;
382 int error;
383 struct tmpfs_node *node;
384
385 MPASS(VOP_ISLOCKED(vp));
386
387 node = VP_TO_TMPFS_NODE(vp);
388
389 /*
390 * Common case path lookup.
391 */
392 if (__predict_true(accmode == VEXEC && (node->tn_mode & all_x) == all_x))
393 return (0);
394
395 switch (vp->v_type) {
396 case VDIR:
397 /* FALLTHROUGH */
398 case VLNK:
399 /* FALLTHROUGH */
400 case VREG:
401 if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
402 error = EROFS;
403 goto out;
404 }
405 break;
406
407 case VBLK:
408 /* FALLTHROUGH */
409 case VCHR:
410 /* FALLTHROUGH */
411 case VSOCK:
412 /* FALLTHROUGH */
413 case VFIFO:
414 break;
415
416 default:
417 error = EINVAL;
418 goto out;
419 }
420
421 if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
422 error = EPERM;
423 goto out;
424 }
425
426 error = vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
427 accmode, cred);
428
429 out:
430 MPASS(VOP_ISLOCKED(vp));
431
432 return (error);
433 }
434
435 int
436 tmpfs_stat(struct vop_stat_args *v)
437 {
438 struct vnode *vp = v->a_vp;
439 struct stat *sb = v->a_sb;
440 struct tmpfs_node *node;
441 int error;
442
443 node = VP_TO_TMPFS_NODE(vp);
444
445 tmpfs_update_getattr(vp);
446
447 error = vop_stat_helper_pre(v);
448 if (__predict_false(error))
449 return (error);
450
451 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
452 sb->st_ino = node->tn_id;
453 sb->st_mode = node->tn_mode | VTTOIF(vp->v_type);
454 sb->st_nlink = node->tn_links;
455 sb->st_uid = node->tn_uid;
456 sb->st_gid = node->tn_gid;
457 sb->st_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
458 node->tn_rdev : NODEV;
459 sb->st_size = node->tn_size;
460 sb->st_atim.tv_sec = node->tn_atime.tv_sec;
461 sb->st_atim.tv_nsec = node->tn_atime.tv_nsec;
462 sb->st_mtim.tv_sec = node->tn_mtime.tv_sec;
463 sb->st_mtim.tv_nsec = node->tn_mtime.tv_nsec;
464 sb->st_ctim.tv_sec = node->tn_ctime.tv_sec;
465 sb->st_ctim.tv_nsec = node->tn_ctime.tv_nsec;
466 sb->st_birthtim.tv_sec = node->tn_birthtime.tv_sec;
467 sb->st_birthtim.tv_nsec = node->tn_birthtime.tv_nsec;
468 sb->st_blksize = PAGE_SIZE;
469 sb->st_flags = node->tn_flags;
470 sb->st_gen = node->tn_gen;
471 if (vp->v_type == VREG) {
472 #ifdef __ILP32__
473 vm_object_t obj = node->tn_reg.tn_aobj;
474
475 /* Handle torn read */
476 VM_OBJECT_RLOCK(obj);
477 #endif
478 sb->st_blocks = ptoa(node->tn_reg.tn_pages);
479 #ifdef __ILP32__
480 VM_OBJECT_RUNLOCK(obj);
481 #endif
482 } else {
483 sb->st_blocks = node->tn_size;
484 }
485 sb->st_blocks /= S_BLKSIZE;
486 return (vop_stat_helper_post(v, error));
487 }
488
489 int
490 tmpfs_getattr(struct vop_getattr_args *v)
491 {
492 struct vnode *vp = v->a_vp;
493 struct vattr *vap = v->a_vap;
494 struct tmpfs_node *node;
495
496 node = VP_TO_TMPFS_NODE(vp);
497
498 tmpfs_update_getattr(vp);
499
500 vap->va_type = vp->v_type;
501 vap->va_mode = node->tn_mode;
502 vap->va_nlink = node->tn_links;
503 vap->va_uid = node->tn_uid;
504 vap->va_gid = node->tn_gid;
505 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
506 vap->va_fileid = node->tn_id;
507 vap->va_size = node->tn_size;
508 vap->va_blocksize = PAGE_SIZE;
509 vap->va_atime = node->tn_atime;
510 vap->va_mtime = node->tn_mtime;
511 vap->va_ctime = node->tn_ctime;
512 vap->va_birthtime = node->tn_birthtime;
513 vap->va_gen = node->tn_gen;
514 vap->va_flags = node->tn_flags;
515 vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
516 node->tn_rdev : NODEV;
517 if (vp->v_type == VREG) {
518 #ifdef __ILP32__
519 vm_object_t obj = node->tn_reg.tn_aobj;
520
521 VM_OBJECT_RLOCK(obj);
522 #endif
523 vap->va_bytes = ptoa(node->tn_reg.tn_pages);
524 #ifdef __ILP32__
525 VM_OBJECT_RUNLOCK(obj);
526 #endif
527 } else {
528 vap->va_bytes = node->tn_size;
529 }
530 vap->va_filerev = 0;
531
532 return (0);
533 }
534
535 int
536 tmpfs_setattr(struct vop_setattr_args *v)
537 {
538 struct vnode *vp = v->a_vp;
539 struct vattr *vap = v->a_vap;
540 struct ucred *cred = v->a_cred;
541 struct thread *td = curthread;
542
543 int error;
544
545 MPASS(VOP_ISLOCKED(vp));
546 ASSERT_VOP_IN_SEQC(vp);
547
548 error = 0;
549
550 /* Abort if any unsettable attribute is given. */
551 if (vap->va_type != VNON ||
552 vap->va_nlink != VNOVAL ||
553 vap->va_fsid != VNOVAL ||
554 vap->va_fileid != VNOVAL ||
555 vap->va_blocksize != VNOVAL ||
556 vap->va_gen != VNOVAL ||
557 vap->va_rdev != VNOVAL ||
558 vap->va_bytes != VNOVAL)
559 error = EINVAL;
560
561 if (error == 0 && (vap->va_flags != VNOVAL))
562 error = tmpfs_chflags(vp, vap->va_flags, cred, td);
563
564 if (error == 0 && (vap->va_size != VNOVAL))
565 error = tmpfs_chsize(vp, vap->va_size, cred, td);
566
567 if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
568 error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
569
570 if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
571 error = tmpfs_chmod(vp, vap->va_mode, cred, td);
572
573 if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
574 vap->va_atime.tv_nsec != VNOVAL) ||
575 (vap->va_mtime.tv_sec != VNOVAL &&
576 vap->va_mtime.tv_nsec != VNOVAL) ||
577 (vap->va_birthtime.tv_sec != VNOVAL &&
578 vap->va_birthtime.tv_nsec != VNOVAL)))
579 error = tmpfs_chtimes(vp, vap, cred, td);
580
581 /*
582 * Update the node times. We give preference to the error codes
583 * generated by this function rather than the ones that may arise
584 * from tmpfs_update.
585 */
586 tmpfs_update(vp);
587
588 MPASS(VOP_ISLOCKED(vp));
589
590 return (error);
591 }
592
593 static int
594 tmpfs_read(struct vop_read_args *v)
595 {
596 struct vnode *vp;
597 struct uio *uio;
598 struct tmpfs_node *node;
599
600 vp = v->a_vp;
601 if (vp->v_type != VREG)
602 return (EISDIR);
603 uio = v->a_uio;
604 if (uio->uio_offset < 0)
605 return (EINVAL);
606 node = VP_TO_TMPFS_NODE(vp);
607 tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);
608 return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio));
609 }
610
611 static int
612 tmpfs_read_pgcache(struct vop_read_pgcache_args *v)
613 {
614 struct vnode *vp;
615 struct tmpfs_node *node;
616 vm_object_t object;
617 off_t size;
618 int error;
619
620 vp = v->a_vp;
621 VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) != 0, vp);
622
623 if (v->a_uio->uio_offset < 0)
624 return (EINVAL);
625
626 error = EJUSTRETURN;
627 vfs_smr_enter();
628
629 node = VP_TO_TMPFS_NODE_SMR(vp);
630 if (node == NULL)
631 goto out_smr;
632 MPASS(node->tn_type == VREG);
633 MPASS(node->tn_refcount >= 1);
634 object = node->tn_reg.tn_aobj;
635 if (object == NULL)
636 goto out_smr;
637
638 MPASS(object->type == tmpfs_pager_type);
639 MPASS((object->flags & (OBJ_ANON | OBJ_DEAD | OBJ_SWAP)) ==
640 OBJ_SWAP);
641 if (!VN_IS_DOOMED(vp)) {
642 /* size cannot become shorter due to rangelock. */
643 size = node->tn_size;
644 tmpfs_set_accessed(node->tn_reg.tn_tmp, node);
645 vfs_smr_exit();
646 error = uiomove_object(object, size, v->a_uio);
647 return (error);
648 }
649 out_smr:
650 vfs_smr_exit();
651 return (error);
652 }
653
654 static int
655 tmpfs_write(struct vop_write_args *v)
656 {
657 struct vnode *vp;
658 struct uio *uio;
659 struct tmpfs_node *node;
660 off_t oldsize;
661 ssize_t r;
662 int error, ioflag;
663 mode_t newmode;
664
665 vp = v->a_vp;
666 uio = v->a_uio;
667 ioflag = v->a_ioflag;
668 error = 0;
669 node = VP_TO_TMPFS_NODE(vp);
670 oldsize = node->tn_size;
671
672 if (uio->uio_offset < 0 || vp->v_type != VREG)
673 return (EINVAL);
674 if (uio->uio_resid == 0)
675 return (0);
676 if (ioflag & IO_APPEND)
677 uio->uio_offset = node->tn_size;
678 error = vn_rlimit_fsizex(vp, uio, VFS_TO_TMPFS(vp->v_mount)->
679 tm_maxfilesize, &r, uio->uio_td);
680 if (error != 0) {
681 vn_rlimit_fsizex_res(uio, r);
682 return (error);
683 }
684
685 if (uio->uio_offset + uio->uio_resid > node->tn_size) {
686 error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
687 FALSE);
688 if (error != 0)
689 goto out;
690 }
691
692 error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio);
693 node->tn_status |= TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED;
694 node->tn_accessed = true;
695 if (node->tn_mode & (S_ISUID | S_ISGID)) {
696 if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID)) {
697 newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
698 vn_seqc_write_begin(vp);
699 atomic_store_short(&node->tn_mode, newmode);
700 vn_seqc_write_end(vp);
701 }
702 }
703 if (error != 0)
704 (void)tmpfs_reg_resize(vp, oldsize, TRUE);
705
706 out:
707 MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
708 MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
709
710 vn_rlimit_fsizex_res(uio, r);
711 return (error);
712 }
713
714 static int
715 tmpfs_deallocate(struct vop_deallocate_args *v)
716 {
717 return (tmpfs_reg_punch_hole(v->a_vp, v->a_offset, v->a_len));
718 }
719
720 static int
721 tmpfs_fsync(struct vop_fsync_args *v)
722 {
723 struct vnode *vp = v->a_vp;
724
725 MPASS(VOP_ISLOCKED(vp));
726
727 tmpfs_check_mtime(vp);
728 tmpfs_update(vp);
729
730 return (0);
731 }
732
733 static int
734 tmpfs_remove(struct vop_remove_args *v)
735 {
736 struct vnode *dvp = v->a_dvp;
737 struct vnode *vp = v->a_vp;
738
739 int error;
740 struct tmpfs_dirent *de;
741 struct tmpfs_mount *tmp;
742 struct tmpfs_node *dnode;
743 struct tmpfs_node *node;
744
745 MPASS(VOP_ISLOCKED(dvp));
746 MPASS(VOP_ISLOCKED(vp));
747
748 if (vp->v_type == VDIR) {
749 error = EISDIR;
750 goto out;
751 }
752
753 dnode = VP_TO_TMPFS_DIR(dvp);
754 node = VP_TO_TMPFS_NODE(vp);
755 tmp = VFS_TO_TMPFS(vp->v_mount);
756 de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
757 MPASS(de != NULL);
758
759 /* Files marked as immutable or append-only cannot be deleted. */
760 if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
761 (dnode->tn_flags & APPEND)) {
762 error = EPERM;
763 goto out;
764 }
765
766 /* Remove the entry from the directory; as it is a file, we do not
767 * have to change the number of hard links of the directory. */
768 tmpfs_dir_detach(dvp, de);
769 if (v->a_cnp->cn_flags & DOWHITEOUT)
770 tmpfs_dir_whiteout_add(dvp, v->a_cnp);
771
772 /* Free the directory entry we just deleted. Note that the node
773 * referred by it will not be removed until the vnode is really
774 * reclaimed. */
775 tmpfs_free_dirent(tmp, de);
776
777 node->tn_status |= TMPFS_NODE_CHANGED;
778 node->tn_accessed = true;
779 error = 0;
780
781 out:
782 return (error);
783 }
784
785 static int
786 tmpfs_link(struct vop_link_args *v)
787 {
788 struct vnode *dvp = v->a_tdvp;
789 struct vnode *vp = v->a_vp;
790 struct componentname *cnp = v->a_cnp;
791
792 int error;
793 struct tmpfs_dirent *de;
794 struct tmpfs_node *node;
795
796 MPASS(VOP_ISLOCKED(dvp));
797 MPASS(dvp != vp); /* XXX When can this be false? */
798 node = VP_TO_TMPFS_NODE(vp);
799
800 /* Ensure that we do not overflow the maximum number of links imposed
801 * by the system. */
802 MPASS(node->tn_links <= TMPFS_LINK_MAX);
803 if (node->tn_links == TMPFS_LINK_MAX) {
804 error = EMLINK;
805 goto out;
806 }
807
808 /* We cannot create links of files marked immutable or append-only. */
809 if (node->tn_flags & (IMMUTABLE | APPEND)) {
810 error = EPERM;
811 goto out;
812 }
813
814 /* Allocate a new directory entry to represent the node. */
815 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
816 cnp->cn_nameptr, cnp->cn_namelen, &de);
817 if (error != 0)
818 goto out;
819
820 /* Insert the new directory entry into the appropriate directory. */
821 if (cnp->cn_flags & ISWHITEOUT)
822 tmpfs_dir_whiteout_remove(dvp, cnp);
823 tmpfs_dir_attach(dvp, de);
824
825 /* vp link count has changed, so update node times. */
826 node->tn_status |= TMPFS_NODE_CHANGED;
827 tmpfs_update(vp);
828
829 error = 0;
830
831 out:
832 return (error);
833 }
834
835 /*
836 * We acquire all but fdvp locks using non-blocking acquisitions. If we
837 * fail to acquire any lock in the path we will drop all held locks,
838 * acquire the new lock in a blocking fashion, and then release it and
839 * restart the rename. This acquire/release step ensures that we do not
840 * spin on a lock waiting for release. On error release all vnode locks
841 * and decrement references the way tmpfs_rename() would do.
842 */
843 static int
844 tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
845 struct vnode *tdvp, struct vnode **tvpp,
846 struct componentname *fcnp, struct componentname *tcnp)
847 {
848 struct vnode *nvp;
849 struct mount *mp;
850 struct tmpfs_dirent *de;
851 int error, restarts = 0;
852
853 VOP_UNLOCK(tdvp);
854 if (*tvpp != NULL && *tvpp != tdvp)
855 VOP_UNLOCK(*tvpp);
856 mp = fdvp->v_mount;
857
858 relock:
859 restarts += 1;
860 error = vn_lock(fdvp, LK_EXCLUSIVE);
861 if (error)
862 goto releout;
863 if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
864 VOP_UNLOCK(fdvp);
865 error = vn_lock(tdvp, LK_EXCLUSIVE);
866 if (error)
867 goto releout;
868 VOP_UNLOCK(tdvp);
869 goto relock;
870 }
871 /*
872 * Re-resolve fvp to be certain it still exists and fetch the
873 * correct vnode.
874 */
875 de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
876 if (de == NULL) {
877 VOP_UNLOCK(fdvp);
878 VOP_UNLOCK(tdvp);
879 if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
880 (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
881 error = EINVAL;
882 else
883 error = ENOENT;
884 goto releout;
885 }
886 error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
887 if (error != 0) {
888 VOP_UNLOCK(fdvp);
889 VOP_UNLOCK(tdvp);
890 if (error != EBUSY)
891 goto releout;
892 error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
893 if (error != 0)
894 goto releout;
895 VOP_UNLOCK(nvp);
896 /*
897 * Concurrent rename race.
898 */
899 if (nvp == tdvp) {
900 vrele(nvp);
901 error = EINVAL;
902 goto releout;
903 }
904 vrele(*fvpp);
905 *fvpp = nvp;
906 goto relock;
907 }
908 vrele(*fvpp);
909 *fvpp = nvp;
910 VOP_UNLOCK(*fvpp);
911 /*
912 * Re-resolve tvp and acquire the vnode lock if present.
913 */
914 de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
915 /*
916 * If tvp disappeared we just carry on.
917 */
918 if (de == NULL && *tvpp != NULL) {
919 vrele(*tvpp);
920 *tvpp = NULL;
921 }
922 /*
923 * Get the tvp ino if the lookup succeeded. We may have to restart
924 * if the non-blocking acquire fails.
925 */
926 if (de != NULL) {
927 nvp = NULL;
928 error = tmpfs_alloc_vp(mp, de->td_node,
929 LK_EXCLUSIVE | LK_NOWAIT, &nvp);
930 if (*tvpp != NULL)
931 vrele(*tvpp);
932 *tvpp = nvp;
933 if (error != 0) {
934 VOP_UNLOCK(fdvp);
935 VOP_UNLOCK(tdvp);
936 if (error != EBUSY)
937 goto releout;
938 error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
939 &nvp);
940 if (error != 0)
941 goto releout;
942 VOP_UNLOCK(nvp);
943 /*
944 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
945 */
946 if (nvp == fdvp) {
947 error = ENOTEMPTY;
948 goto releout;
949 }
950 goto relock;
951 }
952 }
953 tmpfs_rename_restarts += restarts;
954
955 return (0);
956
957 releout:
958 vrele(fdvp);
959 vrele(*fvpp);
960 vrele(tdvp);
961 if (*tvpp != NULL)
962 vrele(*tvpp);
963 tmpfs_rename_restarts += restarts;
964
965 return (error);
966 }
967
/*
 * Rename the entry a_fcnp (vnode a_fvp) in directory a_fdvp to the name
 * a_tcnp in directory a_tdvp, replacing a_tvp when it is non-NULL.
 *
 * On entry tdvp, and tvp when present, are asserted to be locked.  Every
 * path that leaves through the "out" label unlocks and releases tdvp and
 * tvp and releases fdvp and fvp.  When tmpfs_rename_relock() fails the
 * function returns at once without touching the vnodes again.
 *
 * Returns 0 on success (including the case where source and target are
 * the same vnode), otherwise EXDEV, EINVAL, ENOENT, EPERM, ENOTEMPTY,
 * ENOTDIR, EISDIR, or the error from tmpfs_rename_relock().
 */
static int
tmpfs_rename(struct vop_rename_args *v)
{
	struct vnode *fdvp = v->a_fdvp;
	struct vnode *fvp = v->a_fvp;
	struct componentname *fcnp = v->a_fcnp;
	struct vnode *tdvp = v->a_tdvp;
	struct vnode *tvp = v->a_tvp;
	struct componentname *tcnp = v->a_tcnp;
	char *newname;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *fdnode;
	struct tmpfs_node *fnode;
	struct tmpfs_node *tnode;
	struct tmpfs_node *tdnode;
	int error;
	bool want_seqc_end;

	MPASS(VOP_ISLOCKED(tdvp));
	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp)));

	/* Set once the vn_seqc_write_begin() calls below have been made. */
	want_seqc_end = false;

	/*
	 * Disallow cross-device renames.
	 * XXX Why isn't this done by the caller?
	 */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
		goto out;
	}

	/* If source and target are the same file, there is nothing to do. */
	if (fvp == tvp) {
		error = 0;
		goto out;
	}

	/*
	 * If we need to move the directory between entries, lock the
	 * source so that we can safely operate on it.
	 */
	if (fdvp != tdvp && fdvp != tvp) {
		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
			/*
			 * The relock helper may replace fvp and tvp; on
			 * failure it has already dropped locks and
			 * references, hence the direct return.
			 */
			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
			    fcnp, tcnp);
			if (error != 0)
				return (error);
			ASSERT_VOP_ELOCKED(fdvp,
			    "tmpfs_rename: fdvp not locked");
			ASSERT_VOP_ELOCKED(tdvp,
			    "tmpfs_rename: tdvp not locked");
			if (tvp != NULL)
				ASSERT_VOP_ELOCKED(tvp,
				    "tmpfs_rename: tvp not locked");
			if (fvp == tvp) {
				error = 0;
				goto out_locked;
			}
		}
	}

	if (tvp != NULL)
		vn_seqc_write_begin(tvp);
	vn_seqc_write_begin(tdvp);
	vn_seqc_write_begin(fvp);
	vn_seqc_write_begin(fdvp);
	want_seqc_end = true;

	tmp = VFS_TO_TMPFS(tdvp->v_mount);
	tdnode = VP_TO_TMPFS_DIR(tdvp);
	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
	fdnode = VP_TO_TMPFS_DIR(fdvp);
	fnode = VP_TO_TMPFS_NODE(fvp);
	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);

	/*
	 * Entry can disappear before we lock fdvp,
	 * also avoid manipulating '.' and '..' entries.
	 */
	if (de == NULL) {
		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
			error = EINVAL;
		else
			error = ENOENT;
		goto out_locked;
	}
	MPASS(de->td_node == fnode);

	/*
	 * If re-naming a directory to another preexisting directory
	 * ensure that the target directory is empty so that its
	 * removal causes no side effects.
	 * Kern_rename guarantees the destination to be a directory
	 * if the source is one.
	 */
	if (tvp != NULL) {
		MPASS(tnode != NULL);

		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
			error = EPERM;
			goto out_locked;
		}

		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
			if (tnode->tn_size > 0) {
				error = ENOTEMPTY;
				goto out_locked;
			}
		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
			error = ENOTDIR;
			goto out_locked;
		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
			error = EISDIR;
			goto out_locked;
		} else {
			MPASS(fnode->tn_type != VDIR &&
			    tnode->tn_type != VDIR);
		}
	}

	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
		error = EPERM;
		goto out_locked;
	}

	/*
	 * Ensure that we have enough memory to hold the new name, if it
	 * has to be changed.
	 */
	if (fcnp->cn_namelen != tcnp->cn_namelen ||
	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
	} else
		newname = NULL;

	/*
	 * If the node is being moved to another directory, we have to do
	 * the move.
	 */
	if (fdnode != tdnode) {
		/*
		 * In case we are moving a directory, we have to adjust its
		 * parent to point to the new parent.
		 */
		if (de->td_node->tn_type == VDIR) {
			struct tmpfs_node *n;

			/*
			 * Ensure the target directory is not a child of the
			 * directory being moved.  Otherwise, we'd end up
			 * with stale nodes.
			 */
			n = tdnode;
			/*
			 * TMPFS_LOCK guarantees that no nodes are freed while
			 * traversing the list. Nodes can only be marked as
			 * removed: tn_parent == NULL.
			 */
			TMPFS_LOCK(tmp);
			TMPFS_NODE_LOCK(n);
			/*
			 * Walk up from tdnode until a node that is its own
			 * parent is reached, holding the lock of the node
			 * currently being examined.
			 */
			while (n != n->tn_dir.tn_parent) {
				struct tmpfs_node *parent;

				if (n == fnode) {
					TMPFS_NODE_UNLOCK(n);
					TMPFS_UNLOCK(tmp);
					error = EINVAL;
					if (newname != NULL)
						free(newname, M_TMPFSNAME);
					goto out_locked;
				}
				parent = n->tn_dir.tn_parent;
				TMPFS_NODE_UNLOCK(n);
				if (parent == NULL) {
					n = NULL;
					break;
				}
				TMPFS_NODE_LOCK(parent);
				if (parent->tn_dir.tn_parent == NULL) {
					TMPFS_NODE_UNLOCK(parent);
					n = NULL;
					break;
				}
				n = parent;
			}
			TMPFS_UNLOCK(tmp);
			/* n == NULL: a removed node was met on the way up. */
			if (n == NULL) {
				error = EINVAL;
				if (newname != NULL)
					free(newname, M_TMPFSNAME);
				goto out_locked;
			}
			TMPFS_NODE_UNLOCK(n);

			/* Adjust the parent pointer. */
			TMPFS_VALIDATE_DIR(fnode);
			TMPFS_NODE_LOCK(de->td_node);
			de->td_node->tn_dir.tn_parent = tdnode;
			TMPFS_NODE_UNLOCK(de->td_node);

			/*
			 * As a result of changing the target of the '..'
			 * entry, the link count of the source and target
			 * directories has to be adjusted.
			 */
			TMPFS_NODE_LOCK(tdnode);
			TMPFS_ASSERT_LOCKED(tdnode);
			tdnode->tn_links++;
			TMPFS_NODE_UNLOCK(tdnode);

			TMPFS_NODE_LOCK(fdnode);
			TMPFS_ASSERT_LOCKED(fdnode);
			fdnode->tn_links--;
			TMPFS_NODE_UNLOCK(fdnode);
		}
	}

	/*
	 * Do the move: just remove the entry from the source directory
	 * and insert it into the target one.
	 */
	tmpfs_dir_detach(fdvp, de);

	if (fcnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_whiteout_add(fdvp, fcnp);
	if (tcnp->cn_flags & ISWHITEOUT)
		tmpfs_dir_whiteout_remove(tdvp, tcnp);

	/*
	 * If the name has changed, we need to make it effective by changing
	 * it in the directory entry.
	 */
	if (newname != NULL) {
		MPASS(tcnp->cn_namelen <= MAXNAMLEN);

		/* The dirent takes ownership of the newname buffer. */
		free(de->ud.td_name, M_TMPFSNAME);
		de->ud.td_name = newname;
		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);

		fnode->tn_status |= TMPFS_NODE_CHANGED;
		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
	}

	/*
	 * If we are overwriting an entry, we have to remove the old one
	 * from the target directory.
	 */
	if (tvp != NULL) {
		struct tmpfs_dirent *tde;

		/* Remove the old entry from the target directory. */
		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
		tmpfs_dir_detach(tdvp, tde);

		/*
		 * Free the directory entry we just deleted.  Note that the
		 * node referred by it will not be removed until the vnode is
		 * really reclaimed.
		 */
		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
	}

	tmpfs_dir_attach(tdvp, de);

	if (tmpfs_use_nc(fvp)) {
		cache_vop_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp);
	}

	error = 0;

out_locked:
	/* fdvp was locked by this function only under this same condition. */
	if (fdvp != tdvp && fdvp != tvp)
		VOP_UNLOCK(fdvp);

out:
	if (want_seqc_end) {
		if (tvp != NULL)
			vn_seqc_write_end(tvp);
		vn_seqc_write_end(tdvp);
		vn_seqc_write_end(fvp);
		vn_seqc_write_end(fdvp);
	}

	/*
	 * Release target nodes.
	 * XXX: I don't understand when tdvp can be the same as tvp, but
	 * other code takes care of this...
	 */
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp != NULL)
		vput(tvp);

	/* Release source nodes. */
	vrele(fdvp);
	vrele(fvp);

	return (error);
}
1275
1276 static int
1277 tmpfs_mkdir(struct vop_mkdir_args *v)
1278 {
1279 struct vnode *dvp = v->a_dvp;
1280 struct vnode **vpp = v->a_vpp;
1281 struct componentname *cnp = v->a_cnp;
1282 struct vattr *vap = v->a_vap;
1283
1284 MPASS(vap->va_type == VDIR);
1285
1286 return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL));
1287 }
1288
1289 static int
1290 tmpfs_rmdir(struct vop_rmdir_args *v)
1291 {
1292 struct vnode *dvp = v->a_dvp;
1293 struct vnode *vp = v->a_vp;
1294
1295 int error;
1296 struct tmpfs_dirent *de;
1297 struct tmpfs_mount *tmp;
1298 struct tmpfs_node *dnode;
1299 struct tmpfs_node *node;
1300
1301 MPASS(VOP_ISLOCKED(dvp));
1302 MPASS(VOP_ISLOCKED(vp));
1303
1304 tmp = VFS_TO_TMPFS(dvp->v_mount);
1305 dnode = VP_TO_TMPFS_DIR(dvp);
1306 node = VP_TO_TMPFS_DIR(vp);
1307
1308 /* Directories with more than two entries ('.' and '..') cannot be
1309 * removed. */
1310 if (node->tn_size > 0) {
1311 error = ENOTEMPTY;
1312 goto out;
1313 }
1314
1315 if ((dnode->tn_flags & APPEND)
1316 || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1317 error = EPERM;
1318 goto out;
1319 }
1320
1321 /* This invariant holds only if we are not trying to remove "..".
1322 * We checked for that above so this is safe now. */
1323 MPASS(node->tn_dir.tn_parent == dnode);
1324
1325 /* Get the directory entry associated with node (vp). This was
1326 * filled by tmpfs_lookup while looking up the entry. */
1327 de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
1328 MPASS(TMPFS_DIRENT_MATCHES(de,
1329 v->a_cnp->cn_nameptr,
1330 v->a_cnp->cn_namelen));
1331
1332 /* Check flags to see if we are allowed to remove the directory. */
1333 if ((dnode->tn_flags & APPEND) != 0 ||
1334 (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0) {
1335 error = EPERM;
1336 goto out;
1337 }
1338
1339 /* Detach the directory entry from the directory (dnode). */
1340 tmpfs_dir_detach(dvp, de);
1341 if (v->a_cnp->cn_flags & DOWHITEOUT)
1342 tmpfs_dir_whiteout_add(dvp, v->a_cnp);
1343
1344 /* No vnode should be allocated for this entry from this point */
1345 TMPFS_NODE_LOCK(node);
1346 node->tn_links--;
1347 node->tn_dir.tn_parent = NULL;
1348 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1349 node->tn_accessed = true;
1350
1351 TMPFS_NODE_UNLOCK(node);
1352
1353 TMPFS_NODE_LOCK(dnode);
1354 dnode->tn_links--;
1355 dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1356 dnode->tn_accessed = true;
1357 TMPFS_NODE_UNLOCK(dnode);
1358
1359 if (tmpfs_use_nc(dvp)) {
1360 cache_vop_rmdir(dvp, vp);
1361 }
1362
1363 /* Free the directory entry we just deleted. Note that the node
1364 * referred by it will not be removed until the vnode is really
1365 * reclaimed. */
1366 tmpfs_free_dirent(tmp, de);
1367
1368 /* Release the deleted vnode (will destroy the node, notify
1369 * interested parties and clean it from the cache). */
1370
1371 dnode->tn_status |= TMPFS_NODE_CHANGED;
1372 tmpfs_update(dvp);
1373
1374 error = 0;
1375
1376 out:
1377 return (error);
1378 }
1379
1380 static int
1381 tmpfs_symlink(struct vop_symlink_args *v)
1382 {
1383 struct vnode *dvp = v->a_dvp;
1384 struct vnode **vpp = v->a_vpp;
1385 struct componentname *cnp = v->a_cnp;
1386 struct vattr *vap = v->a_vap;
1387 const char *target = v->a_target;
1388
1389 #ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
1390 MPASS(vap->va_type == VLNK);
1391 #else
1392 vap->va_type = VLNK;
1393 #endif
1394
1395 return (tmpfs_alloc_file(dvp, vpp, vap, cnp, target));
1396 }
1397
1398 static int
1399 tmpfs_readdir(struct vop_readdir_args *va)
1400 {
1401 struct vnode *vp;
1402 struct uio *uio;
1403 struct tmpfs_mount *tm;
1404 struct tmpfs_node *node;
1405 uint64_t **cookies;
1406 int *eofflag, *ncookies;
1407 ssize_t startresid;
1408 int error, maxcookies;
1409
1410 vp = va->a_vp;
1411 uio = va->a_uio;
1412 eofflag = va->a_eofflag;
1413 cookies = va->a_cookies;
1414 ncookies = va->a_ncookies;
1415
1416 /* This operation only makes sense on directory nodes. */
1417 if (vp->v_type != VDIR)
1418 return (ENOTDIR);
1419
1420 maxcookies = 0;
1421 node = VP_TO_TMPFS_DIR(vp);
1422 tm = VFS_TO_TMPFS(vp->v_mount);
1423
1424 startresid = uio->uio_resid;
1425
1426 /* Allocate cookies for NFS and compat modules. */
1427 if (cookies != NULL && ncookies != NULL) {
1428 maxcookies = howmany(node->tn_size,
1429 sizeof(struct tmpfs_dirent)) + 2;
1430 *cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
1431 M_WAITOK);
1432 *ncookies = 0;
1433 }
1434
1435 if (cookies == NULL)
1436 error = tmpfs_dir_getdents(tm, node, uio, 0, NULL, NULL);
1437 else
1438 error = tmpfs_dir_getdents(tm, node, uio, maxcookies, *cookies,
1439 ncookies);
1440
1441 /* Buffer was filled without hitting EOF. */
1442 if (error == EJUSTRETURN)
1443 error = (uio->uio_resid != startresid) ? 0 : EINVAL;
1444
1445 if (error != 0 && cookies != NULL && ncookies != NULL) {
1446 free(*cookies, M_TEMP);
1447 *cookies = NULL;
1448 *ncookies = 0;
1449 }
1450
1451 if (eofflag != NULL)
1452 *eofflag =
1453 (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1454
1455 return (error);
1456 }
1457
1458 static int
1459 tmpfs_readlink(struct vop_readlink_args *v)
1460 {
1461 struct vnode *vp = v->a_vp;
1462 struct uio *uio = v->a_uio;
1463
1464 int error;
1465 struct tmpfs_node *node;
1466
1467 MPASS(uio->uio_offset == 0);
1468 MPASS(vp->v_type == VLNK);
1469
1470 node = VP_TO_TMPFS_NODE(vp);
1471
1472 error = uiomove(node->tn_link_target, MIN(node->tn_size, uio->uio_resid),
1473 uio);
1474 tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);
1475
1476 return (error);
1477 }
1478
1479 /*
1480 * VOP_FPLOOKUP_SYMLINK routines are subject to special circumstances, see
1481 * the comment above cache_fplookup for details.
1482 *
1483 * Check tmpfs_alloc_node for tmpfs-specific synchronisation notes.
1484 */
1485 static int
1486 tmpfs_fplookup_symlink(struct vop_fplookup_symlink_args *v)
1487 {
1488 struct vnode *vp;
1489 struct tmpfs_node *node;
1490 char *symlink;
1491
1492 vp = v->a_vp;
1493 node = VP_TO_TMPFS_NODE_SMR(vp);
1494 if (__predict_false(node == NULL))
1495 return (EAGAIN);
1496 if (!atomic_load_char(&node->tn_link_smr))
1497 return (EAGAIN);
1498 symlink = atomic_load_ptr(&node->tn_link_target);
1499 if (symlink == NULL)
1500 return (EAGAIN);
1501
1502 return (cache_symlink_resolve(v->a_fpl, symlink, node->tn_size));
1503 }
1504
1505 static int
1506 tmpfs_inactive(struct vop_inactive_args *v)
1507 {
1508 struct vnode *vp;
1509 struct tmpfs_node *node;
1510
1511 vp = v->a_vp;
1512 node = VP_TO_TMPFS_NODE(vp);
1513 if (node->tn_links == 0)
1514 vrecycle(vp);
1515 else
1516 tmpfs_check_mtime(vp);
1517 return (0);
1518 }
1519
1520 static int
1521 tmpfs_need_inactive(struct vop_need_inactive_args *ap)
1522 {
1523 struct vnode *vp;
1524 struct tmpfs_node *node;
1525 struct vm_object *obj;
1526
1527 vp = ap->a_vp;
1528 node = VP_TO_TMPFS_NODE(vp);
1529 if (node->tn_links == 0)
1530 goto need;
1531 if (vp->v_type == VREG) {
1532 obj = vp->v_object;
1533 if (obj->generation != obj->cleangeneration)
1534 goto need;
1535 }
1536 return (0);
1537 need:
1538 return (1);
1539 }
1540
1541 int
1542 tmpfs_reclaim(struct vop_reclaim_args *v)
1543 {
1544 struct vnode *vp;
1545 struct tmpfs_mount *tmp;
1546 struct tmpfs_node *node;
1547 bool unlock;
1548
1549 vp = v->a_vp;
1550 node = VP_TO_TMPFS_NODE(vp);
1551 tmp = VFS_TO_TMPFS(vp->v_mount);
1552
1553 if (vp->v_type == VREG)
1554 tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
1555 vp->v_object = NULL;
1556
1557 TMPFS_LOCK(tmp);
1558 TMPFS_NODE_LOCK(node);
1559 tmpfs_free_vp(vp);
1560
1561 /*
1562 * If the node referenced by this vnode was deleted by the user,
1563 * we must free its associated data structures (now that the vnode
1564 * is being reclaimed).
1565 */
1566 unlock = true;
1567 if (node->tn_links == 0 &&
1568 (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
1569 node->tn_vpstate = TMPFS_VNODE_DOOMED;
1570 unlock = !tmpfs_free_node_locked(tmp, node, true);
1571 }
1572
1573 if (unlock) {
1574 TMPFS_NODE_UNLOCK(node);
1575 TMPFS_UNLOCK(tmp);
1576 }
1577
1578 MPASS(vp->v_data == NULL);
1579 return (0);
1580 }
1581
1582 int
1583 tmpfs_print(struct vop_print_args *v)
1584 {
1585 struct vnode *vp = v->a_vp;
1586
1587 struct tmpfs_node *node;
1588
1589 node = VP_TO_TMPFS_NODE(vp);
1590
1591 printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %jd\n",
1592 node, node->tn_flags, (uintmax_t)node->tn_links);
1593 printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
1594 node->tn_mode, node->tn_uid, node->tn_gid,
1595 (intmax_t)node->tn_size, node->tn_status);
1596
1597 if (vp->v_type == VFIFO)
1598 fifo_printinfo(vp);
1599
1600 printf("\n");
1601
1602 return (0);
1603 }
1604
1605 int
1606 tmpfs_pathconf(struct vop_pathconf_args *v)
1607 {
1608 struct vnode *vp = v->a_vp;
1609 int name = v->a_name;
1610 long *retval = v->a_retval;
1611
1612 int error;
1613
1614 error = 0;
1615
1616 switch (name) {
1617 case _PC_LINK_MAX:
1618 *retval = TMPFS_LINK_MAX;
1619 break;
1620
1621 case _PC_SYMLINK_MAX:
1622 *retval = MAXPATHLEN;
1623 break;
1624
1625 case _PC_NAME_MAX:
1626 *retval = NAME_MAX;
1627 break;
1628
1629 case _PC_PIPE_BUF:
1630 if (vp->v_type == VDIR || vp->v_type == VFIFO)
1631 *retval = PIPE_BUF;
1632 else
1633 error = EINVAL;
1634 break;
1635
1636 case _PC_CHOWN_RESTRICTED:
1637 *retval = 1;
1638 break;
1639
1640 case _PC_NO_TRUNC:
1641 *retval = 1;
1642 break;
1643
1644 case _PC_SYNC_IO:
1645 *retval = 1;
1646 break;
1647
1648 case _PC_FILESIZEBITS:
1649 *retval = 64;
1650 break;
1651
1652 case _PC_MIN_HOLE_SIZE:
1653 *retval = PAGE_SIZE;
1654 break;
1655
1656 default:
1657 error = vop_stdpathconf(v);
1658 }
1659
1660 return (error);
1661 }
1662
1663 static int
1664 tmpfs_vptofh(struct vop_vptofh_args *ap)
1665 /*
1666 vop_vptofh {
1667 IN struct vnode *a_vp;
1668 IN struct fid *a_fhp;
1669 };
1670 */
1671 {
1672 struct tmpfs_fid_data tfd;
1673 struct tmpfs_node *node;
1674 struct fid *fhp;
1675
1676 node = VP_TO_TMPFS_NODE(ap->a_vp);
1677 fhp = ap->a_fhp;
1678 fhp->fid_len = sizeof(tfd);
1679
1680 /*
1681 * Copy into fid_data from the stack to avoid unaligned pointer use.
1682 * See the comment in sys/mount.h on struct fid for details.
1683 */
1684 tfd.tfd_id = node->tn_id;
1685 tfd.tfd_gen = node->tn_gen;
1686 memcpy(fhp->fid_data, &tfd, fhp->fid_len);
1687
1688 return (0);
1689 }
1690
1691 static int
1692 tmpfs_whiteout(struct vop_whiteout_args *ap)
1693 {
1694 struct vnode *dvp = ap->a_dvp;
1695 struct componentname *cnp = ap->a_cnp;
1696 struct tmpfs_dirent *de;
1697
1698 switch (ap->a_flags) {
1699 case LOOKUP:
1700 return (0);
1701 case CREATE:
1702 de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
1703 if (de != NULL)
1704 return (de->td_node == NULL ? 0 : EEXIST);
1705 return (tmpfs_dir_whiteout_add(dvp, cnp));
1706 case DELETE:
1707 tmpfs_dir_whiteout_remove(dvp, cnp);
1708 return (0);
1709 default:
1710 panic("tmpfs_whiteout: unknown op");
1711 }
1712 }
1713
1714 static int
1715 tmpfs_vptocnp_dir(struct tmpfs_node *tn, struct tmpfs_node *tnp,
1716 struct tmpfs_dirent **pde)
1717 {
1718 struct tmpfs_dir_cursor dc;
1719 struct tmpfs_dirent *de;
1720
1721 for (de = tmpfs_dir_first(tnp, &dc); de != NULL;
1722 de = tmpfs_dir_next(tnp, &dc)) {
1723 if (de->td_node == tn) {
1724 *pde = de;
1725 return (0);
1726 }
1727 }
1728 return (ENOENT);
1729 }
1730
1731 static int
1732 tmpfs_vptocnp_fill(struct vnode *vp, struct tmpfs_node *tn,
1733 struct tmpfs_node *tnp, char *buf, size_t *buflen, struct vnode **dvp)
1734 {
1735 struct tmpfs_dirent *de;
1736 int error, i;
1737
1738 error = vn_vget_ino_gen(vp, tmpfs_vn_get_ino_alloc, tnp, LK_SHARED,
1739 dvp);
1740 if (error != 0)
1741 return (error);
1742 error = tmpfs_vptocnp_dir(tn, tnp, &de);
1743 if (error == 0) {
1744 i = *buflen;
1745 i -= de->td_namelen;
1746 if (i < 0) {
1747 error = ENOMEM;
1748 } else {
1749 bcopy(de->ud.td_name, buf + i, de->td_namelen);
1750 *buflen = i;
1751 }
1752 }
1753 if (error == 0) {
1754 if (vp != *dvp)
1755 VOP_UNLOCK(*dvp);
1756 } else {
1757 if (vp != *dvp)
1758 vput(*dvp);
1759 else
1760 vrele(vp);
1761 }
1762 return (error);
1763 }
1764
1765 static int
1766 tmpfs_vptocnp(struct vop_vptocnp_args *ap)
1767 {
1768 struct vnode *vp, **dvp;
1769 struct tmpfs_node *tn, *tnp, *tnp1;
1770 struct tmpfs_dirent *de;
1771 struct tmpfs_mount *tm;
1772 char *buf;
1773 size_t *buflen;
1774 int error;
1775
1776 vp = ap->a_vp;
1777 dvp = ap->a_vpp;
1778 buf = ap->a_buf;
1779 buflen = ap->a_buflen;
1780
1781 tm = VFS_TO_TMPFS(vp->v_mount);
1782 tn = VP_TO_TMPFS_NODE(vp);
1783 if (tn->tn_type == VDIR) {
1784 tnp = tn->tn_dir.tn_parent;
1785 if (tnp == NULL)
1786 return (ENOENT);
1787 tmpfs_ref_node(tnp);
1788 error = tmpfs_vptocnp_fill(vp, tn, tn->tn_dir.tn_parent, buf,
1789 buflen, dvp);
1790 tmpfs_free_node(tm, tnp);
1791 return (error);
1792 }
1793 restart:
1794 TMPFS_LOCK(tm);
1795 restart_locked:
1796 LIST_FOREACH_SAFE(tnp, &tm->tm_nodes_used, tn_entries, tnp1) {
1797 if (tnp->tn_type != VDIR)
1798 continue;
1799 TMPFS_NODE_LOCK(tnp);
1800 tmpfs_ref_node(tnp);
1801
1802 /*
1803 * tn_vnode cannot be instantiated while we hold the
1804 * node lock, so the directory cannot be changed while
1805 * we iterate over it. Do this to avoid instantiating
1806 * vnode for directories which cannot point to our
1807 * node.
1808 */
1809 error = tnp->tn_vnode == NULL ? tmpfs_vptocnp_dir(tn, tnp,
1810 &de) : 0;
1811
1812 if (error == 0) {
1813 TMPFS_NODE_UNLOCK(tnp);
1814 TMPFS_UNLOCK(tm);
1815 error = tmpfs_vptocnp_fill(vp, tn, tnp, buf, buflen,
1816 dvp);
1817 if (error == 0) {
1818 tmpfs_free_node(tm, tnp);
1819 return (0);
1820 }
1821 if (VN_IS_DOOMED(vp)) {
1822 tmpfs_free_node(tm, tnp);
1823 return (ENOENT);
1824 }
1825 TMPFS_LOCK(tm);
1826 TMPFS_NODE_LOCK(tnp);
1827 }
1828 if (tmpfs_free_node_locked(tm, tnp, false)) {
1829 goto restart;
1830 } else {
1831 KASSERT(tnp->tn_refcount > 0,
1832 ("node %p refcount zero", tnp));
1833 if (tnp->tn_attached) {
1834 tnp1 = LIST_NEXT(tnp, tn_entries);
1835 TMPFS_NODE_UNLOCK(tnp);
1836 } else {
1837 TMPFS_NODE_UNLOCK(tnp);
1838 goto restart_locked;
1839 }
1840 }
1841 }
1842 TMPFS_UNLOCK(tm);
1843 return (ENOENT);
1844 }
1845
1846 static off_t
1847 tmpfs_seek_data_locked(vm_object_t obj, off_t noff)
1848 {
1849 vm_page_t m;
1850 vm_pindex_t p, p_m, p_swp;
1851
1852 p = OFF_TO_IDX(noff);
1853 m = vm_page_find_least(obj, p);
1854
1855 /*
1856 * Microoptimize the most common case for SEEK_DATA, where
1857 * there is no hole and the page is resident.
1858 */
1859 if (m != NULL && vm_page_any_valid(m) && m->pindex == p)
1860 return (noff);
1861
1862 p_swp = swap_pager_find_least(obj, p);
1863 if (p_swp == p)
1864 return (noff);
1865
1866 p_m = m == NULL ? obj->size : m->pindex;
1867 return (IDX_TO_OFF(MIN(p_m, p_swp)));
1868 }
1869
1870 static off_t
1871 tmpfs_seek_next(off_t noff)
1872 {
1873 return (noff + PAGE_SIZE - (noff & PAGE_MASK));
1874 }
1875
1876 static int
1877 tmpfs_seek_clamp(struct tmpfs_node *tn, off_t *noff, bool seekdata)
1878 {
1879 if (*noff < tn->tn_size)
1880 return (0);
1881 if (seekdata)
1882 return (ENXIO);
1883 *noff = tn->tn_size;
1884 return (0);
1885 }
1886
1887 static off_t
1888 tmpfs_seek_hole_locked(vm_object_t obj, off_t noff)
1889 {
1890 vm_page_t m;
1891 vm_pindex_t p, p_swp;
1892
1893 for (;; noff = tmpfs_seek_next(noff)) {
1894 /*
1895 * Walk over the largest sequential run of the valid pages.
1896 */
1897 for (m = vm_page_lookup(obj, OFF_TO_IDX(noff));
1898 m != NULL && vm_page_any_valid(m);
1899 m = vm_page_next(m), noff = tmpfs_seek_next(noff))
1900 ;
1901
1902 /*
1903 * Found a hole in the object's page queue. Check if
1904 * there is a hole in the swap at the same place.
1905 */
1906 p = OFF_TO_IDX(noff);
1907 p_swp = swap_pager_find_least(obj, p);
1908 if (p_swp != p) {
1909 noff = IDX_TO_OFF(p);
1910 break;
1911 }
1912 }
1913 return (noff);
1914 }
1915
1916 static int
1917 tmpfs_seek_datahole(struct vnode *vp, off_t *off, bool seekdata)
1918 {
1919 struct tmpfs_node *tn;
1920 vm_object_t obj;
1921 off_t noff;
1922 int error;
1923
1924 if (vp->v_type != VREG)
1925 return (ENOTTY);
1926 tn = VP_TO_TMPFS_NODE(vp);
1927 noff = *off;
1928 if (noff < 0)
1929 return (ENXIO);
1930 error = tmpfs_seek_clamp(tn, &noff, seekdata);
1931 if (error != 0)
1932 return (error);
1933 obj = tn->tn_reg.tn_aobj;
1934
1935 VM_OBJECT_RLOCK(obj);
1936 noff = seekdata ? tmpfs_seek_data_locked(obj, noff) :
1937 tmpfs_seek_hole_locked(obj, noff);
1938 VM_OBJECT_RUNLOCK(obj);
1939
1940 error = tmpfs_seek_clamp(tn, &noff, seekdata);
1941 if (error == 0)
1942 *off = noff;
1943 return (error);
1944 }
1945
1946 static int
1947 tmpfs_ioctl(struct vop_ioctl_args *ap)
1948 {
1949 struct vnode *vp = ap->a_vp;
1950 int error = 0;
1951
1952 switch (ap->a_command) {
1953 case FIOSEEKDATA:
1954 case FIOSEEKHOLE:
1955 error = vn_lock(vp, LK_SHARED);
1956 if (error != 0) {
1957 error = EBADF;
1958 break;
1959 }
1960 error = tmpfs_seek_datahole(vp, (off_t *)ap->a_data,
1961 ap->a_command == FIOSEEKDATA);
1962 VOP_UNLOCK(vp);
1963 break;
1964 default:
1965 error = ENOTTY;
1966 break;
1967 }
1968 return (error);
1969 }
1970
1971 /*
1972 * Vnode operations vector used for files stored in a tmpfs file system.
1973 */
1974 struct vop_vector tmpfs_vnodeop_entries = {
1975 .vop_default = &default_vnodeops,
1976 .vop_lookup = vfs_cache_lookup,
1977 .vop_cachedlookup = tmpfs_cached_lookup,
1978 .vop_create = tmpfs_create,
1979 .vop_mknod = tmpfs_mknod,
1980 .vop_open = tmpfs_open,
1981 .vop_close = tmpfs_close,
1982 .vop_fplookup_vexec = tmpfs_fplookup_vexec,
1983 .vop_fplookup_symlink = tmpfs_fplookup_symlink,
1984 .vop_access = tmpfs_access,
1985 .vop_stat = tmpfs_stat,
1986 .vop_getattr = tmpfs_getattr,
1987 .vop_setattr = tmpfs_setattr,
1988 .vop_read = tmpfs_read,
1989 .vop_read_pgcache = tmpfs_read_pgcache,
1990 .vop_write = tmpfs_write,
1991 .vop_deallocate = tmpfs_deallocate,
1992 .vop_fsync = tmpfs_fsync,
1993 .vop_remove = tmpfs_remove,
1994 .vop_link = tmpfs_link,
1995 .vop_rename = tmpfs_rename,
1996 .vop_mkdir = tmpfs_mkdir,
1997 .vop_rmdir = tmpfs_rmdir,
1998 .vop_symlink = tmpfs_symlink,
1999 .vop_readdir = tmpfs_readdir,
2000 .vop_readlink = tmpfs_readlink,
2001 .vop_inactive = tmpfs_inactive,
2002 .vop_need_inactive = tmpfs_need_inactive,
2003 .vop_reclaim = tmpfs_reclaim,
2004 .vop_print = tmpfs_print,
2005 .vop_pathconf = tmpfs_pathconf,
2006 .vop_vptofh = tmpfs_vptofh,
2007 .vop_whiteout = tmpfs_whiteout,
2008 .vop_bmap = VOP_EOPNOTSUPP,
2009 .vop_vptocnp = tmpfs_vptocnp,
2010 .vop_lock1 = vop_lock,
2011 .vop_unlock = vop_unlock,
2012 .vop_islocked = vop_islocked,
2013 .vop_add_writecount = vop_stdadd_writecount_nomsync,
2014 .vop_ioctl = tmpfs_ioctl,
2015 };
2016 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_entries);
2017
2018 /*
2019 * Same vector for mounts which do not use namecache.
2020 */
2021 struct vop_vector tmpfs_vnodeop_nonc_entries = {
2022 .vop_default = &tmpfs_vnodeop_entries,
2023 .vop_lookup = tmpfs_lookup,
2024 };
2025 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_nonc_entries);
Cache object: 869cf35c30d89adeaf3bf5539a0d1599
|