1 /* $NetBSD: vfs_syscalls.c,v 1.279.2.6 2011/03/20 20:36:56 bouyer Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.279.2.6 2011/03/20 20:36:56 bouyer Exp $");
41
42 #include "opt_compat_netbsd.h"
43 #include "opt_compat_43.h"
44 #include "opt_fileassoc.h"
45 #include "opt_ktrace.h"
46 #include "fss.h"
47 #include "veriexec.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/namei.h>
52 #include <sys/filedesc.h>
53 #include <sys/kernel.h>
54 #include <sys/file.h>
55 #include <sys/stat.h>
56 #include <sys/vnode.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/uio.h>
60 #include <sys/malloc.h>
61 #include <sys/kmem.h>
62 #include <sys/dirent.h>
63 #include <sys/sysctl.h>
64 #include <sys/sa.h>
65 #include <sys/syscallargs.h>
66 #ifdef KTRACE
67 #include <sys/ktrace.h>
68 #endif
69 #ifdef FILEASSOC
70 #include <sys/fileassoc.h>
71 #endif /* FILEASSOC */
72 #if NVERIEXEC > 0
73 #include <sys/verified_exec.h>
74 #include <sys/syslog.h>
75 #endif /* NVERIEXEC > 0 */
76 #include <sys/kauth.h>
77
78 #include <miscfs/genfs/genfs.h>
79 #include <miscfs/syncfs/syncfs.h>
80
81 #ifdef COMPAT_30
82 #include "opt_nfsserver.h"
83 #include <nfs/rpcv2.h>
84 #endif
85 #include <nfs/nfsproto.h>
86 #ifdef COMPAT_30
87 #include <nfs/nfs.h>
88 #include <nfs/nfs_var.h>
89 #endif
90
91 #if NFSS > 0
92 #include <dev/fssvar.h>
93 #endif
94
95 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
96
97 static int change_dir(struct nameidata *, struct lwp *);
98 static int change_flags(struct vnode *, u_long, struct lwp *);
99 static int change_mode(struct vnode *, int, struct lwp *l);
100 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
101 static int change_utimes(struct vnode *vp, const struct timeval *,
102 struct lwp *l);
103 static int rename_files(const char *, const char *, struct lwp *, int);
104
105 void checkdirs(struct vnode *);
106
107 int dovfsusermount = 0;
108
109 /*
110 * Virtual File System System Calls
111 */
112
113 /*
114 * Mount a file system.
115 */
116
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * Compatibility table for the 4.3BSD and NetBSD 0.9 mount syscalls,
 * which identified filesystem types by number.  The array index is the
 * historic type number, so the order is significant.
 *
 * Do not modify this table.  It should only contain filesystems
 * supported by NetBSD 0.9 and 4.3BSD.  A NULL entry marks a number
 * that sys_mount() rejects with ENODEV.
 */
const char * const mountcompatnames[] = {
	[0] = NULL,		/* MOUNT_NONE */
	[1] = MOUNT_FFS,	/* MOUNT_UFS */
	[2] = MOUNT_NFS,
	[3] = MOUNT_MFS,
	[4] = MOUNT_MSDOS,
	[5] = MOUNT_CD9660,	/* MOUNT_ISOFS */
	[6] = MOUNT_FDESC,
	[7] = MOUNT_KERNFS,
	[8] = NULL,		/* MOUNT_DEVFS */
	[9] = MOUNT_AFS,
};

/* Number of slots in mountcompatnames[]. */
const int nmountcompatnames =
    sizeof(mountcompatnames) / sizeof(*mountcompatnames);
#endif /* COMPAT_09 || COMPAT_43 */
140
141 /* ARGSUSED */
142 int
143 sys_mount(struct lwp *l, void *v, register_t *retval)
144 {
145 struct sys_mount_args /* {
146 syscallarg(const char *) type;
147 syscallarg(const char *) path;
148 syscallarg(int) flags;
149 syscallarg(void *) data;
150 } */ *uap = v;
151 struct vnode *vp;
152 struct mount *mp;
153 int error, flag = 0;
154 char fstypename[MFSNAMELEN];
155 struct vattr va;
156 struct nameidata nd;
157 struct vfsops *vfs;
158
159 /*
160 * if MNT_GETARGS is specified, it should be only flag.
161 */
162
163 if ((SCARG(uap, flags) & MNT_GETARGS) != 0 &&
164 (SCARG(uap, flags) & ~MNT_GETARGS) != 0) {
165 return EINVAL;
166 }
167
168 if (dovfsusermount == 0 && (SCARG(uap, flags) & MNT_GETARGS) == 0 &&
169 (error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
170 &l->l_acflag)))
171 return (error);
172 /*
173 * Get vnode to be covered
174 */
175 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
176 SCARG(uap, path), l);
177 if ((error = namei(&nd)) != 0)
178 return (error);
179 vp = nd.ni_vp;
180 /*
181 * A lookup in VFS_MOUNT might result in an attempt to
182 * lock this vnode again, so make the lock recursive.
183 */
184 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
185 if (SCARG(uap, flags) & (MNT_UPDATE | MNT_GETARGS)) {
186 if ((vp->v_flag & VROOT) == 0) {
187 vput(vp);
188 return (EINVAL);
189 }
190 mp = vp->v_mount;
191 flag = mp->mnt_flag;
192 vfs = mp->mnt_op;
193 /*
194 * We only allow the filesystem to be reloaded if it
195 * is currently mounted read-only.
196 */
197 if ((SCARG(uap, flags) & MNT_RELOAD) &&
198 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
199 vput(vp);
200 return (EOPNOTSUPP); /* Needs translation */
201 }
202 /*
203 * In "highly secure" mode, don't let the caller do anything
204 * but downgrade a filesystem from read-write to read-only.
205 * (see also below; MNT_UPDATE or MNT_GETARGS is required.)
206 */
207 if (securelevel >= 2 &&
208 SCARG(uap, flags) != MNT_GETARGS &&
209 SCARG(uap, flags) !=
210 (mp->mnt_flag | MNT_RDONLY |
211 MNT_RELOAD | MNT_FORCE | MNT_UPDATE)) {
212 vput(vp);
213 return (EPERM);
214 }
215 mp->mnt_flag |= SCARG(uap, flags) &
216 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
217 /*
218 * Only root, or the user that did the original mount is
219 * permitted to update it.
220 */
221 if ((mp->mnt_flag & MNT_GETARGS) == 0 &&
222 mp->mnt_stat.f_owner != kauth_cred_geteuid(l->l_cred) &&
223 (error = kauth_authorize_generic(l->l_cred,
224 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0) {
225 vput(vp);
226 return (error);
227 }
228 /*
229 * Do not allow NFS export by non-root users. For non-root
230 * users, silently enforce MNT_NOSUID and MNT_NODEV, and
231 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
232 */
233 if (kauth_cred_geteuid(l->l_cred) != 0) {
234 if (SCARG(uap, flags) & MNT_EXPORTED) {
235 vput(vp);
236 return (EPERM);
237 }
238 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
239 if (flag & MNT_NOEXEC)
240 SCARG(uap, flags) |= MNT_NOEXEC;
241 }
242 if (vfs_busy(mp, LK_NOWAIT, 0)) {
243 vput(vp);
244 return (EPERM);
245 }
246 goto update;
247 } else {
248 if (securelevel >= 2) {
249 vput(vp);
250 return (EPERM);
251 }
252 }
253 /*
254 * If the user is not root, ensure that they own the directory
255 * onto which we are attempting to mount.
256 */
257 if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0 ||
258 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
259 (error = kauth_authorize_generic(l->l_cred,
260 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)) {
261 vput(vp);
262 return (error);
263 }
264 /*
265 * Do not allow NFS export by non-root users. For non-root users,
266 * silently enforce MNT_NOSUID and MNT_NODEV, and MNT_NOEXEC if the
267 * mount point is already MNT_NOEXEC.
268 */
269 if (kauth_cred_geteuid(l->l_cred) != 0) {
270 if (SCARG(uap, flags) & MNT_EXPORTED) {
271 vput(vp);
272 return (EPERM);
273 }
274 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
275 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
276 SCARG(uap, flags) |= MNT_NOEXEC;
277 }
278 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) {
279 vput(vp);
280 return (error);
281 }
282 if (vp->v_type != VDIR) {
283 vput(vp);
284 return (ENOTDIR);
285 }
286 error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
287 if (error) {
288 #if defined(COMPAT_09) || defined(COMPAT_43)
289 /*
290 * Historically, filesystem types were identified by numbers.
291 * If we get an integer for the filesystem type instead of a
292 * string, we check to see if it matches one of the historic
293 * filesystem types.
294 */
295 u_long fsindex = (u_long)SCARG(uap, type);
296 if (fsindex >= nmountcompatnames ||
297 mountcompatnames[fsindex] == NULL) {
298 vput(vp);
299 return (ENODEV);
300 }
301 strncpy(fstypename, mountcompatnames[fsindex], MFSNAMELEN);
302 #else
303 vput(vp);
304 return (error);
305 #endif
306 }
307 #ifdef COMPAT_10
308 /* Accept `ufs' as an alias for `ffs'. */
309 if (!strncmp(fstypename, "ufs", MFSNAMELEN))
310 strncpy(fstypename, "ffs", MFSNAMELEN);
311 #endif
312 if ((vfs = vfs_getopsbyname(fstypename)) == NULL) {
313 vput(vp);
314 return (ENODEV);
315 }
316 if (vp->v_mountedhere != NULL) {
317 vput(vp);
318 return (EBUSY);
319 }
320
321 /*
322 * Allocate and initialize the file system.
323 */
324 mp = (struct mount *)malloc((u_long)sizeof(struct mount),
325 M_MOUNT, M_WAITOK);
326 memset((char *)mp, 0, (u_long)sizeof(struct mount));
327 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
328 simple_lock_init(&mp->mnt_slock);
329 (void)vfs_busy(mp, LK_NOWAIT, 0);
330 mp->mnt_op = vfs;
331 vfs->vfs_refcount++;
332 mp->mnt_vnodecovered = vp;
333 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
334 mp->mnt_unmounter = NULL;
335 mp->mnt_leaf = mp;
336 mount_initspecific(mp);
337
338 /*
339 * The underlying file system may refuse the mount for
340 * various reasons. Allow the user to force it to happen.
341 */
342 mp->mnt_flag |= SCARG(uap, flags) & MNT_FORCE;
343 update:
344 if ((SCARG(uap, flags) & MNT_GETARGS) == 0) {
345 /*
346 * Set the mount level flags.
347 */
348 if (SCARG(uap, flags) & MNT_RDONLY)
349 mp->mnt_flag |= MNT_RDONLY;
350 else if (mp->mnt_flag & MNT_RDONLY)
351 mp->mnt_iflag |= IMNT_WANTRDWR;
352 mp->mnt_flag &=
353 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
354 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
355 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
356 mp->mnt_flag |= SCARG(uap, flags) &
357 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
358 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
359 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
360 MNT_IGNORE);
361 }
362 /*
363 * Mount the filesystem.
364 */
365 error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, l);
366 if (mp->mnt_flag & (MNT_UPDATE | MNT_GETARGS)) {
367 VOP_UNLOCK(vp, 0);
368 #if defined(COMPAT_30) && defined(NFSSERVER)
369 if (mp->mnt_flag & MNT_UPDATE && error != 0) {
370 int error2;
371
372 /* Update failed; let's try and see if it was an
373 * export request. */
374 error2 = nfs_update_exports_30(mp, SCARG(uap, path),
375 SCARG(uap, data), l);
376
377 /* Only update error code if the export request was
378 * understood but some problem occurred while
379 * processing it. */
380 if (error2 != EJUSTRETURN)
381 error = error2;
382 }
383 #endif
384 if (mp->mnt_iflag & IMNT_WANTRDWR)
385 mp->mnt_flag &= ~MNT_RDONLY;
386 if (error)
387 mp->mnt_flag = flag;
388 mp->mnt_flag &=~
389 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
390 mp->mnt_iflag &=~ IMNT_WANTRDWR;
391 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
392 if (mp->mnt_syncer == NULL)
393 error = vfs_allocate_syncvnode(mp);
394 } else {
395 if (mp->mnt_syncer != NULL)
396 vfs_deallocate_syncvnode(mp);
397 }
398 vfs_unbusy(mp);
399 vrele(vp);
400 return (error);
401 }
402 /*
403 * Put the new filesystem on the mount list after root.
404 */
405 cache_purge(vp);
406 if (!error) {
407 mp->mnt_flag &=~
408 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
409 mp->mnt_iflag &=~ IMNT_WANTRDWR;
410 vp->v_mountedhere = mp;
411 simple_lock(&mountlist_slock);
412 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
413 simple_unlock(&mountlist_slock);
414 VOP_UNLOCK(vp, 0);
415 checkdirs(vp);
416 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
417 error = vfs_allocate_syncvnode(mp);
418 vfs_unbusy(mp);
419 (void) VFS_STATVFS(mp, &mp->mnt_stat, l);
420 if ((error = VFS_START(mp, 0, l)))
421 vrele(vp);
422 } else {
423 vp->v_mountedhere = (struct mount *)0;
424 vfs->vfs_refcount--;
425 vfs_unbusy(mp);
426 free(mp, M_MOUNT);
427 vput(vp);
428 }
429 return (error);
430 }
431
432 /*
433 * Scan all active processes to see if any of them have a current
434 * or root directory onto which the new filesystem has just been
435 * mounted. If so, replace them with the new mount point.
436 */
437 void
438 checkdirs(struct vnode *olddp)
439 {
440 struct cwdinfo *cwdi;
441 struct vnode *newdp;
442 struct proc *p;
443
444 if (olddp->v_usecount == 1)
445 return;
446 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
447 panic("mount: lost mount");
448 proclist_lock_read();
449 PROCLIST_FOREACH(p, &allproc) {
450 cwdi = p->p_cwdi;
451 if (!cwdi)
452 continue;
453 if (cwdi->cwdi_cdir == olddp) {
454 vrele(cwdi->cwdi_cdir);
455 VREF(newdp);
456 cwdi->cwdi_cdir = newdp;
457 }
458 if (cwdi->cwdi_rdir == olddp) {
459 vrele(cwdi->cwdi_rdir);
460 VREF(newdp);
461 cwdi->cwdi_rdir = newdp;
462 }
463 }
464 proclist_unlock_read();
465 if (rootvnode == olddp) {
466 vrele(rootvnode);
467 VREF(newdp);
468 rootvnode = newdp;
469 }
470 vput(newdp);
471 }
472
473 /*
474 * Unmount a file system.
475 *
476 * Note: unmount takes a path to the vnode mounted on as argument,
477 * not special file (as before).
478 */
479 /* ARGSUSED */
480 int
481 sys_unmount(struct lwp *l, void *v, register_t *retval)
482 {
483 struct sys_unmount_args /* {
484 syscallarg(const char *) path;
485 syscallarg(int) flags;
486 } */ *uap = v;
487 struct vnode *vp;
488 struct mount *mp;
489 int error;
490 struct nameidata nd;
491
492 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
493 SCARG(uap, path), l);
494 if ((error = namei(&nd)) != 0)
495 return (error);
496 vp = nd.ni_vp;
497 mp = vp->v_mount;
498
499 /*
500 * Only root, or the user that did the original mount is
501 * permitted to unmount this filesystem.
502 */
503 if ((mp->mnt_stat.f_owner != kauth_cred_geteuid(l->l_cred)) &&
504 (error = kauth_authorize_generic(l->l_cred,
505 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0) {
506 vput(vp);
507 return (error);
508 }
509
510 /*
511 * Don't allow unmounting the root file system.
512 */
513 if (mp->mnt_flag & MNT_ROOTFS) {
514 vput(vp);
515 return (EINVAL);
516 }
517
518 /*
519 * Must be the root of the filesystem
520 */
521 if ((vp->v_flag & VROOT) == 0) {
522 vput(vp);
523 return (EINVAL);
524 }
525 vput(vp);
526
527 /*
528 * XXX Freeze syncer. Must do this before locking the
529 * mount point. See dounmount() for details.
530 */
531 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
532
533 if (vfs_busy(mp, 0, 0)) {
534 lockmgr(&syncer_lock, LK_RELEASE, NULL);
535 return (EBUSY);
536 }
537
538 return (dounmount(mp, SCARG(uap, flags), l));
539 }
540
541 /*
542 * Do the actual file system unmount. File system is assumed to have been
543 * marked busy by the caller.
544 */
545 int
546 dounmount(struct mount *mp, int flags, struct lwp *l)
547 {
548 struct vnode *coveredvp;
549 int error;
550 int async;
551 int used_syncer;
552
553 #if NVERIEXEC > 0
554 error = veriexec_unmountchk(mp);
555 if (error)
556 return (error);
557 #endif /* NVERIEXEC > 0 */
558
559 #ifdef FILEASSOC
560 (void)fileassoc_table_delete(mp);
561 #endif /* FILEASSOC */
562
563 simple_lock(&mountlist_slock);
564 vfs_unbusy(mp);
565 used_syncer = (mp->mnt_syncer != NULL);
566
567 /*
568 * XXX Syncer must be frozen when we get here. This should really
569 * be done on a per-mountpoint basis, but especially the softdep
570 * code possibly called from the syncer doesn't exactly work on a
571 * per-mountpoint basis, so the softdep code would become a maze
572 * of vfs_busy() calls.
573 *
574 * The caller of dounmount() must acquire syncer_lock because
575 * the syncer itself acquires locks in syncer_lock -> vfs_busy
576 * order, and we must preserve that order to avoid deadlock.
577 *
578 * So, if the file system did not use the syncer, now is
579 * the time to release the syncer_lock.
580 */
581 if (used_syncer == 0)
582 lockmgr(&syncer_lock, LK_RELEASE, NULL);
583
584 mp->mnt_iflag |= IMNT_UNMOUNT;
585 mp->mnt_unmounter = l;
586 lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
587 vn_start_write(NULL, &mp, V_WAIT);
588
589 async = mp->mnt_flag & MNT_ASYNC;
590 mp->mnt_flag &= ~MNT_ASYNC;
591 cache_purgevfs(mp); /* remove cache entries for this file sys */
592 if (mp->mnt_syncer != NULL)
593 vfs_deallocate_syncvnode(mp);
594 error = 0;
595 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
596 #if NFSS > 0
597 error = fss_umount_hook(mp, (flags & MNT_FORCE));
598 #endif
599 if (error == 0)
600 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
601 }
602 if (error == 0 || (flags & MNT_FORCE))
603 error = VFS_UNMOUNT(mp, flags, l);
604 vn_finished_write(mp, 0);
605 simple_lock(&mountlist_slock);
606 if (error) {
607 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
608 (void) vfs_allocate_syncvnode(mp);
609 mp->mnt_iflag &= ~IMNT_UNMOUNT;
610 mp->mnt_unmounter = NULL;
611 mp->mnt_flag |= async;
612 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
613 &mountlist_slock);
614 if (used_syncer)
615 lockmgr(&syncer_lock, LK_RELEASE, NULL);
616 simple_lock(&mp->mnt_slock);
617 while (mp->mnt_wcnt > 0) {
618 wakeup(mp);
619 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
620 0, &mp->mnt_slock);
621 }
622 simple_unlock(&mp->mnt_slock);
623 return (error);
624 }
625 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
626 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
627 coveredvp->v_mountedhere = NULL;
628 vrele(coveredvp);
629 }
630 mp->mnt_op->vfs_refcount--;
631 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
632 panic("unmount: dangling vnode");
633 mp->mnt_iflag |= IMNT_GONE;
634 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
635 mount_finispecific(mp);
636 if (used_syncer)
637 lockmgr(&syncer_lock, LK_RELEASE, NULL);
638 simple_lock(&mp->mnt_slock);
639 while (mp->mnt_wcnt > 0) {
640 wakeup(mp);
641 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock);
642 }
643 simple_unlock(&mp->mnt_slock);
644 vfs_hooks_unmount(mp);
645 free(mp, M_MOUNT);
646 return (0);
647 }
648
649 /*
650 * Sync each mounted filesystem.
651 */
652 #ifdef DEBUG
653 int syncprt = 0;
654 struct ctldebug debug0 = { "syncprt", &syncprt };
655 #endif
656
657 /* ARGSUSED */
658 int
659 sys_sync(struct lwp *l, void *v, register_t *retval)
660 {
661 struct mount *mp, *nmp;
662 int asyncflag;
663
664 if (l == NULL)
665 l = &lwp0;
666
667 simple_lock(&mountlist_slock);
668 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
669 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
670 nmp = mp->mnt_list.cqe_prev;
671 continue;
672 }
673 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
674 vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
675 asyncflag = mp->mnt_flag & MNT_ASYNC;
676 mp->mnt_flag &= ~MNT_ASYNC;
677 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred, l);
678 if (asyncflag)
679 mp->mnt_flag |= MNT_ASYNC;
680 vn_finished_write(mp, 0);
681 }
682 simple_lock(&mountlist_slock);
683 nmp = mp->mnt_list.cqe_prev;
684 vfs_unbusy(mp);
685
686 }
687 simple_unlock(&mountlist_slock);
688 #ifdef DEBUG
689 if (syncprt)
690 vfs_bufstats();
691 #endif /* DEBUG */
692 return (0);
693 }
694
695 /*
696 * Change filesystem quotas.
697 */
698 /* ARGSUSED */
699 int
700 sys_quotactl(struct lwp *l, void *v, register_t *retval)
701 {
702 struct sys_quotactl_args /* {
703 syscallarg(const char *) path;
704 syscallarg(int) cmd;
705 syscallarg(int) uid;
706 syscallarg(void *) arg;
707 } */ *uap = v;
708 struct mount *mp;
709 int error;
710 struct nameidata nd;
711
712 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
713 if ((error = namei(&nd)) != 0)
714 return (error);
715 error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH);
716 vrele(nd.ni_vp);
717 if (error)
718 return (error);
719 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
720 SCARG(uap, arg), l);
721 vn_finished_write(mp, 0);
722 return (error);
723 }
724
725 int
726 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
727 int root)
728 {
729 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
730 int error = 0;
731
732 /*
733 * If MNT_NOWAIT or MNT_LAZY is specified, do not
734 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
735 * overrides MNT_NOWAIT.
736 */
737 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
738 (flags != MNT_WAIT && flags != 0)) {
739 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
740 goto done;
741 }
742
743 /* Get the filesystem stats now */
744 memset(sp, 0, sizeof(*sp));
745 if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
746 return error;
747 }
748
749 if (cwdi->cwdi_rdir == NULL)
750 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
751 done:
752 if (cwdi->cwdi_rdir != NULL) {
753 size_t len;
754 char *bp;
755 char *path = PNBUF_GET();
756 if (!path)
757 return ENOMEM;
758
759 bp = path + MAXPATHLEN;
760 *--bp = '\0';
761 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
762 MAXPATHLEN / 2, 0, l);
763 if (error) {
764 PNBUF_PUT(path);
765 return error;
766 }
767 len = strlen(bp);
768 /*
769 * for mount points that are below our root, we can see
770 * them, so we fix up the pathname and return them. The
771 * rest we cannot see, so we don't allow viewing the
772 * data.
773 */
774 if (strncmp(bp, sp->f_mntonname, len) == 0) {
775 strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
776 sizeof(sp->f_mntonname));
777 if (sp->f_mntonname[0] == '\0')
778 (void)strlcpy(sp->f_mntonname, "/",
779 sizeof(sp->f_mntonname));
780 } else {
781 if (root)
782 (void)strlcpy(sp->f_mntonname, "/",
783 sizeof(sp->f_mntonname));
784 else
785 error = EPERM;
786 }
787 PNBUF_PUT(path);
788 }
789 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
790 return error;
791 }
792
793 /*
794 * Get filesystem statistics.
795 */
796 /* ARGSUSED */
797 int
798 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
799 {
800 struct sys_statvfs1_args /* {
801 syscallarg(const char *) path;
802 syscallarg(struct statvfs *) buf;
803 syscallarg(int) flags;
804 } */ *uap = v;
805 struct mount *mp;
806 struct statvfs *sb;
807 int error;
808 struct nameidata nd;
809
810 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
811 if ((error = namei(&nd)) != 0)
812 return error;
813 mp = nd.ni_vp->v_mount;
814 vrele(nd.ni_vp);
815 sb = STATVFSBUF_GET();
816 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1);
817 if (error == 0) {
818 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
819 }
820 STATVFSBUF_PUT(sb);
821 return error;
822 }
823
824 /*
825 * Get filesystem statistics.
826 */
827 /* ARGSUSED */
828 int
829 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
830 {
831 struct sys_fstatvfs1_args /* {
832 syscallarg(int) fd;
833 syscallarg(struct statvfs *) buf;
834 syscallarg(int) flags;
835 } */ *uap = v;
836 struct proc *p = l->l_proc;
837 struct file *fp;
838 struct mount *mp;
839 struct statvfs *sb;
840 int error;
841
842 /* getvnode() will use the descriptor for us */
843 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
844 return (error);
845 mp = ((struct vnode *)fp->f_data)->v_mount;
846 sb = STATVFSBUF_GET();
847 if ((error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1)) != 0)
848 goto out;
849 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
850 out:
851 FILE_UNUSE(fp, l);
852 STATVFSBUF_PUT(sb);
853 return error;
854 }
855
856
857 /*
858 * Get statistics on all filesystems.
859 */
860 int
861 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
862 {
863 struct sys_getvfsstat_args /* {
864 syscallarg(struct statvfs *) buf;
865 syscallarg(size_t) bufsize;
866 syscallarg(int) flags;
867 } */ *uap = v;
868 int root = 0;
869 struct proc *p = l->l_proc;
870 struct mount *mp, *nmp;
871 struct statvfs *sb;
872 struct statvfs *sfsp;
873 size_t count, maxcount;
874 int error = 0;
875
876 sb = STATVFSBUF_GET();
877 maxcount = SCARG(uap, bufsize) / sizeof(struct statvfs);
878 sfsp = SCARG(uap, buf);
879 simple_lock(&mountlist_slock);
880 count = 0;
881 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
882 mp = nmp) {
883 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
884 nmp = CIRCLEQ_NEXT(mp, mnt_list);
885 continue;
886 }
887 if (sfsp && count < maxcount) {
888 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 0);
889 if (error) {
890 simple_lock(&mountlist_slock);
891 nmp = CIRCLEQ_NEXT(mp, mnt_list);
892 vfs_unbusy(mp);
893 continue;
894 }
895 error = copyout(sb, sfsp, sizeof(*sfsp));
896 if (error) {
897 vfs_unbusy(mp);
898 goto out;
899 }
900 sfsp++;
901 root |= strcmp(sb->f_mntonname, "/") == 0;
902 }
903 count++;
904 simple_lock(&mountlist_slock);
905 nmp = CIRCLEQ_NEXT(mp, mnt_list);
906 vfs_unbusy(mp);
907 }
908 simple_unlock(&mountlist_slock);
909 if (root == 0 && p->p_cwdi->cwdi_rdir) {
910 /*
911 * fake a root entry
912 */
913 if ((error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l,
914 SCARG(uap, flags), 1)) != 0)
915 goto out;
916 if (sfsp)
917 error = copyout(sb, sfsp, sizeof(*sfsp));
918 count++;
919 }
920 if (sfsp && count > maxcount)
921 *retval = maxcount;
922 else
923 *retval = count;
924 out:
925 STATVFSBUF_PUT(sb);
926 return error;
927 }
928
929 /*
930 * Change current working directory to a given file descriptor.
931 */
932 /* ARGSUSED */
933 int
934 sys_fchdir(struct lwp *l, void *v, register_t *retval)
935 {
936 struct sys_fchdir_args /* {
937 syscallarg(int) fd;
938 } */ *uap = v;
939 struct proc *p = l->l_proc;
940 struct filedesc *fdp = p->p_fd;
941 struct cwdinfo *cwdi = p->p_cwdi;
942 struct vnode *vp, *tdp;
943 struct mount *mp;
944 struct file *fp;
945 int error;
946
947 /* getvnode() will use the descriptor for us */
948 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
949 return (error);
950 vp = (struct vnode *)fp->f_data;
951
952 VREF(vp);
953 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
954 if (vp->v_type != VDIR)
955 error = ENOTDIR;
956 else
957 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
958 if (error) {
959 vput(vp);
960 goto out;
961 }
962 while (!error && (mp = vp->v_mountedhere) != NULL) {
963 if (vfs_busy(mp, 0, 0))
964 continue;
965
966 vput(vp);
967 error = VFS_ROOT(mp, &tdp);
968 vfs_unbusy(mp);
969 if (error)
970 goto out;
971 vp = tdp;
972 }
973 VOP_UNLOCK(vp, 0);
974
975 /*
976 * Disallow changing to a directory not under the process's
977 * current root directory (if there is one).
978 */
979 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
980 vrele(vp);
981 error = EPERM; /* operation not permitted */
982 goto out;
983 }
984
985 vrele(cwdi->cwdi_cdir);
986 cwdi->cwdi_cdir = vp;
987 out:
988 FILE_UNUSE(fp, l);
989 return (error);
990 }
991
992 /*
993 * Change this process's notion of the root directory to a given file
994 * descriptor.
995 */
996 int
997 sys_fchroot(struct lwp *l, void *v, register_t *retval)
998 {
999 struct sys_fchroot_args *uap = v;
1000 struct proc *p = l->l_proc;
1001 struct filedesc *fdp = p->p_fd;
1002 struct cwdinfo *cwdi = p->p_cwdi;
1003 struct vnode *vp;
1004 struct file *fp;
1005 int error;
1006
1007 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1008 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1009 return error;
1010 /* getvnode() will use the descriptor for us */
1011 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1012 return error;
1013 vp = (struct vnode *) fp->f_data;
1014 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1015 if (vp->v_type != VDIR)
1016 error = ENOTDIR;
1017 else
1018 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1019 VOP_UNLOCK(vp, 0);
1020 if (error)
1021 goto out;
1022 VREF(vp);
1023
1024 /*
1025 * Prevent escaping from chroot by putting the root under
1026 * the working directory. Silently chdir to / if we aren't
1027 * already there.
1028 */
1029 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1030 /*
1031 * XXX would be more failsafe to change directory to a
1032 * deadfs node here instead
1033 */
1034 vrele(cwdi->cwdi_cdir);
1035 VREF(vp);
1036 cwdi->cwdi_cdir = vp;
1037 }
1038
1039 if (cwdi->cwdi_rdir != NULL)
1040 vrele(cwdi->cwdi_rdir);
1041 cwdi->cwdi_rdir = vp;
1042 out:
1043 FILE_UNUSE(fp, l);
1044 return (error);
1045 }
1046
1047 /*
1048 * Change current working directory (``.'').
1049 */
1050 /* ARGSUSED */
1051 int
1052 sys_chdir(struct lwp *l, void *v, register_t *retval)
1053 {
1054 struct sys_chdir_args /* {
1055 syscallarg(const char *) path;
1056 } */ *uap = v;
1057 struct proc *p = l->l_proc;
1058 struct cwdinfo *cwdi = p->p_cwdi;
1059 int error;
1060 struct nameidata nd;
1061
1062 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1063 SCARG(uap, path), l);
1064 if ((error = change_dir(&nd, l)) != 0)
1065 return (error);
1066 vrele(cwdi->cwdi_cdir);
1067 cwdi->cwdi_cdir = nd.ni_vp;
1068 return (0);
1069 }
1070
1071 /*
1072 * Change notion of root (``/'') directory.
1073 */
1074 /* ARGSUSED */
1075 int
1076 sys_chroot(struct lwp *l, void *v, register_t *retval)
1077 {
1078 struct sys_chroot_args /* {
1079 syscallarg(const char *) path;
1080 } */ *uap = v;
1081 struct proc *p = l->l_proc;
1082 struct cwdinfo *cwdi = p->p_cwdi;
1083 struct vnode *vp;
1084 int error;
1085 struct nameidata nd;
1086
1087 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1088 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1089 return (error);
1090 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1091 SCARG(uap, path), l);
1092 if ((error = change_dir(&nd, l)) != 0)
1093 return (error);
1094 if (cwdi->cwdi_rdir != NULL)
1095 vrele(cwdi->cwdi_rdir);
1096 vp = nd.ni_vp;
1097 cwdi->cwdi_rdir = vp;
1098
1099 /*
1100 * Prevent escaping from chroot by putting the root under
1101 * the working directory. Silently chdir to / if we aren't
1102 * already there.
1103 */
1104 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1105 /*
1106 * XXX would be more failsafe to change directory to a
1107 * deadfs node here instead
1108 */
1109 vrele(cwdi->cwdi_cdir);
1110 VREF(vp);
1111 cwdi->cwdi_cdir = vp;
1112 }
1113
1114 return (0);
1115 }
1116
1117 /*
1118 * Common routine for chroot and chdir.
1119 */
1120 static int
1121 change_dir(struct nameidata *ndp, struct lwp *l)
1122 {
1123 struct vnode *vp;
1124 int error;
1125
1126 if ((error = namei(ndp)) != 0)
1127 return (error);
1128 vp = ndp->ni_vp;
1129 if (vp->v_type != VDIR)
1130 error = ENOTDIR;
1131 else
1132 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1133
1134 if (error)
1135 vput(vp);
1136 else
1137 VOP_UNLOCK(vp, 0);
1138 return (error);
1139 }
1140
1141 /*
1142 * Check permissions, allocate an open file structure,
1143 * and call the device open routine if any.
1144 */
1145 int
1146 sys_open(struct lwp *l, void *v, register_t *retval)
1147 {
1148 struct sys_open_args /* {
1149 syscallarg(const char *) path;
1150 syscallarg(int) flags;
1151 syscallarg(int) mode;
1152 } */ *uap = v;
1153 struct proc *p = l->l_proc;
1154 struct cwdinfo *cwdi = p->p_cwdi;
1155 struct filedesc *fdp = p->p_fd;
1156 struct file *fp;
1157 struct vnode *vp;
1158 int flags, cmode;
1159 int type, indx, error;
1160 struct flock lf;
1161 struct nameidata nd;
1162
1163 flags = FFLAGS(SCARG(uap, flags));
1164 if ((flags & (FREAD | FWRITE)) == 0)
1165 return (EINVAL);
1166 /* falloc() will use the file descriptor for us */
1167 if ((error = falloc(l, &fp, &indx)) != 0)
1168 return (error);
1169 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1170 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1171 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1172 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1173 FILE_UNUSE(fp, l);
1174 fdp->fd_ofiles[indx] = NULL;
1175 ffree(fp);
1176 if ((error == EDUPFD || error == EMOVEFD) &&
1177 l->l_dupfd >= 0 && /* XXX from fdopen */
1178 (error =
1179 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
1180 *retval = indx;
1181 return (0);
1182 }
1183 if (error == ERESTART)
1184 error = EINTR;
1185 fdremove(fdp, indx);
1186 return (error);
1187 }
1188 l->l_dupfd = 0;
1189 vp = nd.ni_vp;
1190 fp->f_flag = flags & FMASK;
1191 fp->f_type = DTYPE_VNODE;
1192 fp->f_ops = &vnops;
1193 fp->f_data = vp;
1194 if (flags & (O_EXLOCK | O_SHLOCK)) {
1195 lf.l_whence = SEEK_SET;
1196 lf.l_start = 0;
1197 lf.l_len = 0;
1198 if (flags & O_EXLOCK)
1199 lf.l_type = F_WRLCK;
1200 else
1201 lf.l_type = F_RDLCK;
1202 type = F_FLOCK;
1203 if ((flags & FNONBLOCK) == 0)
1204 type |= F_WAIT;
1205 VOP_UNLOCK(vp, 0);
1206 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1207 if (error) {
1208 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1209 FILE_UNUSE(fp, l);
1210 ffree(fp);
1211 fdremove(fdp, indx);
1212 return (error);
1213 }
1214 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1215 fp->f_flag |= FHASLOCK;
1216 }
1217 VOP_UNLOCK(vp, 0);
1218 *retval = indx;
1219 FILE_SET_MATURE(fp);
1220 FILE_UNUSE(fp, l);
1221 return (0);
1222 }
1223
1224 static void
1225 vfs__fhfree(fhandle_t *fhp)
1226 {
1227 size_t fhsize;
1228
1229 if (fhp == NULL) {
1230 return;
1231 }
1232 fhsize = FHANDLE_SIZE(fhp);
1233 kmem_free(fhp, fhsize);
1234 }
1235
1236 /*
1237 * vfs_composefh: compose a filehandle.
1238 */
1239
1240 int
1241 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1242 {
1243 struct mount *mp;
1244 struct fid *fidp;
1245 int error;
1246 size_t needfhsize;
1247 size_t fidsize;
1248
1249 mp = vp->v_mount;
1250 fidp = NULL;
1251 if (*fh_size < FHANDLE_SIZE_MIN) {
1252 fidsize = 0;
1253 } else {
1254 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1255 if (fhp != NULL) {
1256 memset(fhp, 0, *fh_size);
1257 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1258 fidp = &fhp->fh_fid;
1259 }
1260 }
1261 error = VFS_VPTOFH(vp, fidp, &fidsize);
1262 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1263 if (error == 0 && *fh_size < needfhsize) {
1264 error = E2BIG;
1265 }
1266 *fh_size = needfhsize;
1267 return error;
1268 }
1269
1270 int
1271 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1272 {
1273 struct mount *mp;
1274 fhandle_t *fhp;
1275 size_t fhsize;
1276 size_t fidsize;
1277 int error;
1278
1279 *fhpp = NULL;
1280 mp = vp->v_mount;
1281 fidsize = 0;
1282 error = VFS_VPTOFH(vp, NULL, &fidsize);
1283 KASSERT(error != 0);
1284 if (error != E2BIG) {
1285 goto out;
1286 }
1287 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1288 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1289 if (fhp == NULL) {
1290 error = ENOMEM;
1291 goto out;
1292 }
1293 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1294 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1295 if (error == 0) {
1296 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1297 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1298 *fhpp = fhp;
1299 } else {
1300 kmem_free(fhp, fhsize);
1301 }
1302 out:
1303 return error;
1304 }
1305
1306 void
1307 vfs_composefh_free(fhandle_t *fhp)
1308 {
1309
1310 vfs__fhfree(fhp);
1311 }
1312
1313 /*
1314 * vfs_fhtovp: lookup a vnode by a filehandle.
1315 */
1316
1317 int
1318 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1319 {
1320 struct mount *mp;
1321 int error;
1322
1323 *vpp = NULL;
1324 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1325 if (mp == NULL) {
1326 error = ESTALE;
1327 goto out;
1328 }
1329 if (mp->mnt_op->vfs_fhtovp == NULL) {
1330 error = EOPNOTSUPP;
1331 goto out;
1332 }
1333 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1334 out:
1335 return error;
1336 }
1337
1338 /*
1339 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1340 * the needed size.
1341 */
1342
1343 int
1344 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1345 {
1346 fhandle_t *fhp;
1347 int error;
1348
1349 *fhpp = NULL;
1350 if (fhsize > FHANDLE_SIZE_MAX) {
1351 return EINVAL;
1352 }
1353 if (fhsize < FHANDLE_SIZE_MIN) {
1354 return EINVAL;
1355 }
1356 again:
1357 fhp = kmem_alloc(fhsize, KM_SLEEP);
1358 if (fhp == NULL) {
1359 return ENOMEM;
1360 }
1361 error = copyin(ufhp, fhp, fhsize);
1362 if (error == 0) {
1363 /* XXX this check shouldn't be here */
1364 if (FHANDLE_SIZE(fhp) == fhsize) {
1365 *fhpp = fhp;
1366 return 0;
1367 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1368 /*
1369 * a kludge for nfsv2 padded handles.
1370 */
1371 size_t sz;
1372
1373 sz = FHANDLE_SIZE(fhp);
1374 kmem_free(fhp, fhsize);
1375 fhsize = sz;
1376 goto again;
1377 } else {
1378 /*
1379 * userland told us wrong size.
1380 */
1381 error = EINVAL;
1382 }
1383 }
1384 kmem_free(fhp, fhsize);
1385 return error;
1386 }
1387
1388 void
1389 vfs_copyinfh_free(fhandle_t *fhp)
1390 {
1391
1392 vfs__fhfree(fhp);
1393 }
1394
1395 /*
1396 * Get file handle system call
1397 */
1398 int
1399 sys___getfh30(struct lwp *l, void *v, register_t *retval)
1400 {
1401 struct sys___getfh30_args /* {
1402 syscallarg(char *) fname;
1403 syscallarg(fhandle_t *) fhp;
1404 syscallarg(size_t *) fh_size;
1405 } */ *uap = v;
1406 struct vnode *vp;
1407 fhandle_t *fh;
1408 int error;
1409 struct nameidata nd;
1410 size_t sz;
1411 size_t usz;
1412
1413 /*
1414 * Must be super user
1415 */
1416 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1417 0, NULL, NULL, NULL);
1418 if (error)
1419 return (error);
1420 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1421 SCARG(uap, fname), l);
1422 error = namei(&nd);
1423 if (error)
1424 return (error);
1425 vp = nd.ni_vp;
1426 error = vfs_composefh_alloc(vp, &fh);
1427 vput(vp);
1428 if (error != 0) {
1429 goto out;
1430 }
1431 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1432 if (error != 0) {
1433 goto out;
1434 }
1435 sz = FHANDLE_SIZE(fh);
1436 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1437 if (error != 0) {
1438 goto out;
1439 }
1440 if (usz >= sz) {
1441 error = copyout(fh, SCARG(uap, fhp), sz);
1442 } else {
1443 error = E2BIG;
1444 }
1445 out:
1446 vfs_composefh_free(fh);
1447 return (error);
1448 }
1449
1450 /*
1451 * Open a file given a file handle.
1452 *
1453 * Check permissions, allocate an open file structure,
1454 * and call the device open routine if any.
1455 */
1456
1457 int
1458 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1459 register_t *retval)
1460 {
1461 struct filedesc *fdp = l->l_proc->p_fd;
1462 struct file *fp;
1463 struct vnode *vp = NULL;
1464 struct mount *mp;
1465 kauth_cred_t cred = l->l_cred;
1466 struct file *nfp;
1467 int type, indx, error=0;
1468 struct flock lf;
1469 struct vattr va;
1470 fhandle_t *fh;
1471 int flags;
1472
1473 /*
1474 * Must be super user
1475 */
1476 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1477 0, NULL, NULL, NULL)))
1478 return (error);
1479
1480 flags = FFLAGS(oflags);
1481 if ((flags & (FREAD | FWRITE)) == 0)
1482 return (EINVAL);
1483 if ((flags & O_CREAT))
1484 return (EINVAL);
1485 /* falloc() will use the file descriptor for us */
1486 if ((error = falloc(l, &nfp, &indx)) != 0)
1487 return (error);
1488 fp = nfp;
1489 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1490 if (error != 0) {
1491 goto bad;
1492 }
1493 error = vfs_fhtovp(fh, &vp);
1494 if (error != 0) {
1495 goto bad;
1496 }
1497
1498 /* Now do an effective vn_open */
1499
1500 if (vp->v_type == VSOCK) {
1501 error = EOPNOTSUPP;
1502 goto bad;
1503 }
1504 if (flags & FREAD) {
1505 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
1506 goto bad;
1507 }
1508 if (flags & (FWRITE | O_TRUNC)) {
1509 if (vp->v_type == VDIR) {
1510 error = EISDIR;
1511 goto bad;
1512 }
1513 if ((error = vn_writechk(vp)) != 0 ||
1514 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
1515 goto bad;
1516 }
1517 if (flags & O_TRUNC) {
1518 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
1519 goto bad;
1520 VOP_UNLOCK(vp, 0); /* XXX */
1521 VOP_LEASE(vp, l, cred, LEASE_WRITE);
1522 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1523 VATTR_NULL(&va);
1524 va.va_size = 0;
1525 error = VOP_SETATTR(vp, &va, cred, l);
1526 vn_finished_write(mp, 0);
1527 if (error)
1528 goto bad;
1529 }
1530 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
1531 goto bad;
1532 if (vp->v_type == VREG &&
1533 uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
1534 error = EIO;
1535 goto bad;
1536 }
1537 if (flags & FWRITE)
1538 vp->v_writecount++;
1539
1540 /* done with modified vn_open, now finish what sys_open does. */
1541
1542 fp->f_flag = flags & FMASK;
1543 fp->f_type = DTYPE_VNODE;
1544 fp->f_ops = &vnops;
1545 fp->f_data = vp;
1546 if (flags & (O_EXLOCK | O_SHLOCK)) {
1547 lf.l_whence = SEEK_SET;
1548 lf.l_start = 0;
1549 lf.l_len = 0;
1550 if (flags & O_EXLOCK)
1551 lf.l_type = F_WRLCK;
1552 else
1553 lf.l_type = F_RDLCK;
1554 type = F_FLOCK;
1555 if ((flags & FNONBLOCK) == 0)
1556 type |= F_WAIT;
1557 VOP_UNLOCK(vp, 0);
1558 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1559 if (error) {
1560 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1561 FILE_UNUSE(fp, l);
1562 ffree(fp);
1563 fdremove(fdp, indx);
1564 return (error);
1565 }
1566 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1567 fp->f_flag |= FHASLOCK;
1568 }
1569 VOP_UNLOCK(vp, 0);
1570 *retval = indx;
1571 FILE_SET_MATURE(fp);
1572 FILE_UNUSE(fp, l);
1573 vfs_copyinfh_free(fh);
1574 return (0);
1575
1576 bad:
1577 FILE_UNUSE(fp, l);
1578 ffree(fp);
1579 fdremove(fdp, indx);
1580 if (vp != NULL)
1581 vput(vp);
1582 vfs_copyinfh_free(fh);
1583 return (error);
1584 }
1585
1586 int
1587 sys___fhopen40(struct lwp *l, void *v, register_t *retval)
1588 {
1589 struct sys___fhopen40_args /* {
1590 syscallarg(const void *) fhp;
1591 syscallarg(size_t) fh_size;
1592 syscallarg(int) flags;
1593 } */ *uap = v;
1594
1595 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1596 SCARG(uap, flags), retval);
1597 }
1598
1599 int
1600 dofhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sbp,
1601 register_t *retval)
1602 {
1603 struct stat sb;
1604 int error;
1605 fhandle_t *fh;
1606 struct vnode *vp;
1607
1608 /*
1609 * Must be super user
1610 */
1611 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1612 0, NULL, NULL, NULL)))
1613 return (error);
1614
1615 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1616 if (error != 0) {
1617 goto bad;
1618 }
1619 error = vfs_fhtovp(fh, &vp);
1620 if (error != 0) {
1621 goto bad;
1622 }
1623 error = vn_stat(vp, &sb, l);
1624 vput(vp);
1625 if (error) {
1626 goto bad;
1627 }
1628 error = copyout(&sb, sbp, sizeof(sb));
1629 bad:
1630 vfs_copyinfh_free(fh);
1631 return error;
1632 }
1633
1634
1635 /* ARGSUSED */
1636 int
1637 sys___fhstat40(struct lwp *l, void *v, register_t *retval)
1638 {
1639 struct sys___fhstat40_args /* {
1640 syscallarg(const void *) fhp;
1641 syscallarg(size_t) fh_size;
1642 syscallarg(struct stat *) sb;
1643 } */ *uap = v;
1644
1645 return dofhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), SCARG(uap, sb),
1646 retval);
1647 }
1648
1649 int
1650 dofhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *buf,
1651 int flags, register_t *retval)
1652 {
1653 struct statvfs *sb = NULL;
1654 fhandle_t *fh;
1655 struct mount *mp;
1656 struct vnode *vp;
1657 int error;
1658
1659 /*
1660 * Must be super user
1661 */
1662 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1663 0, NULL, NULL, NULL)))
1664 return error;
1665
1666 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1667 if (error != 0) {
1668 goto out;
1669 }
1670 error = vfs_fhtovp(fh, &vp);
1671 if (error != 0) {
1672 goto out;
1673 }
1674 mp = vp->v_mount;
1675 sb = STATVFSBUF_GET();
1676 if ((error = dostatvfs(mp, sb, l, flags, 1)) != 0) {
1677 vput(vp);
1678 goto out;
1679 }
1680 vput(vp);
1681 error = copyout(sb, buf, sizeof(*sb));
1682 out:
1683 if (sb != NULL) {
1684 STATVFSBUF_PUT(sb);
1685 }
1686 vfs_copyinfh_free(fh);
1687 return error;
1688 }
1689
1690 /* ARGSUSED */
1691 int
1692 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval)
1693 {
1694 struct sys___fhstatvfs140_args /* {
1695 syscallarg(const void *) fhp;
1696 syscallarg(size_t) fh_size;
1697 syscallarg(struct statvfs *) buf;
1698 syscallarg(int) flags;
1699 } */ *uap = v;
1700
1701 return dofhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1702 SCARG(uap, buf), SCARG(uap, flags), retval);
1703 }
1704
1705 /*
1706 * Create a special file.
1707 */
1708 /* ARGSUSED */
1709 int
1710 sys_mknod(struct lwp *l, void *v, register_t *retval)
1711 {
1712 struct sys_mknod_args /* {
1713 syscallarg(const char *) path;
1714 syscallarg(int) mode;
1715 syscallarg(int) dev;
1716 } */ *uap = v;
1717 struct proc *p = l->l_proc;
1718 struct vnode *vp;
1719 struct mount *mp;
1720 struct vattr vattr;
1721 int error;
1722 int whiteout = 0;
1723 struct nameidata nd;
1724
1725 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1726 0, NULL, NULL, NULL)) != 0)
1727 return (error);
1728 restart:
1729 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1730 if ((error = namei(&nd)) != 0)
1731 return (error);
1732 vp = nd.ni_vp;
1733 if (vp != NULL)
1734 error = EEXIST;
1735 else {
1736 VATTR_NULL(&vattr);
1737 vattr.va_mode =
1738 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1739 vattr.va_rdev = SCARG(uap, dev);
1740 whiteout = 0;
1741
1742 switch (SCARG(uap, mode) & S_IFMT) {
1743 case S_IFMT: /* used by badsect to flag bad sectors */
1744 vattr.va_type = VBAD;
1745 break;
1746 case S_IFCHR:
1747 vattr.va_type = VCHR;
1748 break;
1749 case S_IFBLK:
1750 vattr.va_type = VBLK;
1751 break;
1752 case S_IFWHT:
1753 whiteout = 1;
1754 break;
1755 default:
1756 error = EINVAL;
1757 break;
1758 }
1759 }
1760 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1761 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1762 if (nd.ni_dvp == vp)
1763 vrele(nd.ni_dvp);
1764 else
1765 vput(nd.ni_dvp);
1766 if (vp)
1767 vrele(vp);
1768 if ((error = vn_start_write(NULL, &mp,
1769 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1770 return (error);
1771 goto restart;
1772 }
1773 if (!error) {
1774 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1775 if (whiteout) {
1776 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1777 if (error)
1778 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1779 vput(nd.ni_dvp);
1780 } else {
1781 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1782 &nd.ni_cnd, &vattr);
1783 if (error == 0)
1784 vput(nd.ni_vp);
1785 }
1786 } else {
1787 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1788 if (nd.ni_dvp == vp)
1789 vrele(nd.ni_dvp);
1790 else
1791 vput(nd.ni_dvp);
1792 if (vp)
1793 vrele(vp);
1794 }
1795 vn_finished_write(mp, 0);
1796 return (error);
1797 }
1798
1799 /*
1800 * Create a named pipe.
1801 */
1802 /* ARGSUSED */
1803 int
1804 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
1805 {
1806 struct sys_mkfifo_args /* {
1807 syscallarg(const char *) path;
1808 syscallarg(int) mode;
1809 } */ *uap = v;
1810 struct proc *p = l->l_proc;
1811 struct mount *mp;
1812 struct vattr vattr;
1813 int error;
1814 struct nameidata nd;
1815
1816 restart:
1817 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1818 if ((error = namei(&nd)) != 0)
1819 return (error);
1820 if (nd.ni_vp != NULL) {
1821 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1822 if (nd.ni_dvp == nd.ni_vp)
1823 vrele(nd.ni_dvp);
1824 else
1825 vput(nd.ni_dvp);
1826 vrele(nd.ni_vp);
1827 return (EEXIST);
1828 }
1829 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1830 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1831 if (nd.ni_dvp == nd.ni_vp)
1832 vrele(nd.ni_dvp);
1833 else
1834 vput(nd.ni_dvp);
1835 if (nd.ni_vp)
1836 vrele(nd.ni_vp);
1837 if ((error = vn_start_write(NULL, &mp,
1838 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1839 return (error);
1840 goto restart;
1841 }
1842 VATTR_NULL(&vattr);
1843 vattr.va_type = VFIFO;
1844 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1845 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1846 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1847 if (error == 0)
1848 vput(nd.ni_vp);
1849 vn_finished_write(mp, 0);
1850 return (error);
1851 }
1852
1853 /*
1854 * Make a hard file link.
1855 */
1856 /* ARGSUSED */
1857 int
1858 sys_link(struct lwp *l, void *v, register_t *retval)
1859 {
1860 struct sys_link_args /* {
1861 syscallarg(const char *) path;
1862 syscallarg(const char *) link;
1863 } */ *uap = v;
1864 struct vnode *vp;
1865 struct mount *mp;
1866 struct nameidata nd;
1867 int error;
1868
1869 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1870 if ((error = namei(&nd)) != 0)
1871 return (error);
1872 vp = nd.ni_vp;
1873 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
1874 vrele(vp);
1875 return (error);
1876 }
1877 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1878 if ((error = namei(&nd)) != 0)
1879 goto out;
1880 if (nd.ni_vp) {
1881 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1882 if (nd.ni_dvp == nd.ni_vp)
1883 vrele(nd.ni_dvp);
1884 else
1885 vput(nd.ni_dvp);
1886 vrele(nd.ni_vp);
1887 error = EEXIST;
1888 goto out;
1889 }
1890 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1891 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
1892 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1893 out:
1894 vrele(vp);
1895 vn_finished_write(mp, 0);
1896 return (error);
1897 }
1898
1899 /*
1900 * Make a symbolic link.
1901 */
1902 /* ARGSUSED */
1903 int
1904 sys_symlink(struct lwp *l, void *v, register_t *retval)
1905 {
1906 struct sys_symlink_args /* {
1907 syscallarg(const char *) path;
1908 syscallarg(const char *) link;
1909 } */ *uap = v;
1910 struct proc *p = l->l_proc;
1911 struct mount *mp;
1912 struct vattr vattr;
1913 char *path;
1914 int error;
1915 struct nameidata nd;
1916
1917 path = PNBUF_GET();
1918 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1919 if (error)
1920 goto out;
1921 restart:
1922 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1923 if ((error = namei(&nd)) != 0)
1924 goto out;
1925 if (nd.ni_vp) {
1926 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1927 if (nd.ni_dvp == nd.ni_vp)
1928 vrele(nd.ni_dvp);
1929 else
1930 vput(nd.ni_dvp);
1931 vrele(nd.ni_vp);
1932 error = EEXIST;
1933 goto out;
1934 }
1935 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1936 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1937 if (nd.ni_dvp == nd.ni_vp)
1938 vrele(nd.ni_dvp);
1939 else
1940 vput(nd.ni_dvp);
1941 if ((error = vn_start_write(NULL, &mp,
1942 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1943 return (error);
1944 goto restart;
1945 }
1946 VATTR_NULL(&vattr);
1947 vattr.va_type = VLNK;
1948 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
1949 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1950 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1951 if (error == 0)
1952 vput(nd.ni_vp);
1953 vn_finished_write(mp, 0);
1954 out:
1955 PNBUF_PUT(path);
1956 return (error);
1957 }
1958
1959 /*
1960 * Delete a whiteout from the filesystem.
1961 */
1962 /* ARGSUSED */
1963 int
1964 sys_undelete(struct lwp *l, void *v, register_t *retval)
1965 {
1966 struct sys_undelete_args /* {
1967 syscallarg(const char *) path;
1968 } */ *uap = v;
1969 int error;
1970 struct mount *mp;
1971 struct nameidata nd;
1972
1973 restart:
1974 NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1975 SCARG(uap, path), l);
1976 error = namei(&nd);
1977 if (error)
1978 return (error);
1979
1980 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1981 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1982 if (nd.ni_dvp == nd.ni_vp)
1983 vrele(nd.ni_dvp);
1984 else
1985 vput(nd.ni_dvp);
1986 if (nd.ni_vp)
1987 vrele(nd.ni_vp);
1988 return (EEXIST);
1989 }
1990 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1991 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1992 if (nd.ni_dvp == nd.ni_vp)
1993 vrele(nd.ni_dvp);
1994 else
1995 vput(nd.ni_dvp);
1996 if ((error = vn_start_write(NULL, &mp,
1997 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1998 return (error);
1999 goto restart;
2000 }
2001 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2002 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2003 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2004 vput(nd.ni_dvp);
2005 vn_finished_write(mp, 0);
2006 return (error);
2007 }
2008
2009 /*
2010 * Delete a name from the filesystem.
2011 */
2012 /* ARGSUSED */
2013 int
2014 sys_unlink(struct lwp *l, void *v, register_t *retval)
2015 {
2016 struct sys_unlink_args /* {
2017 syscallarg(const char *) path;
2018 } */ *uap = v;
2019 struct mount *mp;
2020 struct vnode *vp;
2021 int error;
2022 struct nameidata nd;
2023 #if NVERIEXEC > 0
2024 pathname_t pathbuf = NULL;
2025 #endif /* NVERIEXEC > 0 */
2026
2027 restart:
2028 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2029 SCARG(uap, path), l);
2030 if ((error = namei(&nd)) != 0)
2031 return (error);
2032 vp = nd.ni_vp;
2033
2034 /*
2035 * The root of a mounted filesystem cannot be deleted.
2036 */
2037 if (vp->v_flag & VROOT) {
2038 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2039 if (nd.ni_dvp == vp)
2040 vrele(nd.ni_dvp);
2041 else
2042 vput(nd.ni_dvp);
2043 vput(vp);
2044 error = EBUSY;
2045 goto out;
2046 }
2047
2048 #if NVERIEXEC > 0
2049 error = pathname_get(nd.ni_dirp, nd.ni_segflg, &pathbuf);
2050
2051 /* Handle remove requests for veriexec entries. */
2052 if (!error) {
2053 error = veriexec_removechk(vp, pathname_path(pathbuf), l);
2054 pathname_put(pathbuf);
2055 }
2056
2057 if (error) {
2058 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2059 if (nd.ni_dvp == vp)
2060 vrele(nd.ni_dvp);
2061 else
2062 vput(nd.ni_dvp);
2063 vput(vp);
2064 goto out;
2065 }
2066 #endif /* NVERIEXEC > 0 */
2067
2068 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2069 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2070 if (nd.ni_dvp == vp)
2071 vrele(nd.ni_dvp);
2072 else
2073 vput(nd.ni_dvp);
2074 vput(vp);
2075 if ((error = vn_start_write(NULL, &mp,
2076 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
2077 return (error);
2078 goto restart;
2079 }
2080 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2081 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2082 #ifdef FILEASSOC
2083 (void)fileassoc_file_delete(vp);
2084 #endif /* FILEASSOC */
2085 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2086 vn_finished_write(mp, 0);
2087 out:
2088 return (error);
2089 }
2090
2091 /*
2092 * Reposition read/write file offset.
2093 */
2094 int
2095 sys_lseek(struct lwp *l, void *v, register_t *retval)
2096 {
2097 struct sys_lseek_args /* {
2098 syscallarg(int) fd;
2099 syscallarg(int) pad;
2100 syscallarg(off_t) offset;
2101 syscallarg(int) whence;
2102 } */ *uap = v;
2103 struct proc *p = l->l_proc;
2104 kauth_cred_t cred = l->l_cred;
2105 struct filedesc *fdp = p->p_fd;
2106 struct file *fp;
2107 struct vnode *vp;
2108 struct vattr vattr;
2109 off_t newoff;
2110 int error;
2111
2112 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
2113 return (EBADF);
2114
2115 FILE_USE(fp);
2116
2117 vp = (struct vnode *)fp->f_data;
2118 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2119 error = ESPIPE;
2120 goto out;
2121 }
2122
2123 switch (SCARG(uap, whence)) {
2124 case SEEK_CUR:
2125 newoff = fp->f_offset + SCARG(uap, offset);
2126 break;
2127 case SEEK_END:
2128 error = VOP_GETATTR(vp, &vattr, cred, l);
2129 if (error)
2130 goto out;
2131 newoff = SCARG(uap, offset) + vattr.va_size;
2132 break;
2133 case SEEK_SET:
2134 newoff = SCARG(uap, offset);
2135 break;
2136 default:
2137 error = EINVAL;
2138 goto out;
2139 }
2140 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
2141 goto out;
2142
2143 *(off_t *)retval = fp->f_offset = newoff;
2144 out:
2145 FILE_UNUSE(fp, l);
2146 return (error);
2147 }
2148
2149 /*
2150 * Positional read system call.
2151 */
2152 int
2153 sys_pread(struct lwp *l, void *v, register_t *retval)
2154 {
2155 struct sys_pread_args /* {
2156 syscallarg(int) fd;
2157 syscallarg(void *) buf;
2158 syscallarg(size_t) nbyte;
2159 syscallarg(off_t) offset;
2160 } */ *uap = v;
2161 struct proc *p = l->l_proc;
2162 struct filedesc *fdp = p->p_fd;
2163 struct file *fp;
2164 struct vnode *vp;
2165 off_t offset;
2166 int error, fd = SCARG(uap, fd);
2167
2168 if ((fp = fd_getfile(fdp, fd)) == NULL)
2169 return (EBADF);
2170
2171 if ((fp->f_flag & FREAD) == 0) {
2172 simple_unlock(&fp->f_slock);
2173 return (EBADF);
2174 }
2175
2176 FILE_USE(fp);
2177
2178 vp = (struct vnode *)fp->f_data;
2179 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2180 error = ESPIPE;
2181 goto out;
2182 }
2183
2184 offset = SCARG(uap, offset);
2185
2186 /*
2187 * XXX This works because no file systems actually
2188 * XXX take any action on the seek operation.
2189 */
2190 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2191 goto out;
2192
2193 /* dofileread() will unuse the descriptor for us */
2194 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2195 &offset, 0, retval));
2196
2197 out:
2198 FILE_UNUSE(fp, l);
2199 return (error);
2200 }
2201
2202 /*
2203 * Positional scatter read system call.
2204 */
2205 int
2206 sys_preadv(struct lwp *l, void *v, register_t *retval)
2207 {
2208 struct sys_preadv_args /* {
2209 syscallarg(int) fd;
2210 syscallarg(const struct iovec *) iovp;
2211 syscallarg(int) iovcnt;
2212 syscallarg(off_t) offset;
2213 } */ *uap = v;
2214 struct proc *p = l->l_proc;
2215 struct filedesc *fdp = p->p_fd;
2216 struct file *fp;
2217 struct vnode *vp;
2218 off_t offset;
2219 int error, fd = SCARG(uap, fd);
2220
2221 if ((fp = fd_getfile(fdp, fd)) == NULL)
2222 return (EBADF);
2223
2224 if ((fp->f_flag & FREAD) == 0) {
2225 simple_unlock(&fp->f_slock);
2226 return (EBADF);
2227 }
2228
2229 FILE_USE(fp);
2230
2231 vp = (struct vnode *)fp->f_data;
2232 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2233 error = ESPIPE;
2234 goto out;
2235 }
2236
2237 offset = SCARG(uap, offset);
2238
2239 /*
2240 * XXX This works because no file systems actually
2241 * XXX take any action on the seek operation.
2242 */
2243 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2244 goto out;
2245
2246 /* dofilereadv() will unuse the descriptor for us */
2247 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2248 &offset, 0, retval));
2249
2250 out:
2251 FILE_UNUSE(fp, l);
2252 return (error);
2253 }
2254
2255 /*
2256 * Positional write system call.
2257 */
2258 int
2259 sys_pwrite(struct lwp *l, void *v, register_t *retval)
2260 {
2261 struct sys_pwrite_args /* {
2262 syscallarg(int) fd;
2263 syscallarg(const void *) buf;
2264 syscallarg(size_t) nbyte;
2265 syscallarg(off_t) offset;
2266 } */ *uap = v;
2267 struct proc *p = l->l_proc;
2268 struct filedesc *fdp = p->p_fd;
2269 struct file *fp;
2270 struct vnode *vp;
2271 off_t offset;
2272 int error, fd = SCARG(uap, fd);
2273
2274 if ((fp = fd_getfile(fdp, fd)) == NULL)
2275 return (EBADF);
2276
2277 if ((fp->f_flag & FWRITE) == 0) {
2278 simple_unlock(&fp->f_slock);
2279 return (EBADF);
2280 }
2281
2282 FILE_USE(fp);
2283
2284 vp = (struct vnode *)fp->f_data;
2285 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2286 error = ESPIPE;
2287 goto out;
2288 }
2289
2290 offset = SCARG(uap, offset);
2291
2292 /*
2293 * XXX This works because no file systems actually
2294 * XXX take any action on the seek operation.
2295 */
2296 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2297 goto out;
2298
2299 /* dofilewrite() will unuse the descriptor for us */
2300 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2301 &offset, 0, retval));
2302
2303 out:
2304 FILE_UNUSE(fp, l);
2305 return (error);
2306 }
2307
2308 /*
2309 * Positional gather write system call.
2310 */
2311 int
2312 sys_pwritev(struct lwp *l, void *v, register_t *retval)
2313 {
2314 struct sys_pwritev_args /* {
2315 syscallarg(int) fd;
2316 syscallarg(const struct iovec *) iovp;
2317 syscallarg(int) iovcnt;
2318 syscallarg(off_t) offset;
2319 } */ *uap = v;
2320 struct proc *p = l->l_proc;
2321 struct filedesc *fdp = p->p_fd;
2322 struct file *fp;
2323 struct vnode *vp;
2324 off_t offset;
2325 int error, fd = SCARG(uap, fd);
2326
2327 if ((fp = fd_getfile(fdp, fd)) == NULL)
2328 return (EBADF);
2329
2330 if ((fp->f_flag & FWRITE) == 0) {
2331 simple_unlock(&fp->f_slock);
2332 return (EBADF);
2333 }
2334
2335 FILE_USE(fp);
2336
2337 vp = (struct vnode *)fp->f_data;
2338 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2339 error = ESPIPE;
2340 goto out;
2341 }
2342
2343 offset = SCARG(uap, offset);
2344
2345 /*
2346 * XXX This works because no file systems actually
2347 * XXX take any action on the seek operation.
2348 */
2349 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2350 goto out;
2351
2352 /* dofilewritev() will unuse the descriptor for us */
2353 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2354 &offset, 0, retval));
2355
2356 out:
2357 FILE_UNUSE(fp, l);
2358 return (error);
2359 }
2360
2361 /*
2362 * Check access permissions.
2363 */
2364 int
2365 sys_access(struct lwp *l, void *v, register_t *retval)
2366 {
2367 struct sys_access_args /* {
2368 syscallarg(const char *) path;
2369 syscallarg(int) flags;
2370 } */ *uap = v;
2371 kauth_cred_t cred;
2372 struct vnode *vp;
2373 int error, flags;
2374 struct nameidata nd;
2375
2376 if ((SCARG(uap, flags) & ~(R_OK | W_OK | X_OK)) != 0) {
2377 /* nonsense flags */
2378 return EINVAL;
2379 }
2380
2381 cred = kauth_cred_dup(l->l_cred);
2382 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2383 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2384 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2385 SCARG(uap, path), l);
2386 /* Override default credentials */
2387 nd.ni_cnd.cn_cred = cred;
2388 if ((error = namei(&nd)) != 0)
2389 goto out;
2390 vp = nd.ni_vp;
2391
2392 /* Flags == 0 means only check for existence. */
2393 if (SCARG(uap, flags)) {
2394 flags = 0;
2395 if (SCARG(uap, flags) & R_OK)
2396 flags |= VREAD;
2397 if (SCARG(uap, flags) & W_OK)
2398 flags |= VWRITE;
2399 if (SCARG(uap, flags) & X_OK)
2400 flags |= VEXEC;
2401
2402 error = VOP_ACCESS(vp, flags, cred, l);
2403 if (!error && (flags & VWRITE))
2404 error = vn_writechk(vp);
2405 }
2406 vput(vp);
2407 out:
2408 kauth_cred_free(cred);
2409 return (error);
2410 }
2411
2412 /*
2413 * Get file status; this version follows links.
2414 */
2415 /* ARGSUSED */
2416 int
2417 sys___stat30(struct lwp *l, void *v, register_t *retval)
2418 {
2419 struct sys___stat30_args /* {
2420 syscallarg(const char *) path;
2421 syscallarg(struct stat *) ub;
2422 } */ *uap = v;
2423 struct stat sb;
2424 int error;
2425 struct nameidata nd;
2426
2427 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2428 SCARG(uap, path), l);
2429 if ((error = namei(&nd)) != 0)
2430 return (error);
2431 error = vn_stat(nd.ni_vp, &sb, l);
2432 vput(nd.ni_vp);
2433 if (error)
2434 return (error);
2435 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2436 return (error);
2437 }
2438
2439 /*
2440 * Get file status; this version does not follow links.
2441 */
2442 /* ARGSUSED */
2443 int
2444 sys___lstat30(struct lwp *l, void *v, register_t *retval)
2445 {
2446 struct sys___lstat30_args /* {
2447 syscallarg(const char *) path;
2448 syscallarg(struct stat *) ub;
2449 } */ *uap = v;
2450 struct stat sb;
2451 int error;
2452 struct nameidata nd;
2453
2454 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2455 SCARG(uap, path), l);
2456 if ((error = namei(&nd)) != 0)
2457 return (error);
2458 error = vn_stat(nd.ni_vp, &sb, l);
2459 vput(nd.ni_vp);
2460 if (error)
2461 return (error);
2462 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2463 return (error);
2464 }
2465
2466 /*
2467 * Get configurable pathname variables.
2468 */
2469 /* ARGSUSED */
2470 int
2471 sys_pathconf(struct lwp *l, void *v, register_t *retval)
2472 {
2473 struct sys_pathconf_args /* {
2474 syscallarg(const char *) path;
2475 syscallarg(int) name;
2476 } */ *uap = v;
2477 int error;
2478 struct nameidata nd;
2479
2480 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2481 SCARG(uap, path), l);
2482 if ((error = namei(&nd)) != 0)
2483 return (error);
2484 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2485 vput(nd.ni_vp);
2486 return (error);
2487 }
2488
2489 /*
2490 * Return target name of a symbolic link.
2491 */
2492 /* ARGSUSED */
2493 int
2494 sys_readlink(struct lwp *l, void *v, register_t *retval)
2495 {
2496 struct sys_readlink_args /* {
2497 syscallarg(const char *) path;
2498 syscallarg(char *) buf;
2499 syscallarg(size_t) count;
2500 } */ *uap = v;
2501 struct vnode *vp;
2502 struct iovec aiov;
2503 struct uio auio;
2504 int error;
2505 struct nameidata nd;
2506
2507 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2508 SCARG(uap, path), l);
2509 if ((error = namei(&nd)) != 0)
2510 return (error);
2511 vp = nd.ni_vp;
2512 if (vp->v_type != VLNK)
2513 error = EINVAL;
2514 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2515 (error = VOP_ACCESS(vp, VREAD, l->l_cred, l)) == 0) {
2516 aiov.iov_base = SCARG(uap, buf);
2517 aiov.iov_len = SCARG(uap, count);
2518 auio.uio_iov = &aiov;
2519 auio.uio_iovcnt = 1;
2520 auio.uio_offset = 0;
2521 auio.uio_rw = UIO_READ;
2522 KASSERT(l == curlwp);
2523 auio.uio_vmspace = l->l_proc->p_vmspace;
2524 auio.uio_resid = SCARG(uap, count);
2525 error = VOP_READLINK(vp, &auio, l->l_cred);
2526 }
2527 vput(vp);
2528 *retval = SCARG(uap, count) - auio.uio_resid;
2529 return (error);
2530 }
2531
2532 /*
2533 * Change flags of a file given a path name.
2534 */
2535 /* ARGSUSED */
2536 int
2537 sys_chflags(struct lwp *l, void *v, register_t *retval)
2538 {
2539 struct sys_chflags_args /* {
2540 syscallarg(const char *) path;
2541 syscallarg(u_long) flags;
2542 } */ *uap = v;
2543 struct vnode *vp;
2544 int error;
2545 struct nameidata nd;
2546
2547 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2548 if ((error = namei(&nd)) != 0)
2549 return (error);
2550 vp = nd.ni_vp;
2551 error = change_flags(vp, SCARG(uap, flags), l);
2552 vput(vp);
2553 return (error);
2554 }
2555
2556 /*
2557 * Change flags of a file given a file descriptor.
2558 */
2559 /* ARGSUSED */
2560 int
2561 sys_fchflags(struct lwp *l, void *v, register_t *retval)
2562 {
2563 struct sys_fchflags_args /* {
2564 syscallarg(int) fd;
2565 syscallarg(u_long) flags;
2566 } */ *uap = v;
2567 struct proc *p = l->l_proc;
2568 struct vnode *vp;
2569 struct file *fp;
2570 int error;
2571
2572 /* getvnode() will use the descriptor for us */
2573 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2574 return (error);
2575 vp = (struct vnode *)fp->f_data;
2576 error = change_flags(vp, SCARG(uap, flags), l);
2577 VOP_UNLOCK(vp, 0);
2578 FILE_UNUSE(fp, l);
2579 return (error);
2580 }
2581
2582 /*
2583 * Change flags of a file given a path name; this version does
2584 * not follow links.
2585 */
2586 int
2587 sys_lchflags(struct lwp *l, void *v, register_t *retval)
2588 {
2589 struct sys_lchflags_args /* {
2590 syscallarg(const char *) path;
2591 syscallarg(u_long) flags;
2592 } */ *uap = v;
2593 struct vnode *vp;
2594 int error;
2595 struct nameidata nd;
2596
2597 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2598 if ((error = namei(&nd)) != 0)
2599 return (error);
2600 vp = nd.ni_vp;
2601 error = change_flags(vp, SCARG(uap, flags), l);
2602 vput(vp);
2603 return (error);
2604 }
2605
2606 /*
2607 * Common routine to change flags of a file.
2608 */
2609 int
2610 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2611 {
2612 struct mount *mp;
2613 struct vattr vattr;
2614 int error;
2615
2616 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2617 return (error);
2618 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2619 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2620 /*
2621 * Non-superusers cannot change the flags on devices, even if they
2622 * own them.
2623 */
2624 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2625 &l->l_acflag) != 0) {
2626 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2627 goto out;
2628 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2629 error = EINVAL;
2630 goto out;
2631 }
2632 }
2633 VATTR_NULL(&vattr);
2634 vattr.va_flags = flags;
2635 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2636 out:
2637 vn_finished_write(mp, 0);
2638 return (error);
2639 }
2640
2641 /*
2642 * Change mode of a file given path name; this version follows links.
2643 */
2644 /* ARGSUSED */
2645 int
2646 sys_chmod(struct lwp *l, void *v, register_t *retval)
2647 {
2648 struct sys_chmod_args /* {
2649 syscallarg(const char *) path;
2650 syscallarg(int) mode;
2651 } */ *uap = v;
2652 int error;
2653 struct nameidata nd;
2654
2655 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2656 if ((error = namei(&nd)) != 0)
2657 return (error);
2658
2659 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2660
2661 vrele(nd.ni_vp);
2662 return (error);
2663 }
2664
2665 /*
2666 * Change mode of a file given a file descriptor.
2667 */
2668 /* ARGSUSED */
2669 int
2670 sys_fchmod(struct lwp *l, void *v, register_t *retval)
2671 {
2672 struct sys_fchmod_args /* {
2673 syscallarg(int) fd;
2674 syscallarg(int) mode;
2675 } */ *uap = v;
2676 struct proc *p = l->l_proc;
2677 struct file *fp;
2678 int error;
2679
2680 /* getvnode() will use the descriptor for us */
2681 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2682 return (error);
2683
2684 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
2685 FILE_UNUSE(fp, l);
2686 return (error);
2687 }
2688
2689 /*
2690 * Change mode of a file given path name; this version does not follow links.
2691 */
2692 /* ARGSUSED */
2693 int
2694 sys_lchmod(struct lwp *l, void *v, register_t *retval)
2695 {
2696 struct sys_lchmod_args /* {
2697 syscallarg(const char *) path;
2698 syscallarg(int) mode;
2699 } */ *uap = v;
2700 int error;
2701 struct nameidata nd;
2702
2703 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2704 if ((error = namei(&nd)) != 0)
2705 return (error);
2706
2707 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2708
2709 vrele(nd.ni_vp);
2710 return (error);
2711 }
2712
2713 /*
2714 * Common routine to set mode given a vnode.
2715 */
2716 static int
2717 change_mode(struct vnode *vp, int mode, struct lwp *l)
2718 {
2719 struct mount *mp;
2720 struct vattr vattr;
2721 int error;
2722
2723 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2724 return (error);
2725 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2726 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2727 VATTR_NULL(&vattr);
2728 vattr.va_mode = mode & ALLPERMS;
2729 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2730 VOP_UNLOCK(vp, 0);
2731 vn_finished_write(mp, 0);
2732 return (error);
2733 }
2734
2735 /*
2736 * Set ownership given a path name; this version follows links.
2737 */
2738 /* ARGSUSED */
2739 int
2740 sys_chown(struct lwp *l, void *v, register_t *retval)
2741 {
2742 struct sys_chown_args /* {
2743 syscallarg(const char *) path;
2744 syscallarg(uid_t) uid;
2745 syscallarg(gid_t) gid;
2746 } */ *uap = v;
2747 int error;
2748 struct nameidata nd;
2749
2750 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2751 if ((error = namei(&nd)) != 0)
2752 return (error);
2753
2754 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2755
2756 vrele(nd.ni_vp);
2757 return (error);
2758 }
2759
2760 /*
2761 * Set ownership given a path name; this version follows links.
2762 * Provides POSIX semantics.
2763 */
2764 /* ARGSUSED */
2765 int
2766 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
2767 {
2768 struct sys_chown_args /* {
2769 syscallarg(const char *) path;
2770 syscallarg(uid_t) uid;
2771 syscallarg(gid_t) gid;
2772 } */ *uap = v;
2773 int error;
2774 struct nameidata nd;
2775
2776 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2777 if ((error = namei(&nd)) != 0)
2778 return (error);
2779
2780 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2781
2782 vrele(nd.ni_vp);
2783 return (error);
2784 }
2785
2786 /*
2787 * Set ownership given a file descriptor.
2788 */
2789 /* ARGSUSED */
2790 int
2791 sys_fchown(struct lwp *l, void *v, register_t *retval)
2792 {
2793 struct sys_fchown_args /* {
2794 syscallarg(int) fd;
2795 syscallarg(uid_t) uid;
2796 syscallarg(gid_t) gid;
2797 } */ *uap = v;
2798 struct proc *p = l->l_proc;
2799 int error;
2800 struct file *fp;
2801
2802 /* getvnode() will use the descriptor for us */
2803 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2804 return (error);
2805
2806 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2807 SCARG(uap, gid), l, 0);
2808 FILE_UNUSE(fp, l);
2809 return (error);
2810 }
2811
2812 /*
2813 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2814 */
2815 /* ARGSUSED */
2816 int
2817 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
2818 {
2819 struct sys_fchown_args /* {
2820 syscallarg(int) fd;
2821 syscallarg(uid_t) uid;
2822 syscallarg(gid_t) gid;
2823 } */ *uap = v;
2824 struct proc *p = l->l_proc;
2825 int error;
2826 struct file *fp;
2827
2828 /* getvnode() will use the descriptor for us */
2829 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2830 return (error);
2831
2832 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2833 SCARG(uap, gid), l, 1);
2834 FILE_UNUSE(fp, l);
2835 return (error);
2836 }
2837
2838 /*
2839 * Set ownership given a path name; this version does not follow links.
2840 */
2841 /* ARGSUSED */
2842 int
2843 sys_lchown(struct lwp *l, void *v, register_t *retval)
2844 {
2845 struct sys_lchown_args /* {
2846 syscallarg(const char *) path;
2847 syscallarg(uid_t) uid;
2848 syscallarg(gid_t) gid;
2849 } */ *uap = v;
2850 int error;
2851 struct nameidata nd;
2852
2853 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2854 if ((error = namei(&nd)) != 0)
2855 return (error);
2856
2857 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2858
2859 vrele(nd.ni_vp);
2860 return (error);
2861 }
2862
2863 /*
2864 * Set ownership given a path name; this version does not follow links.
2865 * Provides POSIX/XPG semantics.
2866 */
2867 /* ARGSUSED */
2868 int
2869 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
2870 {
2871 struct sys_lchown_args /* {
2872 syscallarg(const char *) path;
2873 syscallarg(uid_t) uid;
2874 syscallarg(gid_t) gid;
2875 } */ *uap = v;
2876 int error;
2877 struct nameidata nd;
2878
2879 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2880 if ((error = namei(&nd)) != 0)
2881 return (error);
2882
2883 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2884
2885 vrele(nd.ni_vp);
2886 return (error);
2887 }
2888
2889 /*
2890 * Common routine to set ownership given a vnode.
2891 */
2892 static int
2893 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2894 int posix_semantics)
2895 {
2896 struct mount *mp;
2897 struct vattr vattr;
2898 mode_t newmode;
2899 int error;
2900
2901 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2902 return (error);
2903 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2904 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2905 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2906 goto out;
2907
2908 #define CHANGED(x) ((int)(x) != -1)
2909 newmode = vattr.va_mode;
2910 if (posix_semantics) {
2911 /*
2912 * POSIX/XPG semantics: if the caller is not the super-user,
2913 * clear set-user-id and set-group-id bits. Both POSIX and
2914 * the XPG consider the behaviour for calls by the super-user
2915 * implementation-defined; we leave the set-user-id and set-
2916 * group-id settings intact in that case.
2917 */
2918 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2919 NULL) != 0)
2920 newmode &= ~(S_ISUID | S_ISGID);
2921 } else {
2922 /*
2923 * NetBSD semantics: when changing owner and/or group,
2924 * clear the respective bit(s).
2925 */
2926 if (CHANGED(uid))
2927 newmode &= ~S_ISUID;
2928 if (CHANGED(gid))
2929 newmode &= ~S_ISGID;
2930 }
2931 /* Update va_mode iff altered. */
2932 if (vattr.va_mode == newmode)
2933 newmode = VNOVAL;
2934
2935 VATTR_NULL(&vattr);
2936 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2937 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2938 vattr.va_mode = newmode;
2939 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2940 #undef CHANGED
2941
2942 out:
2943 VOP_UNLOCK(vp, 0);
2944 vn_finished_write(mp, 0);
2945 return (error);
2946 }
2947
2948 /*
2949 * Set the access and modification times given a path name; this
2950 * version follows links.
2951 */
2952 /* ARGSUSED */
2953 int
2954 sys_utimes(struct lwp *l, void *v, register_t *retval)
2955 {
2956 struct sys_utimes_args /* {
2957 syscallarg(const char *) path;
2958 syscallarg(const struct timeval *) tptr;
2959 } */ *uap = v;
2960 int error;
2961 struct nameidata nd;
2962
2963 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2964 if ((error = namei(&nd)) != 0)
2965 return (error);
2966
2967 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
2968
2969 vrele(nd.ni_vp);
2970 return (error);
2971 }
2972
2973 /*
2974 * Set the access and modification times given a file descriptor.
2975 */
2976 /* ARGSUSED */
2977 int
2978 sys_futimes(struct lwp *l, void *v, register_t *retval)
2979 {
2980 struct sys_futimes_args /* {
2981 syscallarg(int) fd;
2982 syscallarg(const struct timeval *) tptr;
2983 } */ *uap = v;
2984 struct proc *p = l->l_proc;
2985 int error;
2986 struct file *fp;
2987
2988 /* getvnode() will use the descriptor for us */
2989 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2990 return (error);
2991
2992 error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), l);
2993 FILE_UNUSE(fp, l);
2994 return (error);
2995 }
2996
2997 /*
2998 * Set the access and modification times given a path name; this
2999 * version does not follow links.
3000 */
3001 /* ARGSUSED */
3002 int
3003 sys_lutimes(struct lwp *l, void *v, register_t *retval)
3004 {
3005 struct sys_lutimes_args /* {
3006 syscallarg(const char *) path;
3007 syscallarg(const struct timeval *) tptr;
3008 } */ *uap = v;
3009 int error;
3010 struct nameidata nd;
3011
3012 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3013 if ((error = namei(&nd)) != 0)
3014 return (error);
3015
3016 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
3017
3018 vrele(nd.ni_vp);
3019 return (error);
3020 }
3021
3022 /*
3023 * Common routine to set access and modification times given a vnode.
3024 */
3025 static int
3026 change_utimes(struct vnode *vp, const struct timeval *tptr, struct lwp *l)
3027 {
3028 struct mount *mp;
3029 struct vattr vattr;
3030 int error;
3031
3032 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3033 return (error);
3034 VATTR_NULL(&vattr);
3035 if (tptr == NULL) {
3036 nanotime(&vattr.va_atime);
3037 vattr.va_mtime = vattr.va_atime;
3038 vattr.va_vaflags |= VA_UTIMES_NULL;
3039 } else {
3040 struct timeval tv[2];
3041
3042 error = copyin(tptr, tv, sizeof(tv));
3043 if (error)
3044 goto out;
3045 TIMEVAL_TO_TIMESPEC(&tv[0], &vattr.va_atime);
3046 TIMEVAL_TO_TIMESPEC(&tv[1], &vattr.va_mtime);
3047 }
3048 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3049 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3050 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3051 VOP_UNLOCK(vp, 0);
3052 out:
3053 vn_finished_write(mp, 0);
3054 return (error);
3055 }
3056
3057 /*
3058 * Truncate a file given its path name.
3059 */
3060 /* ARGSUSED */
3061 int
3062 sys_truncate(struct lwp *l, void *v, register_t *retval)
3063 {
3064 struct sys_truncate_args /* {
3065 syscallarg(const char *) path;
3066 syscallarg(int) pad;
3067 syscallarg(off_t) length;
3068 } */ *uap = v;
3069 struct vnode *vp;
3070 struct mount *mp;
3071 struct vattr vattr;
3072 int error;
3073 struct nameidata nd;
3074
3075 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3076 if ((error = namei(&nd)) != 0)
3077 return (error);
3078 vp = nd.ni_vp;
3079 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3080 vrele(vp);
3081 return (error);
3082 }
3083 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3084 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3085 if (vp->v_type == VDIR)
3086 error = EISDIR;
3087 else if ((error = vn_writechk(vp)) == 0 &&
3088 (error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) == 0) {
3089 VATTR_NULL(&vattr);
3090 vattr.va_size = SCARG(uap, length);
3091 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3092 }
3093 vput(vp);
3094 vn_finished_write(mp, 0);
3095 return (error);
3096 }
3097
3098 /*
3099 * Truncate a file given a file descriptor.
3100 */
3101 /* ARGSUSED */
3102 int
3103 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
3104 {
3105 struct sys_ftruncate_args /* {
3106 syscallarg(int) fd;
3107 syscallarg(int) pad;
3108 syscallarg(off_t) length;
3109 } */ *uap = v;
3110 struct proc *p = l->l_proc;
3111 struct mount *mp;
3112 struct vattr vattr;
3113 struct vnode *vp;
3114 struct file *fp;
3115 int error;
3116
3117 /* getvnode() will use the descriptor for us */
3118 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3119 return (error);
3120 if ((fp->f_flag & FWRITE) == 0) {
3121 error = EINVAL;
3122 goto out;
3123 }
3124 vp = (struct vnode *)fp->f_data;
3125 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3126 FILE_UNUSE(fp, l);
3127 return (error);
3128 }
3129 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3130 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3131 if (vp->v_type == VDIR)
3132 error = EISDIR;
3133 else if ((error = vn_writechk(vp)) == 0) {
3134 VATTR_NULL(&vattr);
3135 vattr.va_size = SCARG(uap, length);
3136 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
3137 }
3138 VOP_UNLOCK(vp, 0);
3139 vn_finished_write(mp, 0);
3140 out:
3141 FILE_UNUSE(fp, l);
3142 return (error);
3143 }
3144
3145 /*
3146 * Sync an open file.
3147 */
3148 /* ARGSUSED */
3149 int
3150 sys_fsync(struct lwp *l, void *v, register_t *retval)
3151 {
3152 struct sys_fsync_args /* {
3153 syscallarg(int) fd;
3154 } */ *uap = v;
3155 struct proc *p = l->l_proc;
3156 struct vnode *vp;
3157 struct mount *mp;
3158 struct file *fp;
3159 int error;
3160
3161 /* getvnode() will use the descriptor for us */
3162 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3163 return (error);
3164 vp = (struct vnode *)fp->f_data;
3165 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3166 FILE_UNUSE(fp, l);
3167 return (error);
3168 }
3169 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3170 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
3171 if (error == 0 && bioops.io_fsync != NULL &&
3172 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3173 (*bioops.io_fsync)(vp, 0);
3174 VOP_UNLOCK(vp, 0);
3175 vn_finished_write(mp, 0);
3176 FILE_UNUSE(fp, l);
3177 return (error);
3178 }
3179
3180 /*
3181 * Sync a range of file data. API modeled after that found in AIX.
3182 *
3183 * FDATASYNC indicates that we need only save enough metadata to be able
3184 * to re-read the written data. Note we duplicate AIX's requirement that
3185 * the file be open for writing.
3186 */
3187 /* ARGSUSED */
3188 int
3189 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
3190 {
3191 struct sys_fsync_range_args /* {
3192 syscallarg(int) fd;
3193 syscallarg(int) flags;
3194 syscallarg(off_t) start;
3195 syscallarg(off_t) length;
3196 } */ *uap = v;
3197 struct proc *p = l->l_proc;
3198 struct vnode *vp;
3199 struct file *fp;
3200 int flags, nflags;
3201 off_t s, e, len;
3202 int error;
3203
3204 /* getvnode() will use the descriptor for us */
3205 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3206 return (error);
3207
3208 if ((fp->f_flag & FWRITE) == 0) {
3209 error = EBADF;
3210 goto out;
3211 }
3212
3213 flags = SCARG(uap, flags);
3214 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3215 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3216 error = EINVAL;
3217 goto out;
3218 }
3219 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3220 if (flags & FDATASYNC)
3221 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3222 else
3223 nflags = FSYNC_WAIT;
3224 if (flags & FDISKSYNC)
3225 nflags |= FSYNC_CACHE;
3226
3227 len = SCARG(uap, length);
3228 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3229 if (len) {
3230 s = SCARG(uap, start);
3231 e = s + len;
3232 if (e < s) {
3233 error = EINVAL;
3234 goto out;
3235 }
3236 } else {
3237 e = 0;
3238 s = 0;
3239 }
3240
3241 vp = (struct vnode *)fp->f_data;
3242 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3243 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
3244
3245 if (error == 0 && bioops.io_fsync != NULL &&
3246 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3247 (*bioops.io_fsync)(vp, nflags);
3248
3249 VOP_UNLOCK(vp, 0);
3250 out:
3251 FILE_UNUSE(fp, l);
3252 return (error);
3253 }
3254
3255 /*
3256 * Sync the data of an open file.
3257 */
3258 /* ARGSUSED */
3259 int
3260 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
3261 {
3262 struct sys_fdatasync_args /* {
3263 syscallarg(int) fd;
3264 } */ *uap = v;
3265 struct proc *p = l->l_proc;
3266 struct vnode *vp;
3267 struct file *fp;
3268 int error;
3269
3270 /* getvnode() will use the descriptor for us */
3271 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3272 return (error);
3273 if ((fp->f_flag & FWRITE) == 0) {
3274 FILE_UNUSE(fp, l);
3275 return (EBADF);
3276 }
3277 vp = (struct vnode *)fp->f_data;
3278 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3279 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
3280 VOP_UNLOCK(vp, 0);
3281 FILE_UNUSE(fp, l);
3282 return (error);
3283 }
3284
3285 /*
3286 * Rename files, (standard) BSD semantics frontend.
3287 */
3288 /* ARGSUSED */
3289 int
3290 sys_rename(struct lwp *l, void *v, register_t *retval)
3291 {
3292 struct sys_rename_args /* {
3293 syscallarg(const char *) from;
3294 syscallarg(const char *) to;
3295 } */ *uap = v;
3296
3297 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
3298 }
3299
3300 /*
3301 * Rename files, POSIX semantics frontend.
3302 */
3303 /* ARGSUSED */
3304 int
3305 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
3306 {
3307 struct sys___posix_rename_args /* {
3308 syscallarg(const char *) from;
3309 syscallarg(const char *) to;
3310 } */ *uap = v;
3311
3312 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
3313 }
3314
3315 /*
3316 * Rename files. Source and destination must either both be directories,
3317 * or both not be directories. If target is a directory, it must be empty.
3318 * If `from' and `to' refer to the same object, the value of the `retain'
3319 * argument is used to determine whether `from' will be
3320 *
3321 * (retain == 0) deleted unless `from' and `to' refer to the same
3322 * object in the file system's name space (BSD).
3323 * (retain == 1) always retained (POSIX).
3324 */
3325 static int
3326 rename_files(const char *from, const char *to, struct lwp *l, int retain)
3327 {
3328 struct mount *mp = NULL;
3329 struct vnode *tvp, *fvp, *tdvp;
3330 struct nameidata fromnd, tond;
3331 struct proc *p;
3332 int error;
3333
3334 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART, UIO_USERSPACE,
3335 from, l);
3336 if ((error = namei(&fromnd)) != 0)
3337 return (error);
3338 if (fromnd.ni_dvp != fromnd.ni_vp)
3339 VOP_UNLOCK(fromnd.ni_dvp, 0);
3340 fvp = fromnd.ni_vp;
3341 error = vn_start_write(fvp, &mp, V_WAIT | V_PCATCH);
3342 if (error != 0) {
3343 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3344 vrele(fromnd.ni_dvp);
3345 vrele(fvp);
3346 if (fromnd.ni_startdir)
3347 vrele(fromnd.ni_startdir);
3348 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3349 return (error);
3350 }
3351 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3352 (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
3353 if ((error = namei(&tond)) != 0) {
3354 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3355 vrele(fromnd.ni_dvp);
3356 vrele(fvp);
3357 goto out1;
3358 }
3359 tdvp = tond.ni_dvp;
3360 tvp = tond.ni_vp;
3361
3362 if (tvp != NULL) {
3363 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3364 error = ENOTDIR;
3365 goto out;
3366 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3367 error = EISDIR;
3368 goto out;
3369 }
3370 }
3371
3372 if (fvp == tdvp)
3373 error = EINVAL;
3374
3375 /*
3376 * Source and destination refer to the same object.
3377 */
3378 if (fvp == tvp) {
3379 if (retain)
3380 error = -1;
3381 else if (fromnd.ni_dvp == tdvp &&
3382 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3383 !memcmp(fromnd.ni_cnd.cn_nameptr,
3384 tond.ni_cnd.cn_nameptr,
3385 fromnd.ni_cnd.cn_namelen))
3386 error = -1;
3387 }
3388
3389 #if NVERIEXEC > 0
3390 if (!error) {
3391 pathname_t frompath = NULL, topath = NULL;
3392
3393 error = pathname_get(fromnd.ni_dirp, fromnd.ni_segflg,
3394 &frompath);
3395 if (!error)
3396 error = pathname_get(tond.ni_dirp, tond.ni_segflg,
3397 &topath);
3398 if (!error)
3399 error = veriexec_renamechk(fvp, pathname_path(frompath),
3400 tvp, pathname_path(topath), l);
3401
3402 pathname_put(frompath);
3403 pathname_put(topath);
3404 }
3405 #endif /* NVERIEXEC > 0 */
3406
3407 out:
3408 p = l->l_proc;
3409 if (!error) {
3410 VOP_LEASE(tdvp, l, l->l_cred, LEASE_WRITE);
3411 if (fromnd.ni_dvp != tdvp)
3412 VOP_LEASE(fromnd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3413 if (tvp) {
3414 VOP_LEASE(tvp, l, l->l_cred, LEASE_WRITE);
3415 }
3416 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3417 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3418 } else {
3419 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3420 if (tdvp == tvp)
3421 vrele(tdvp);
3422 else
3423 vput(tdvp);
3424 if (tvp)
3425 vput(tvp);
3426 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3427 vrele(fromnd.ni_dvp);
3428 vrele(fvp);
3429 }
3430 vrele(tond.ni_startdir);
3431 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3432 out1:
3433 vn_finished_write(mp, 0);
3434 if (fromnd.ni_startdir)
3435 vrele(fromnd.ni_startdir);
3436 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3437 return (error == -1 ? 0 : error);
3438 }
3439
3440 /*
3441 * Make a directory file.
3442 */
3443 /* ARGSUSED */
3444 int
3445 sys_mkdir(struct lwp *l, void *v, register_t *retval)
3446 {
3447 struct sys_mkdir_args /* {
3448 syscallarg(const char *) path;
3449 syscallarg(int) mode;
3450 } */ *uap = v;
3451 struct proc *p = l->l_proc;
3452 struct mount *mp;
3453 struct vnode *vp;
3454 struct vattr vattr;
3455 int error;
3456 struct nameidata nd;
3457
3458 restart:
3459 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE,
3460 SCARG(uap, path), l);
3461 if ((error = namei(&nd)) != 0)
3462 return (error);
3463 vp = nd.ni_vp;
3464 if (vp != NULL) {
3465 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3466 if (nd.ni_dvp == vp)
3467 vrele(nd.ni_dvp);
3468 else
3469 vput(nd.ni_dvp);
3470 vrele(vp);
3471 return (EEXIST);
3472 }
3473 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3474 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3475 if (nd.ni_dvp == vp)
3476 vrele(nd.ni_dvp);
3477 else
3478 vput(nd.ni_dvp);
3479 if ((error = vn_start_write(NULL, &mp,
3480 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3481 return (error);
3482 goto restart;
3483 }
3484 VATTR_NULL(&vattr);
3485 vattr.va_type = VDIR;
3486 vattr.va_mode =
3487 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3488 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3489 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3490 if (!error)
3491 vput(nd.ni_vp);
3492 vn_finished_write(mp, 0);
3493 return (error);
3494 }
3495
3496 /*
3497 * Remove a directory file.
3498 */
3499 /* ARGSUSED */
3500 int
3501 sys_rmdir(struct lwp *l, void *v, register_t *retval)
3502 {
3503 struct sys_rmdir_args /* {
3504 syscallarg(const char *) path;
3505 } */ *uap = v;
3506 struct mount *mp;
3507 struct vnode *vp;
3508 int error;
3509 struct nameidata nd;
3510
3511 restart:
3512 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3513 SCARG(uap, path), l);
3514 if ((error = namei(&nd)) != 0)
3515 return (error);
3516 vp = nd.ni_vp;
3517 if (vp->v_type != VDIR) {
3518 error = ENOTDIR;
3519 goto out;
3520 }
3521 /*
3522 * No rmdir "." please.
3523 */
3524 if (nd.ni_dvp == vp) {
3525 error = EINVAL;
3526 goto out;
3527 }
3528 /*
3529 * The root of a mounted filesystem cannot be deleted.
3530 */
3531 if (vp->v_flag & VROOT) {
3532 error = EBUSY;
3533 goto out;
3534 }
3535 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3536 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3537 if (nd.ni_dvp == vp)
3538 vrele(nd.ni_dvp);
3539 else
3540 vput(nd.ni_dvp);
3541 vput(vp);
3542 if ((error = vn_start_write(NULL, &mp,
3543 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3544 return (error);
3545 goto restart;
3546 }
3547 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3548 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3549 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3550 vn_finished_write(mp, 0);
3551 return (error);
3552
3553 out:
3554 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3555 if (nd.ni_dvp == vp)
3556 vrele(nd.ni_dvp);
3557 else
3558 vput(nd.ni_dvp);
3559 vput(vp);
3560 return (error);
3561 }
3562
3563 /*
3564 * Read a block of directory entries in a file system independent format.
3565 */
3566 int
3567 sys___getdents30(struct lwp *l, void *v, register_t *retval)
3568 {
3569 struct sys___getdents30_args /* {
3570 syscallarg(int) fd;
3571 syscallarg(char *) buf;
3572 syscallarg(size_t) count;
3573 } */ *uap = v;
3574 struct proc *p = l->l_proc;
3575 struct file *fp;
3576 int error, done;
3577
3578 /* getvnode() will use the descriptor for us */
3579 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3580 return (error);
3581 if ((fp->f_flag & FREAD) == 0) {
3582 error = EBADF;
3583 goto out;
3584 }
3585 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3586 SCARG(uap, count), &done, l, 0, 0);
3587 #ifdef KTRACE
3588 if (!error && KTRPOINT(p, KTR_GENIO)) {
3589 struct iovec iov;
3590 iov.iov_base = SCARG(uap, buf);
3591 iov.iov_len = done;
3592 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0);
3593 }
3594 #endif
3595 *retval = done;
3596 out:
3597 FILE_UNUSE(fp, l);
3598 return (error);
3599 }
3600
3601 /*
3602 * Set the mode mask for creation of filesystem nodes.
3603 */
3604 int
3605 sys_umask(struct lwp *l, void *v, register_t *retval)
3606 {
3607 struct sys_umask_args /* {
3608 syscallarg(mode_t) newmask;
3609 } */ *uap = v;
3610 struct proc *p = l->l_proc;
3611 struct cwdinfo *cwdi;
3612
3613 cwdi = p->p_cwdi;
3614 *retval = cwdi->cwdi_cmask;
3615 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3616 return (0);
3617 }
3618
3619 /*
3620 * Void all references to file by ripping underlying filesystem
3621 * away from vnode.
3622 */
3623 /* ARGSUSED */
3624 int
3625 sys_revoke(struct lwp *l, void *v, register_t *retval)
3626 {
3627 struct sys_revoke_args /* {
3628 syscallarg(const char *) path;
3629 } */ *uap = v;
3630 struct mount *mp;
3631 struct vnode *vp;
3632 struct vattr vattr;
3633 int error;
3634 struct nameidata nd;
3635
3636 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3637 if ((error = namei(&nd)) != 0)
3638 return (error);
3639 vp = nd.ni_vp;
3640 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
3641 goto out;
3642 if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid &&
3643 (error = kauth_authorize_generic(l->l_cred,
3644 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)
3645 goto out;
3646 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3647 goto out;
3648 if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3649 VOP_REVOKE(vp, REVOKEALL);
3650 vn_finished_write(mp, 0);
3651 out:
3652 vrele(vp);
3653 return (error);
3654 }
3655
3656 /*
3657 * Convert a user file descriptor to a kernel file entry.
3658 */
3659 int
3660 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
3661 {
3662 struct vnode *vp;
3663 struct file *fp;
3664
3665 if ((fp = fd_getfile(fdp, fd)) == NULL)
3666 return (EBADF);
3667
3668 FILE_USE(fp);
3669
3670 if (fp->f_type != DTYPE_VNODE) {
3671 FILE_UNUSE(fp, NULL);
3672 return (EINVAL);
3673 }
3674
3675 vp = (struct vnode *)fp->f_data;
3676 if (vp->v_type == VBAD) {
3677 FILE_UNUSE(fp, NULL);
3678 return (EBADF);
3679 }
3680
3681 *fpp = fp;
3682 return (0);
3683 }
Cache object: 11e6e13df8ce68605c982505a2f824be
|