FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_vnops.c
1 /*-
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD: releng/9.1/sys/kern/vfs_vnops.c 236792 2012-06-09 08:04:08Z kib $");
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/fcntl.h>
43 #include <sys/file.h>
44 #include <sys/kdb.h>
45 #include <sys/stat.h>
46 #include <sys/priv.h>
47 #include <sys/proc.h>
48 #include <sys/limits.h>
49 #include <sys/lock.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/namei.h>
53 #include <sys/vnode.h>
54 #include <sys/bio.h>
55 #include <sys/buf.h>
56 #include <sys/filio.h>
57 #include <sys/resourcevar.h>
58 #include <sys/sx.h>
59 #include <sys/ttycom.h>
60 #include <sys/conf.h>
61 #include <sys/syslog.h>
62 #include <sys/unistd.h>
63
64 #include <security/audit/audit.h>
65 #include <security/mac/mac_framework.h>
66
67 #include <vm/vm.h>
68 #include <vm/vm_object.h>
69
70 static fo_rdwr_t vn_read;
71 static fo_rdwr_t vn_write;
72 static fo_truncate_t vn_truncate;
73 static fo_ioctl_t vn_ioctl;
74 static fo_poll_t vn_poll;
75 static fo_kqfilter_t vn_kqfilter;
76 static fo_stat_t vn_statfile;
77 static fo_close_t vn_closefile;
78
79 struct fileops vnops = {
80 .fo_read = vn_read,
81 .fo_write = vn_write,
82 .fo_truncate = vn_truncate,
83 .fo_ioctl = vn_ioctl,
84 .fo_poll = vn_poll,
85 .fo_kqfilter = vn_kqfilter,
86 .fo_stat = vn_statfile,
87 .fo_close = vn_closefile,
88 .fo_chmod = vn_chmod,
89 .fo_chown = vn_chown,
90 .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
91 };
92
93 int
94 vn_open(ndp, flagp, cmode, fp)
95 struct nameidata *ndp;
96 int *flagp, cmode;
97 struct file *fp;
98 {
99 struct thread *td = ndp->ni_cnd.cn_thread;
100
101 return (vn_open_cred(ndp, flagp, cmode, 0, td->td_ucred, fp));
102 }
103
104 /*
105 * Common code for vnode open operations.
106 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
107 *
108 * Note that this does NOT free nameidata for the successful case,
109 * due to the NDINIT being done elsewhere.
110 */
111 int
112 vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags,
113 struct ucred *cred, struct file *fp)
114 {
115 struct vnode *vp;
116 struct mount *mp;
117 struct thread *td = ndp->ni_cnd.cn_thread;
118 struct vattr vat;
119 struct vattr *vap = &vat;
120 int fmode, error;
121 accmode_t accmode;
122 int vfslocked, mpsafe;
123
124 mpsafe = ndp->ni_cnd.cn_flags & MPSAFE;
125 restart:
126 vfslocked = 0;
127 fmode = *flagp;
128 if (fmode & O_CREAT) {
129 ndp->ni_cnd.cn_nameiop = CREATE;
130 ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF |
131 MPSAFE;
132 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
133 ndp->ni_cnd.cn_flags |= FOLLOW;
134 if (!(vn_open_flags & VN_OPEN_NOAUDIT))
135 ndp->ni_cnd.cn_flags |= AUDITVNODE1;
136 bwillwrite();
137 if ((error = namei(ndp)) != 0)
138 return (error);
139 vfslocked = NDHASGIANT(ndp);
140 if (!mpsafe)
141 ndp->ni_cnd.cn_flags &= ~MPSAFE;
142 if (ndp->ni_vp == NULL) {
143 VATTR_NULL(vap);
144 vap->va_type = VREG;
145 vap->va_mode = cmode;
146 if (fmode & O_EXCL)
147 vap->va_vaflags |= VA_EXCLUSIVE;
148 if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
149 NDFREE(ndp, NDF_ONLY_PNBUF);
150 vput(ndp->ni_dvp);
151 VFS_UNLOCK_GIANT(vfslocked);
152 if ((error = vn_start_write(NULL, &mp,
153 V_XSLEEP | PCATCH)) != 0)
154 return (error);
155 goto restart;
156 }
157 #ifdef MAC
158 error = mac_vnode_check_create(cred, ndp->ni_dvp,
159 &ndp->ni_cnd, vap);
160 if (error == 0)
161 #endif
162 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
163 &ndp->ni_cnd, vap);
164 vput(ndp->ni_dvp);
165 vn_finished_write(mp);
166 if (error) {
167 VFS_UNLOCK_GIANT(vfslocked);
168 NDFREE(ndp, NDF_ONLY_PNBUF);
169 return (error);
170 }
171 fmode &= ~O_TRUNC;
172 vp = ndp->ni_vp;
173 } else {
174 if (ndp->ni_dvp == ndp->ni_vp)
175 vrele(ndp->ni_dvp);
176 else
177 vput(ndp->ni_dvp);
178 ndp->ni_dvp = NULL;
179 vp = ndp->ni_vp;
180 if (fmode & O_EXCL) {
181 error = EEXIST;
182 goto bad;
183 }
184 fmode &= ~O_CREAT;
185 }
186 } else {
187 ndp->ni_cnd.cn_nameiop = LOOKUP;
188 ndp->ni_cnd.cn_flags = ISOPEN |
189 ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
190 LOCKLEAF | MPSAFE;
191 if (!(fmode & FWRITE))
192 ndp->ni_cnd.cn_flags |= LOCKSHARED;
193 if (!(vn_open_flags & VN_OPEN_NOAUDIT))
194 ndp->ni_cnd.cn_flags |= AUDITVNODE1;
195 if ((error = namei(ndp)) != 0)
196 return (error);
197 if (!mpsafe)
198 ndp->ni_cnd.cn_flags &= ~MPSAFE;
199 vfslocked = NDHASGIANT(ndp);
200 vp = ndp->ni_vp;
201 }
202 if (vp->v_type == VLNK) {
203 error = EMLINK;
204 goto bad;
205 }
206 if (vp->v_type == VSOCK) {
207 error = EOPNOTSUPP;
208 goto bad;
209 }
210 if (vp->v_type != VDIR && fmode & O_DIRECTORY) {
211 error = ENOTDIR;
212 goto bad;
213 }
214 accmode = 0;
215 if (fmode & (FWRITE | O_TRUNC)) {
216 if (vp->v_type == VDIR) {
217 error = EISDIR;
218 goto bad;
219 }
220 accmode |= VWRITE;
221 }
222 if (fmode & FREAD)
223 accmode |= VREAD;
224 if (fmode & FEXEC)
225 accmode |= VEXEC;
226 if ((fmode & O_APPEND) && (fmode & FWRITE))
227 accmode |= VAPPEND;
228 #ifdef MAC
229 error = mac_vnode_check_open(cred, vp, accmode);
230 if (error)
231 goto bad;
232 #endif
233 if ((fmode & O_CREAT) == 0) {
234 if (accmode & VWRITE) {
235 error = vn_writechk(vp);
236 if (error)
237 goto bad;
238 }
239 if (accmode) {
240 error = VOP_ACCESS(vp, accmode, cred, td);
241 if (error)
242 goto bad;
243 }
244 }
245 if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0)
246 goto bad;
247
248 if (fmode & FWRITE)
249 vp->v_writecount++;
250 *flagp = fmode;
251 ASSERT_VOP_LOCKED(vp, "vn_open_cred");
252 if (!mpsafe)
253 VFS_UNLOCK_GIANT(vfslocked);
254 return (0);
255 bad:
256 NDFREE(ndp, NDF_ONLY_PNBUF);
257 vput(vp);
258 VFS_UNLOCK_GIANT(vfslocked);
259 *flagp = fmode;
260 ndp->ni_vp = NULL;
261 return (error);
262 }
263
264 /*
265 * Check for write permissions on the specified vnode.
266 * Prototype text segments cannot be written.
267 */
268 int
269 vn_writechk(vp)
270 register struct vnode *vp;
271 {
272
273 ASSERT_VOP_LOCKED(vp, "vn_writechk");
274 /*
275 * If there's shared text associated with
276 * the vnode, try to free it up once. If
277 * we fail, we can't allow writing.
278 */
279 if (vp->v_vflag & VV_TEXT)
280 return (ETXTBSY);
281
282 return (0);
283 }
284
285 /*
286 * Vnode close call
287 */
288 int
289 vn_close(vp, flags, file_cred, td)
290 register struct vnode *vp;
291 int flags;
292 struct ucred *file_cred;
293 struct thread *td;
294 {
295 struct mount *mp;
296 int error, lock_flags;
297
298 if (!(flags & FWRITE) && vp->v_mount != NULL &&
299 vp->v_mount->mnt_kern_flag & MNTK_EXTENDED_SHARED)
300 lock_flags = LK_SHARED;
301 else
302 lock_flags = LK_EXCLUSIVE;
303
304 VFS_ASSERT_GIANT(vp->v_mount);
305
306 vn_start_write(vp, &mp, V_WAIT);
307 vn_lock(vp, lock_flags | LK_RETRY);
308 if (flags & FWRITE) {
309 VNASSERT(vp->v_writecount > 0, vp,
310 ("vn_close: negative writecount"));
311 vp->v_writecount--;
312 }
313 error = VOP_CLOSE(vp, flags, file_cred, td);
314 vput(vp);
315 vn_finished_write(mp);
316 return (error);
317 }
318
319 /*
320 * Heuristic to detect sequential operation.
321 */
322 static int
323 sequential_heuristic(struct uio *uio, struct file *fp)
324 {
325
326 if (atomic_load_acq_int(&(fp->f_flag)) & FRDAHEAD)
327 return (fp->f_seqcount << IO_SEQSHIFT);
328
329 /*
330 * Offset 0 is handled specially. open() sets f_seqcount to 1 so
331 * that the first I/O is normally considered to be slightly
332 * sequential. Seeking to offset 0 doesn't change sequentiality
333 * unless previous seeks have reduced f_seqcount to 0, in which
334 * case offset 0 is not special.
335 */
336 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
337 uio->uio_offset == fp->f_nextoff) {
338 /*
339 * f_seqcount is in units of fixed-size blocks so that it
340 * depends mainly on the amount of sequential I/O and not
341 * much on the number of sequential I/O's. The fixed size
342 * of 16384 is hard-coded here since it is (not quite) just
343 * a magic size that works well here. This size is more
344 * closely related to the best I/O size for real disks than
345 * to any block size used by software.
346 */
347 fp->f_seqcount += howmany(uio->uio_resid, 16384);
348 if (fp->f_seqcount > IO_SEQMAX)
349 fp->f_seqcount = IO_SEQMAX;
350 return (fp->f_seqcount << IO_SEQSHIFT);
351 }
352
353 /* Not sequential. Quickly draw-down sequentiality. */
354 if (fp->f_seqcount > 1)
355 fp->f_seqcount = 1;
356 else
357 fp->f_seqcount = 0;
358 return (0);
359 }
360
361 /*
362 * Package up an I/O request on a vnode into a uio and do it.
363 */
364 int
365 vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
366 aresid, td)
367 enum uio_rw rw;
368 struct vnode *vp;
369 void *base;
370 int len;
371 off_t offset;
372 enum uio_seg segflg;
373 int ioflg;
374 struct ucred *active_cred;
375 struct ucred *file_cred;
376 ssize_t *aresid;
377 struct thread *td;
378 {
379 struct uio auio;
380 struct iovec aiov;
381 struct mount *mp;
382 struct ucred *cred;
383 int error, lock_flags;
384
385 VFS_ASSERT_GIANT(vp->v_mount);
386
387 if ((ioflg & IO_NODELOCKED) == 0) {
388 mp = NULL;
389 if (rw == UIO_WRITE) {
390 if (vp->v_type != VCHR &&
391 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
392 != 0)
393 return (error);
394 if (MNT_SHARED_WRITES(mp) ||
395 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
396 lock_flags = LK_SHARED;
397 } else {
398 lock_flags = LK_EXCLUSIVE;
399 }
400 vn_lock(vp, lock_flags | LK_RETRY);
401 } else
402 vn_lock(vp, LK_SHARED | LK_RETRY);
403
404 }
405 ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
406 auio.uio_iov = &aiov;
407 auio.uio_iovcnt = 1;
408 aiov.iov_base = base;
409 aiov.iov_len = len;
410 auio.uio_resid = len;
411 auio.uio_offset = offset;
412 auio.uio_segflg = segflg;
413 auio.uio_rw = rw;
414 auio.uio_td = td;
415 error = 0;
416 #ifdef MAC
417 if ((ioflg & IO_NOMACCHECK) == 0) {
418 if (rw == UIO_READ)
419 error = mac_vnode_check_read(active_cred, file_cred,
420 vp);
421 else
422 error = mac_vnode_check_write(active_cred, file_cred,
423 vp);
424 }
425 #endif
426 if (error == 0) {
427 if (file_cred)
428 cred = file_cred;
429 else
430 cred = active_cred;
431 if (rw == UIO_READ)
432 error = VOP_READ(vp, &auio, ioflg, cred);
433 else
434 error = VOP_WRITE(vp, &auio, ioflg, cred);
435 }
436 if (aresid)
437 *aresid = auio.uio_resid;
438 else
439 if (auio.uio_resid && error == 0)
440 error = EIO;
441 if ((ioflg & IO_NODELOCKED) == 0) {
442 if (rw == UIO_WRITE && vp->v_type != VCHR)
443 vn_finished_write(mp);
444 VOP_UNLOCK(vp, 0);
445 }
446 return (error);
447 }
448
449 /*
450 * Package up an I/O request on a vnode into a uio and do it. The I/O
451 * request is split up into smaller chunks and we try to avoid saturating
452 * the buffer cache while potentially holding a vnode locked, so we
453 * check bwillwrite() before calling vn_rdwr(). We also call kern_yield()
454 * to give other processes a chance to lock the vnode (either other processes
455 * core'ing the same binary, or unrelated processes scanning the directory).
456 */
457 int
458 vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred,
459 file_cred, aresid, td)
460 enum uio_rw rw;
461 struct vnode *vp;
462 void *base;
463 size_t len;
464 off_t offset;
465 enum uio_seg segflg;
466 int ioflg;
467 struct ucred *active_cred;
468 struct ucred *file_cred;
469 size_t *aresid;
470 struct thread *td;
471 {
472 int error = 0;
473 ssize_t iaresid;
474
475 VFS_ASSERT_GIANT(vp->v_mount);
476
477 do {
478 int chunk;
479
480 /*
481 * Force `offset' to a multiple of MAXBSIZE except possibly
482 * for the first chunk, so that filesystems only need to
483 * write full blocks except possibly for the first and last
484 * chunks.
485 */
486 chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;
487
488 if (chunk > len)
489 chunk = len;
490 if (rw != UIO_READ && vp->v_type == VREG)
491 bwillwrite();
492 iaresid = 0;
493 error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
494 ioflg, active_cred, file_cred, &iaresid, td);
495 len -= chunk; /* aresid calc already includes length */
496 if (error)
497 break;
498 offset += chunk;
499 base = (char *)base + chunk;
500 kern_yield(PRI_USER);
501 } while (len);
502 if (aresid)
503 *aresid = len + iaresid;
504 return (error);
505 }
506
507 /*
508 * File table vnode read routine.
509 */
510 static int
511 vn_read(fp, uio, active_cred, flags, td)
512 struct file *fp;
513 struct uio *uio;
514 struct ucred *active_cred;
515 int flags;
516 struct thread *td;
517 {
518 struct vnode *vp;
519 int error, ioflag;
520 struct mtx *mtxp;
521 int advice, vfslocked;
522
523 KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
524 uio->uio_td, td));
525 mtxp = NULL;
526 vp = fp->f_vnode;
527 ioflag = 0;
528 if (fp->f_flag & FNONBLOCK)
529 ioflag |= IO_NDELAY;
530 if (fp->f_flag & O_DIRECT)
531 ioflag |= IO_DIRECT;
532 advice = POSIX_FADV_NORMAL;
533 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
534 /*
535 * According to McKusick the vn lock was protecting f_offset here.
536 * It is now protected by the FOFFSET_LOCKED flag.
537 */
538 if ((flags & FOF_OFFSET) == 0 || fp->f_advice != NULL) {
539 mtxp = mtx_pool_find(mtxpool_sleep, fp);
540 mtx_lock(mtxp);
541 if ((flags & FOF_OFFSET) == 0) {
542 while (fp->f_vnread_flags & FOFFSET_LOCKED) {
543 fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
544 msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
545 "vnread offlock", 0);
546 }
547 fp->f_vnread_flags |= FOFFSET_LOCKED;
548 uio->uio_offset = fp->f_offset;
549 }
550 if (fp->f_advice != NULL &&
551 uio->uio_offset >= fp->f_advice->fa_start &&
552 uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end)
553 advice = fp->f_advice->fa_advice;
554 mtx_unlock(mtxp);
555 }
556 vn_lock(vp, LK_SHARED | LK_RETRY);
557
558 switch (advice) {
559 case POSIX_FADV_NORMAL:
560 case POSIX_FADV_SEQUENTIAL:
561 ioflag |= sequential_heuristic(uio, fp);
562 break;
563 case POSIX_FADV_RANDOM:
564 /* Disable read-ahead for random I/O. */
565 break;
566 case POSIX_FADV_NOREUSE:
567 /*
568 * Request the underlying FS to discard the buffers
569 * and pages after the I/O is complete.
570 */
571 ioflag |= IO_DIRECT;
572 break;
573 }
574
575 #ifdef MAC
576 error = mac_vnode_check_read(active_cred, fp->f_cred, vp);
577 if (error == 0)
578 #endif
579 error = VOP_READ(vp, uio, ioflag, fp->f_cred);
580 if ((flags & FOF_OFFSET) == 0) {
581 fp->f_offset = uio->uio_offset;
582 mtx_lock(mtxp);
583 if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING)
584 wakeup(&fp->f_vnread_flags);
585 fp->f_vnread_flags = 0;
586 mtx_unlock(mtxp);
587 }
588 fp->f_nextoff = uio->uio_offset;
589 VOP_UNLOCK(vp, 0);
590 VFS_UNLOCK_GIANT(vfslocked);
591 return (error);
592 }
593
594 /*
595 * File table vnode write routine.
596 */
597 static int
598 vn_write(fp, uio, active_cred, flags, td)
599 struct file *fp;
600 struct uio *uio;
601 struct ucred *active_cred;
602 int flags;
603 struct thread *td;
604 {
605 struct vnode *vp;
606 struct mount *mp;
607 int error, ioflag, lock_flags;
608 struct mtx *mtxp;
609 int advice, vfslocked;
610
611 KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
612 uio->uio_td, td));
613 vp = fp->f_vnode;
614 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
615 if (vp->v_type == VREG)
616 bwillwrite();
617 ioflag = IO_UNIT;
618 if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
619 ioflag |= IO_APPEND;
620 if (fp->f_flag & FNONBLOCK)
621 ioflag |= IO_NDELAY;
622 if (fp->f_flag & O_DIRECT)
623 ioflag |= IO_DIRECT;
624 if ((fp->f_flag & O_FSYNC) ||
625 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
626 ioflag |= IO_SYNC;
627 mp = NULL;
628 if (vp->v_type != VCHR &&
629 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
630 goto unlock;
631
632 if ((MNT_SHARED_WRITES(mp) ||
633 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) &&
634 (flags & FOF_OFFSET) != 0) {
635 lock_flags = LK_SHARED;
636 } else {
637 lock_flags = LK_EXCLUSIVE;
638 }
639
640 vn_lock(vp, lock_flags | LK_RETRY);
641 if ((flags & FOF_OFFSET) == 0)
642 uio->uio_offset = fp->f_offset;
643 advice = POSIX_FADV_NORMAL;
644 if (fp->f_advice != NULL) {
645 mtxp = mtx_pool_find(mtxpool_sleep, fp);
646 mtx_lock(mtxp);
647 if (fp->f_advice != NULL &&
648 uio->uio_offset >= fp->f_advice->fa_start &&
649 uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end)
650 advice = fp->f_advice->fa_advice;
651 mtx_unlock(mtxp);
652 }
653 switch (advice) {
654 case POSIX_FADV_NORMAL:
655 case POSIX_FADV_SEQUENTIAL:
656 ioflag |= sequential_heuristic(uio, fp);
657 break;
658 case POSIX_FADV_RANDOM:
659 /* XXX: Is this correct? */
660 break;
661 case POSIX_FADV_NOREUSE:
662 /*
663 * Request the underlying FS to discard the buffers
664 * and pages after the I/O is complete.
665 */
666 ioflag |= IO_DIRECT;
667 break;
668 }
669
670 #ifdef MAC
671 error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
672 if (error == 0)
673 #endif
674 error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
675 if ((flags & FOF_OFFSET) == 0)
676 fp->f_offset = uio->uio_offset;
677 fp->f_nextoff = uio->uio_offset;
678 VOP_UNLOCK(vp, 0);
679 if (vp->v_type != VCHR)
680 vn_finished_write(mp);
681 unlock:
682 VFS_UNLOCK_GIANT(vfslocked);
683 return (error);
684 }
685
686 /*
687 * File table truncate routine.
688 */
689 static int
690 vn_truncate(fp, length, active_cred, td)
691 struct file *fp;
692 off_t length;
693 struct ucred *active_cred;
694 struct thread *td;
695 {
696 struct vattr vattr;
697 struct mount *mp;
698 struct vnode *vp;
699 int vfslocked;
700 int error;
701
702 vp = fp->f_vnode;
703 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
704 error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
705 if (error) {
706 VFS_UNLOCK_GIANT(vfslocked);
707 return (error);
708 }
709 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
710 if (vp->v_type == VDIR) {
711 error = EISDIR;
712 goto out;
713 }
714 #ifdef MAC
715 error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
716 if (error)
717 goto out;
718 #endif
719 error = vn_writechk(vp);
720 if (error == 0) {
721 VATTR_NULL(&vattr);
722 vattr.va_size = length;
723 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
724 }
725 out:
726 VOP_UNLOCK(vp, 0);
727 vn_finished_write(mp);
728 VFS_UNLOCK_GIANT(vfslocked);
729 return (error);
730 }
731
732 /*
733 * File table vnode stat routine.
734 */
735 static int
736 vn_statfile(fp, sb, active_cred, td)
737 struct file *fp;
738 struct stat *sb;
739 struct ucred *active_cred;
740 struct thread *td;
741 {
742 struct vnode *vp = fp->f_vnode;
743 int vfslocked;
744 int error;
745
746 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
747 vn_lock(vp, LK_SHARED | LK_RETRY);
748 error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
749 VOP_UNLOCK(vp, 0);
750 VFS_UNLOCK_GIANT(vfslocked);
751
752 return (error);
753 }
754
755 /*
756 * Stat a vnode; implementation for the stat syscall
757 */
758 int
759 vn_stat(vp, sb, active_cred, file_cred, td)
760 struct vnode *vp;
761 register struct stat *sb;
762 struct ucred *active_cred;
763 struct ucred *file_cred;
764 struct thread *td;
765 {
766 struct vattr vattr;
767 register struct vattr *vap;
768 int error;
769 u_short mode;
770
771 #ifdef MAC
772 error = mac_vnode_check_stat(active_cred, file_cred, vp);
773 if (error)
774 return (error);
775 #endif
776
777 vap = &vattr;
778
779 /*
780 * Initialize defaults for new and unusual fields, so that file
781 * systems which don't support these fields don't need to know
782 * about them.
783 */
784 vap->va_birthtime.tv_sec = -1;
785 vap->va_birthtime.tv_nsec = 0;
786 vap->va_fsid = VNOVAL;
787 vap->va_rdev = NODEV;
788
789 error = VOP_GETATTR(vp, vap, active_cred);
790 if (error)
791 return (error);
792
793 /*
794 * Zero the spare stat fields
795 */
796 bzero(sb, sizeof *sb);
797
798 /*
799 * Copy from vattr table
800 */
801 if (vap->va_fsid != VNOVAL)
802 sb->st_dev = vap->va_fsid;
803 else
804 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
805 sb->st_ino = vap->va_fileid;
806 mode = vap->va_mode;
807 switch (vap->va_type) {
808 case VREG:
809 mode |= S_IFREG;
810 break;
811 case VDIR:
812 mode |= S_IFDIR;
813 break;
814 case VBLK:
815 mode |= S_IFBLK;
816 break;
817 case VCHR:
818 mode |= S_IFCHR;
819 break;
820 case VLNK:
821 mode |= S_IFLNK;
822 break;
823 case VSOCK:
824 mode |= S_IFSOCK;
825 break;
826 case VFIFO:
827 mode |= S_IFIFO;
828 break;
829 default:
830 return (EBADF);
831 };
832 sb->st_mode = mode;
833 sb->st_nlink = vap->va_nlink;
834 sb->st_uid = vap->va_uid;
835 sb->st_gid = vap->va_gid;
836 sb->st_rdev = vap->va_rdev;
837 if (vap->va_size > OFF_MAX)
838 return (EOVERFLOW);
839 sb->st_size = vap->va_size;
840 sb->st_atim = vap->va_atime;
841 sb->st_mtim = vap->va_mtime;
842 sb->st_ctim = vap->va_ctime;
843 sb->st_birthtim = vap->va_birthtime;
844
845 /*
846 * According to www.opengroup.org, the meaning of st_blksize is
847 * "a filesystem-specific preferred I/O block size for this
848 * object. In some filesystem types, this may vary from file
849 * to file"
850 * Use miminum/default of PAGE_SIZE (e.g. for VCHR).
851 */
852
853 sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
854
855 sb->st_flags = vap->va_flags;
856 if (priv_check(td, PRIV_VFS_GENERATION))
857 sb->st_gen = 0;
858 else
859 sb->st_gen = vap->va_gen;
860
861 sb->st_blocks = vap->va_bytes / S_BLKSIZE;
862 return (0);
863 }
864
865 /*
866 * File table vnode ioctl routine.
867 */
868 static int
869 vn_ioctl(fp, com, data, active_cred, td)
870 struct file *fp;
871 u_long com;
872 void *data;
873 struct ucred *active_cred;
874 struct thread *td;
875 {
876 struct vnode *vp = fp->f_vnode;
877 struct vattr vattr;
878 int vfslocked;
879 int error;
880
881 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
882 error = ENOTTY;
883 switch (vp->v_type) {
884 case VREG:
885 case VDIR:
886 if (com == FIONREAD) {
887 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
888 error = VOP_GETATTR(vp, &vattr, active_cred);
889 VOP_UNLOCK(vp, 0);
890 if (!error)
891 *(int *)data = vattr.va_size - fp->f_offset;
892 }
893 if (com == FIONBIO || com == FIOASYNC) /* XXX */
894 error = 0;
895 else
896 error = VOP_IOCTL(vp, com, data, fp->f_flag,
897 active_cred, td);
898 break;
899
900 default:
901 break;
902 }
903 VFS_UNLOCK_GIANT(vfslocked);
904 return (error);
905 }
906
907 /*
908 * File table vnode poll routine.
909 */
910 static int
911 vn_poll(fp, events, active_cred, td)
912 struct file *fp;
913 int events;
914 struct ucred *active_cred;
915 struct thread *td;
916 {
917 struct vnode *vp;
918 int vfslocked;
919 int error;
920
921 vp = fp->f_vnode;
922 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
923 #ifdef MAC
924 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
925 error = mac_vnode_check_poll(active_cred, fp->f_cred, vp);
926 VOP_UNLOCK(vp, 0);
927 if (!error)
928 #endif
929
930 error = VOP_POLL(vp, events, fp->f_cred, td);
931 VFS_UNLOCK_GIANT(vfslocked);
932 return (error);
933 }
934
935 /*
936 * Acquire the requested lock and then check for validity. LK_RETRY
937 * permits vn_lock to return doomed vnodes.
938 */
939 int
940 _vn_lock(struct vnode *vp, int flags, char *file, int line)
941 {
942 int error;
943
944 VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
945 ("vn_lock called with no locktype."));
946 do {
947 #ifdef DEBUG_VFS_LOCKS
948 KASSERT(vp->v_holdcnt != 0,
949 ("vn_lock %p: zero hold count", vp));
950 #endif
951 error = VOP_LOCK1(vp, flags, file, line);
952 flags &= ~LK_INTERLOCK; /* Interlock is always dropped. */
953 KASSERT((flags & LK_RETRY) == 0 || error == 0,
954 ("LK_RETRY set with incompatible flags (0x%x) or an error occured (%d)",
955 flags, error));
956 /*
957 * Callers specify LK_RETRY if they wish to get dead vnodes.
958 * If RETRY is not set, we return ENOENT instead.
959 */
960 if (error == 0 && vp->v_iflag & VI_DOOMED &&
961 (flags & LK_RETRY) == 0) {
962 VOP_UNLOCK(vp, 0);
963 error = ENOENT;
964 break;
965 }
966 } while (flags & LK_RETRY && error != 0);
967 return (error);
968 }
969
970 /*
971 * File table vnode close routine.
972 */
973 static int
974 vn_closefile(fp, td)
975 struct file *fp;
976 struct thread *td;
977 {
978 struct vnode *vp;
979 struct flock lf;
980 int vfslocked;
981 int error;
982
983 vp = fp->f_vnode;
984
985 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
986 if (fp->f_type == DTYPE_VNODE && fp->f_flag & FHASLOCK) {
987 lf.l_whence = SEEK_SET;
988 lf.l_start = 0;
989 lf.l_len = 0;
990 lf.l_type = F_UNLCK;
991 (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
992 }
993
994 fp->f_ops = &badfileops;
995
996 error = vn_close(vp, fp->f_flag, fp->f_cred, td);
997 VFS_UNLOCK_GIANT(vfslocked);
998 return (error);
999 }
1000
1001 /*
1002 * Preparing to start a filesystem write operation. If the operation is
1003 * permitted, then we bump the count of operations in progress and
1004 * proceed. If a suspend request is in progress, we wait until the
1005 * suspension is over, and then proceed.
1006 */
1007 int
1008 vn_start_write(vp, mpp, flags)
1009 struct vnode *vp;
1010 struct mount **mpp;
1011 int flags;
1012 {
1013 struct mount *mp;
1014 int error;
1015
1016 error = 0;
1017 /*
1018 * If a vnode is provided, get and return the mount point that
1019 * to which it will write.
1020 */
1021 if (vp != NULL) {
1022 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
1023 *mpp = NULL;
1024 if (error != EOPNOTSUPP)
1025 return (error);
1026 return (0);
1027 }
1028 }
1029 if ((mp = *mpp) == NULL)
1030 return (0);
1031
1032 /*
1033 * VOP_GETWRITEMOUNT() returns with the mp refcount held through
1034 * a vfs_ref().
1035 * As long as a vnode is not provided we need to acquire a
1036 * refcount for the provided mountpoint too, in order to
1037 * emulate a vfs_ref().
1038 */
1039 MNT_ILOCK(mp);
1040 if (vp == NULL)
1041 MNT_REF(mp);
1042
1043 /*
1044 * Check on status of suspension.
1045 */
1046 if ((curthread->td_pflags & TDP_IGNSUSP) == 0 ||
1047 mp->mnt_susp_owner != curthread) {
1048 while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
1049 if (flags & V_NOWAIT) {
1050 error = EWOULDBLOCK;
1051 goto unlock;
1052 }
1053 error = msleep(&mp->mnt_flag, MNT_MTX(mp),
1054 (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
1055 if (error)
1056 goto unlock;
1057 }
1058 }
1059 if (flags & V_XSLEEP)
1060 goto unlock;
1061 mp->mnt_writeopcount++;
1062 unlock:
1063 if (error != 0 || (flags & V_XSLEEP) != 0)
1064 MNT_REL(mp);
1065 MNT_IUNLOCK(mp);
1066 return (error);
1067 }
1068
1069 /*
1070 * Secondary suspension. Used by operations such as vop_inactive
1071 * routines that are needed by the higher level functions. These
1072 * are allowed to proceed until all the higher level functions have
1073 * completed (indicated by mnt_writeopcount dropping to zero). At that
1074 * time, these operations are halted until the suspension is over.
1075 */
1076 int
1077 vn_start_secondary_write(vp, mpp, flags)
1078 struct vnode *vp;
1079 struct mount **mpp;
1080 int flags;
1081 {
1082 struct mount *mp;
1083 int error;
1084
1085 retry:
1086 if (vp != NULL) {
1087 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
1088 *mpp = NULL;
1089 if (error != EOPNOTSUPP)
1090 return (error);
1091 return (0);
1092 }
1093 }
1094 /*
1095 * If we are not suspended or have not yet reached suspended
1096 * mode, then let the operation proceed.
1097 */
1098 if ((mp = *mpp) == NULL)
1099 return (0);
1100
1101 /*
1102 * VOP_GETWRITEMOUNT() returns with the mp refcount held through
1103 * a vfs_ref().
1104 * As long as a vnode is not provided we need to acquire a
1105 * refcount for the provided mountpoint too, in order to
1106 * emulate a vfs_ref().
1107 */
1108 MNT_ILOCK(mp);
1109 if (vp == NULL)
1110 MNT_REF(mp);
1111 if ((mp->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND2)) == 0) {
1112 mp->mnt_secondary_writes++;
1113 mp->mnt_secondary_accwrites++;
1114 MNT_IUNLOCK(mp);
1115 return (0);
1116 }
1117 if (flags & V_NOWAIT) {
1118 MNT_REL(mp);
1119 MNT_IUNLOCK(mp);
1120 return (EWOULDBLOCK);
1121 }
1122 /*
1123 * Wait for the suspension to finish.
1124 */
1125 error = msleep(&mp->mnt_flag, MNT_MTX(mp),
1126 (PUSER - 1) | (flags & PCATCH) | PDROP, "suspfs", 0);
1127 vfs_rel(mp);
1128 if (error == 0)
1129 goto retry;
1130 return (error);
1131 }
1132
1133 /*
1134 * Filesystem write operation has completed. If we are suspending and this
1135 * operation is the last one, notify the suspender that the suspension is
1136 * now in effect.
1137 */
1138 void
1139 vn_finished_write(mp)
1140 struct mount *mp;
1141 {
1142 if (mp == NULL)
1143 return;
1144 MNT_ILOCK(mp);
1145 MNT_REL(mp);
1146 mp->mnt_writeopcount--;
1147 if (mp->mnt_writeopcount < 0)
1148 panic("vn_finished_write: neg cnt");
1149 if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
1150 mp->mnt_writeopcount <= 0)
1151 wakeup(&mp->mnt_writeopcount);
1152 MNT_IUNLOCK(mp);
1153 }
1154
1155
1156 /*
1157 * Filesystem secondary write operation has completed. If we are
1158 * suspending and this operation is the last one, notify the suspender
1159 * that the suspension is now in effect.
1160 */
1161 void
1162 vn_finished_secondary_write(mp)
1163 struct mount *mp;
1164 {
1165 if (mp == NULL)
1166 return;
1167 MNT_ILOCK(mp);
1168 MNT_REL(mp);
1169 mp->mnt_secondary_writes--;
1170 if (mp->mnt_secondary_writes < 0)
1171 panic("vn_finished_secondary_write: neg cnt");
1172 if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
1173 mp->mnt_secondary_writes <= 0)
1174 wakeup(&mp->mnt_secondary_writes);
1175 MNT_IUNLOCK(mp);
1176 }
1177
1178
1179
1180 /*
1181 * Request a filesystem to suspend write operations.
1182 */
1183 int
1184 vfs_write_suspend(mp)
1185 struct mount *mp;
1186 {
1187 int error;
1188
1189 MNT_ILOCK(mp);
1190 if (mp->mnt_susp_owner == curthread) {
1191 MNT_IUNLOCK(mp);
1192 return (EALREADY);
1193 }
1194 while (mp->mnt_kern_flag & MNTK_SUSPEND)
1195 msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0);
1196 mp->mnt_kern_flag |= MNTK_SUSPEND;
1197 mp->mnt_susp_owner = curthread;
1198 if (mp->mnt_writeopcount > 0)
1199 (void) msleep(&mp->mnt_writeopcount,
1200 MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0);
1201 else
1202 MNT_IUNLOCK(mp);
1203 if ((error = VFS_SYNC(mp, MNT_SUSPEND)) != 0)
1204 vfs_write_resume(mp);
1205 return (error);
1206 }
1207
1208 /*
1209 * Request a filesystem to resume write operations.
1210 */
1211 void
1212 vfs_write_resume(mp)
1213 struct mount *mp;
1214 {
1215
1216 MNT_ILOCK(mp);
1217 if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
1218 KASSERT(mp->mnt_susp_owner == curthread, ("mnt_susp_owner"));
1219 mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPEND2 |
1220 MNTK_SUSPENDED);
1221 mp->mnt_susp_owner = NULL;
1222 wakeup(&mp->mnt_writeopcount);
1223 wakeup(&mp->mnt_flag);
1224 curthread->td_pflags &= ~TDP_IGNSUSP;
1225 MNT_IUNLOCK(mp);
1226 VFS_SUSP_CLEAN(mp);
1227 } else
1228 MNT_IUNLOCK(mp);
1229 }
1230
1231 /*
1232 * Implement kqueues for files by translating it to vnode operation.
1233 */
1234 static int
1235 vn_kqfilter(struct file *fp, struct knote *kn)
1236 {
1237 int vfslocked;
1238 int error;
1239
1240 vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
1241 error = VOP_KQFILTER(fp->f_vnode, kn);
1242 VFS_UNLOCK_GIANT(vfslocked);
1243
1244 return error;
1245 }
1246
1247 /*
1248 * Simplified in-kernel wrapper calls for extended attribute access.
1249 * Both calls pass in a NULL credential, authorizing as "kernel" access.
1250 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
1251 */
1252 int
1253 vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
1254 const char *attrname, int *buflen, char *buf, struct thread *td)
1255 {
1256 struct uio auio;
1257 struct iovec iov;
1258 int error;
1259
1260 iov.iov_len = *buflen;
1261 iov.iov_base = buf;
1262
1263 auio.uio_iov = &iov;
1264 auio.uio_iovcnt = 1;
1265 auio.uio_rw = UIO_READ;
1266 auio.uio_segflg = UIO_SYSSPACE;
1267 auio.uio_td = td;
1268 auio.uio_offset = 0;
1269 auio.uio_resid = *buflen;
1270
1271 if ((ioflg & IO_NODELOCKED) == 0)
1272 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1273
1274 ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
1275
1276 /* authorize attribute retrieval as kernel */
1277 error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
1278 td);
1279
1280 if ((ioflg & IO_NODELOCKED) == 0)
1281 VOP_UNLOCK(vp, 0);
1282
1283 if (error == 0) {
1284 *buflen = *buflen - auio.uio_resid;
1285 }
1286
1287 return (error);
1288 }
1289
1290 /*
1291 * XXX failure mode if partially written?
1292 */
1293 int
1294 vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
1295 const char *attrname, int buflen, char *buf, struct thread *td)
1296 {
1297 struct uio auio;
1298 struct iovec iov;
1299 struct mount *mp;
1300 int error;
1301
1302 iov.iov_len = buflen;
1303 iov.iov_base = buf;
1304
1305 auio.uio_iov = &iov;
1306 auio.uio_iovcnt = 1;
1307 auio.uio_rw = UIO_WRITE;
1308 auio.uio_segflg = UIO_SYSSPACE;
1309 auio.uio_td = td;
1310 auio.uio_offset = 0;
1311 auio.uio_resid = buflen;
1312
1313 if ((ioflg & IO_NODELOCKED) == 0) {
1314 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
1315 return (error);
1316 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1317 }
1318
1319 ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
1320
1321 /* authorize attribute setting as kernel */
1322 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);
1323
1324 if ((ioflg & IO_NODELOCKED) == 0) {
1325 vn_finished_write(mp);
1326 VOP_UNLOCK(vp, 0);
1327 }
1328
1329 return (error);
1330 }
1331
1332 int
1333 vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
1334 const char *attrname, struct thread *td)
1335 {
1336 struct mount *mp;
1337 int error;
1338
1339 if ((ioflg & IO_NODELOCKED) == 0) {
1340 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
1341 return (error);
1342 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1343 }
1344
1345 ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
1346
1347 /* authorize attribute removal as kernel */
1348 error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td);
1349 if (error == EOPNOTSUPP)
1350 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
1351 NULL, td);
1352
1353 if ((ioflg & IO_NODELOCKED) == 0) {
1354 vn_finished_write(mp);
1355 VOP_UNLOCK(vp, 0);
1356 }
1357
1358 return (error);
1359 }
1360
1361 int
1362 vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp)
1363 {
1364 struct mount *mp;
1365 int ltype, error;
1366
1367 mp = vp->v_mount;
1368 ltype = VOP_ISLOCKED(vp);
1369 KASSERT(ltype == LK_EXCLUSIVE || ltype == LK_SHARED,
1370 ("vn_vget_ino: vp not locked"));
1371 error = vfs_busy(mp, MBF_NOWAIT);
1372 if (error != 0) {
1373 vfs_ref(mp);
1374 VOP_UNLOCK(vp, 0);
1375 error = vfs_busy(mp, 0);
1376 vn_lock(vp, ltype | LK_RETRY);
1377 vfs_rel(mp);
1378 if (error != 0)
1379 return (ENOENT);
1380 if (vp->v_iflag & VI_DOOMED) {
1381 vfs_unbusy(mp);
1382 return (ENOENT);
1383 }
1384 }
1385 VOP_UNLOCK(vp, 0);
1386 error = VFS_VGET(mp, ino, lkflags, rvp);
1387 vfs_unbusy(mp);
1388 vn_lock(vp, ltype | LK_RETRY);
1389 if (vp->v_iflag & VI_DOOMED) {
1390 if (error == 0)
1391 vput(*rvp);
1392 error = ENOENT;
1393 }
1394 return (error);
1395 }
1396
1397 int
1398 vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio,
1399 const struct thread *td)
1400 {
1401
1402 if (vp->v_type != VREG || td == NULL)
1403 return (0);
1404 PROC_LOCK(td->td_proc);
1405 if ((uoff_t)uio->uio_offset + uio->uio_resid >
1406 lim_cur(td->td_proc, RLIMIT_FSIZE)) {
1407 kern_psignal(td->td_proc, SIGXFSZ);
1408 PROC_UNLOCK(td->td_proc);
1409 return (EFBIG);
1410 }
1411 PROC_UNLOCK(td->td_proc);
1412 return (0);
1413 }
1414
1415 int
1416 vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
1417 struct thread *td)
1418 {
1419 struct vnode *vp;
1420 int error, vfslocked;
1421
1422 vp = fp->f_vnode;
1423 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1424 #ifdef AUDIT
1425 vn_lock(vp, LK_SHARED | LK_RETRY);
1426 AUDIT_ARG_VNODE1(vp);
1427 VOP_UNLOCK(vp, 0);
1428 #endif
1429 error = setfmode(td, active_cred, vp, mode);
1430 VFS_UNLOCK_GIANT(vfslocked);
1431 return (error);
1432 }
1433
1434 int
1435 vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
1436 struct thread *td)
1437 {
1438 struct vnode *vp;
1439 int error, vfslocked;
1440
1441 vp = fp->f_vnode;
1442 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1443 #ifdef AUDIT
1444 vn_lock(vp, LK_SHARED | LK_RETRY);
1445 AUDIT_ARG_VNODE1(vp);
1446 VOP_UNLOCK(vp, 0);
1447 #endif
1448 error = setfown(td, active_cred, vp, uid, gid);
1449 VFS_UNLOCK_GIANT(vfslocked);
1450 return (error);
1451 }
1452
1453 void
1454 vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end)
1455 {
1456 vm_object_t object;
1457
1458 if ((object = vp->v_object) == NULL)
1459 return;
1460 VM_OBJECT_LOCK(object);
1461 vm_object_page_remove(object, start, end, 0);
1462 VM_OBJECT_UNLOCK(object);
1463 }
1464
1465 int
1466 vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred)
1467 {
1468 struct vattr va;
1469 daddr_t bn, bnp;
1470 uint64_t bsize;
1471 off_t noff;
1472 int error;
1473
1474 KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
1475 ("Wrong command %lu", cmd));
1476
1477 if (vn_lock(vp, LK_SHARED) != 0)
1478 return (EBADF);
1479 if (vp->v_type != VREG) {
1480 error = ENOTTY;
1481 goto unlock;
1482 }
1483 error = VOP_GETATTR(vp, &va, cred);
1484 if (error != 0)
1485 goto unlock;
1486 noff = *off;
1487 if (noff >= va.va_size) {
1488 error = ENXIO;
1489 goto unlock;
1490 }
1491 bsize = vp->v_mount->mnt_stat.f_iosize;
1492 for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize) {
1493 error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL);
1494 if (error == EOPNOTSUPP) {
1495 error = ENOTTY;
1496 goto unlock;
1497 }
1498 if ((bnp == -1 && cmd == FIOSEEKHOLE) ||
1499 (bnp != -1 && cmd == FIOSEEKDATA)) {
1500 noff = bn * bsize;
1501 if (noff < *off)
1502 noff = *off;
1503 goto unlock;
1504 }
1505 }
1506 if (noff > va.va_size)
1507 noff = va.va_size;
1508 /* noff == va.va_size. There is an implicit hole at the end of file. */
1509 if (cmd == FIOSEEKDATA)
1510 error = ENXIO;
1511 unlock:
1512 VOP_UNLOCK(vp, 0);
1513 if (error == 0)
1514 *off = noff;
1515 return (error);
1516 }
Cache object: e8f3dcf7d60aa906a687e03f239c7c20
|