/*
 * FreeBSD/Linux Kernel Cross Reference listing of sys/kern/vfs_vnops.c
 */
1 /*-
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD: releng/8.3/sys/kern/vfs_vnops.c 229725 2012-01-06 19:32:39Z jhb $");
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/fcntl.h>
43 #include <sys/file.h>
44 #include <sys/kdb.h>
45 #include <sys/stat.h>
46 #include <sys/priv.h>
47 #include <sys/proc.h>
48 #include <sys/limits.h>
49 #include <sys/lock.h>
50 #include <sys/mount.h>
51 #include <sys/mutex.h>
52 #include <sys/namei.h>
53 #include <sys/vnode.h>
54 #include <sys/bio.h>
55 #include <sys/buf.h>
56 #include <sys/filio.h>
57 #include <sys/resourcevar.h>
58 #include <sys/sx.h>
59 #include <sys/ttycom.h>
60 #include <sys/conf.h>
61 #include <sys/syslog.h>
62 #include <sys/unistd.h>
63
64 #include <security/mac/mac_framework.h>
65
66 #include <vm/vm.h>
67 #include <vm/vm_object.h>
68
/*
 * File operation implementations for vnode-backed files; wired into
 * struct file via the vnops table below.
 */
static fo_rdwr_t	vn_read;
static fo_rdwr_t	vn_write;
static fo_truncate_t	vn_truncate;
static fo_ioctl_t	vn_ioctl;
static fo_poll_t	vn_poll;
static fo_kqfilter_t	vn_kqfilter;
static fo_stat_t	vn_statfile;
static fo_close_t	vn_closefile;

/*
 * fileops table for vnodes: DFLAG_PASSABLE (may be passed over a unix
 * domain socket) and DFLAG_SEEKABLE (supports an offset/lseek).
 */
struct fileops vnops = {
	.fo_read = vn_read,
	.fo_write = vn_write,
	.fo_truncate = vn_truncate,
	.fo_ioctl = vn_ioctl,
	.fo_poll = vn_poll,
	.fo_kqfilter = vn_kqfilter,
	.fo_stat = vn_statfile,
	.fo_close = vn_closefile,
	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
};
89
90 int
91 vn_open(ndp, flagp, cmode, fp)
92 struct nameidata *ndp;
93 int *flagp, cmode;
94 struct file *fp;
95 {
96 struct thread *td = ndp->ni_cnd.cn_thread;
97
98 return (vn_open_cred(ndp, flagp, cmode, 0, td->td_ucred, fp));
99 }
100
101 /*
102 * Common code for vnode open operations.
103 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
104 *
105 * Note that this does NOT free nameidata for the successful case,
106 * due to the NDINIT being done elsewhere.
107 */
/*
 * Open or create the file named by *ndp.  On success *flagp holds the
 * effective fmode and ndp->ni_vp is returned locked; on failure the
 * nameidata path buffer has been freed and ni_vp is NULL.
 */
int
vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags,
    struct ucred *cred, struct file *fp)
{
	struct vnode *vp;
	struct mount *mp;
	struct thread *td = ndp->ni_cnd.cn_thread;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode, error;
	accmode_t accmode;
	int vfslocked, mpsafe;

	/* Remember whether the caller's lookup was flagged MPSAFE. */
	mpsafe = ndp->ni_cnd.cn_flags & MPSAFE;
restart:
	vfslocked = 0;
	fmode = *flagp;
	if (fmode & O_CREAT) {
		/* Creation path: CREATE lookup with parent and leaf locked. */
		ndp->ni_cnd.cn_nameiop = CREATE;
		ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF |
		    MPSAFE;
		/* O_EXCL or O_NOFOLLOW forbids following a trailing symlink. */
		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
			ndp->ni_cnd.cn_flags |= FOLLOW;
		if (!(vn_open_flags & VN_OPEN_NOAUDIT))
			ndp->ni_cnd.cn_flags |= AUDITVNODE1;
		bwillwrite();
		if ((error = namei(ndp)) != 0)
			return (error);
		vfslocked = NDHASGIANT(ndp);
		if (!mpsafe)
			ndp->ni_cnd.cn_flags &= ~MPSAFE;
		if (ndp->ni_vp == NULL) {
			/* Target does not exist: create a regular file. */
			VATTR_NULL(vap);
			vap->va_type = VREG;
			vap->va_mode = cmode;
			if (fmode & O_EXCL)
				vap->va_vaflags |= VA_EXCLUSIVE;
			if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
				/*
				 * Filesystem is suspending: drop everything,
				 * wait for the suspension to end, and redo
				 * the whole lookup from scratch.
				 */
				NDFREE(ndp, NDF_ONLY_PNBUF);
				vput(ndp->ni_dvp);
				VFS_UNLOCK_GIANT(vfslocked);
				if ((error = vn_start_write(NULL, &mp,
				    V_XSLEEP | PCATCH)) != 0)
					return (error);
				goto restart;
			}
#ifdef MAC
			error = mac_vnode_check_create(cred, ndp->ni_dvp,
			    &ndp->ni_cnd, vap);
			if (error == 0)
#endif
				error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
				    &ndp->ni_cnd, vap);
			vput(ndp->ni_dvp);
			vn_finished_write(mp);
			if (error) {
				VFS_UNLOCK_GIANT(vfslocked);
				NDFREE(ndp, NDF_ONLY_PNBUF);
				return (error);
			}
			/* A freshly created file needs no truncation. */
			fmode &= ~O_TRUNC;
			vp = ndp->ni_vp;
		} else {
			/* Target already exists: release the parent. */
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			ndp->ni_dvp = NULL;
			vp = ndp->ni_vp;
			if (fmode & O_EXCL) {
				error = EEXIST;
				goto bad;
			}
			fmode &= ~O_CREAT;
		}
	} else {
		/* Plain open: LOOKUP with the leaf locked. */
		ndp->ni_cnd.cn_nameiop = LOOKUP;
		ndp->ni_cnd.cn_flags = ISOPEN |
		    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
		    LOCKLEAF | MPSAFE;
		/* Read-only opens may use a shared vnode lock. */
		if (!(fmode & FWRITE))
			ndp->ni_cnd.cn_flags |= LOCKSHARED;
		if (!(vn_open_flags & VN_OPEN_NOAUDIT))
			ndp->ni_cnd.cn_flags |= AUDITVNODE1;
		if ((error = namei(ndp)) != 0)
			return (error);
		if (!mpsafe)
			ndp->ni_cnd.cn_flags &= ~MPSAFE;
		vfslocked = NDHASGIANT(ndp);
		vp = ndp->ni_vp;
	}
	/* Symlinks and sockets cannot be opened through the file table. */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Translate fmode into the VOP_ACCESS accmode bits. */
	accmode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		accmode |= VWRITE;
	}
	if (fmode & FREAD)
		accmode |= VREAD;
	if (fmode & FEXEC)
		accmode |= VEXEC;
	if ((fmode & O_APPEND) && (fmode & FWRITE))
		accmode |= VAPPEND;
#ifdef MAC
	error = mac_vnode_check_open(cred, vp, accmode);
	if (error)
		goto bad;
#endif
	/*
	 * Access checks are skipped for just-created files: creation
	 * already established the right to use them.
	 */
	if ((fmode & O_CREAT) == 0) {
		if (accmode & VWRITE) {
			error = vn_writechk(vp);
			if (error)
				goto bad;
		}
		if (accmode) {
			error = VOP_ACCESS(vp, accmode, cred, td);
			if (error)
				goto bad;
		}
	}
	if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0)
		goto bad;

	/* Account the writer so vn_writechk()/ETXTBSY logic stays correct. */
	if (fmode & FWRITE)
		vp->v_writecount++;
	*flagp = fmode;
	ASSERT_VOP_LOCKED(vp, "vn_open_cred");
	if (!mpsafe)
		VFS_UNLOCK_GIANT(vfslocked);
	return (0);
bad:
	NDFREE(ndp, NDF_ONLY_PNBUF);
	vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	*flagp = fmode;
	ndp->ni_vp = NULL;
	return (error);
}
256
257 /*
258 * Check for write permissions on the specified vnode.
259 * Prototype text segments cannot be written.
260 */
261 int
262 vn_writechk(vp)
263 register struct vnode *vp;
264 {
265
266 ASSERT_VOP_LOCKED(vp, "vn_writechk");
267 /*
268 * If there's shared text associated with
269 * the vnode, try to free it up once. If
270 * we fail, we can't allow writing.
271 */
272 if (vp->v_vflag & VV_TEXT)
273 return (ETXTBSY);
274
275 return (0);
276 }
277
278 /*
279 * Vnode close call
280 */
281 int
282 vn_close(vp, flags, file_cred, td)
283 register struct vnode *vp;
284 int flags;
285 struct ucred *file_cred;
286 struct thread *td;
287 {
288 struct mount *mp;
289 int error, lock_flags;
290
291 if (!(flags & FWRITE) && vp->v_mount != NULL &&
292 vp->v_mount->mnt_kern_flag & MNTK_EXTENDED_SHARED)
293 lock_flags = LK_SHARED;
294 else
295 lock_flags = LK_EXCLUSIVE;
296
297 VFS_ASSERT_GIANT(vp->v_mount);
298
299 vn_start_write(vp, &mp, V_WAIT);
300 vn_lock(vp, lock_flags | LK_RETRY);
301 if (flags & FWRITE) {
302 VNASSERT(vp->v_writecount > 0, vp,
303 ("vn_close: negative writecount"));
304 vp->v_writecount--;
305 }
306 error = VOP_CLOSE(vp, flags, file_cred, td);
307 vput(vp);
308 vn_finished_write(mp);
309 return (error);
310 }
311
312 /*
313 * Heuristic to detect sequential operation.
314 */
315 static int
316 sequential_heuristic(struct uio *uio, struct file *fp)
317 {
318
319 if (atomic_load_acq_int(&(fp->f_flag)) & FRDAHEAD)
320 return (fp->f_seqcount << IO_SEQSHIFT);
321
322 /*
323 * Offset 0 is handled specially. open() sets f_seqcount to 1 so
324 * that the first I/O is normally considered to be slightly
325 * sequential. Seeking to offset 0 doesn't change sequentiality
326 * unless previous seeks have reduced f_seqcount to 0, in which
327 * case offset 0 is not special.
328 */
329 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
330 uio->uio_offset == fp->f_nextoff) {
331 /*
332 * f_seqcount is in units of fixed-size blocks so that it
333 * depends mainly on the amount of sequential I/O and not
334 * much on the number of sequential I/O's. The fixed size
335 * of 16384 is hard-coded here since it is (not quite) just
336 * a magic size that works well here. This size is more
337 * closely related to the best I/O size for real disks than
338 * to any block size used by software.
339 */
340 fp->f_seqcount += howmany(uio->uio_resid, 16384);
341 if (fp->f_seqcount > IO_SEQMAX)
342 fp->f_seqcount = IO_SEQMAX;
343 return (fp->f_seqcount << IO_SEQSHIFT);
344 }
345
346 /* Not sequential. Quickly draw-down sequentiality. */
347 if (fp->f_seqcount > 1)
348 fp->f_seqcount = 1;
349 else
350 fp->f_seqcount = 0;
351 return (0);
352 }
353
354 /*
355 * Package up an I/O request on a vnode into a uio and do it.
356 */
/*
 * Build a uio around (base, len) and perform a single read or write on
 * the vnode.  If IO_NODELOCKED is not set the vnode is locked (and, for
 * writes, write-suspension accounting is entered) around the operation.
 * On return *aresid, if non-NULL, holds the residual byte count; when
 * aresid is NULL a short transfer is reported as EIO.
 */
int
vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
    aresid, td)
	enum uio_rw rw;
	struct vnode *vp;
	void *base;
	int len;
	off_t offset;
	enum uio_seg segflg;
	int ioflg;
	struct ucred *active_cred;
	struct ucred *file_cred;
	int *aresid;
	struct thread *td;
{
	struct uio auio;
	struct iovec aiov;
	struct mount *mp;
	struct ucred *cred;
	int error, lock_flags;

	VFS_ASSERT_GIANT(vp->v_mount);

	if ((ioflg & IO_NODELOCKED) == 0) {
		mp = NULL;
		if (rw == UIO_WRITE) {
			/* Writes must first clear write suspension. */
			if (vp->v_type != VCHR &&
			    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
			    != 0)
				return (error);
			/*
			 * Filesystems supporting shared writes only need
			 * a shared vnode lock here.
			 */
			if (MNT_SHARED_WRITES(mp) ||
			    ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
				lock_flags = LK_SHARED;
			} else {
				lock_flags = LK_EXCLUSIVE;
			}
			vn_lock(vp, lock_flags | LK_RETRY);
		} else
			vn_lock(vp, LK_SHARED | LK_RETRY);

	}
	ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
	/* Package the single buffer into a one-segment uio. */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_segflg = segflg;
	auio.uio_rw = rw;
	auio.uio_td = td;
	error = 0;
#ifdef MAC
	if ((ioflg & IO_NOMACCHECK) == 0) {
		if (rw == UIO_READ)
			error = mac_vnode_check_read(active_cred, file_cred,
			    vp);
		else
			error = mac_vnode_check_write(active_cred, file_cred,
			    vp);
	}
#endif
	if (error == 0) {
		/* Prefer the file's credential when one was supplied. */
		if (file_cred)
			cred = file_cred;
		else
			cred = active_cred;
		if (rw == UIO_READ)
			error = VOP_READ(vp, &auio, ioflg, cred);
		else
			error = VOP_WRITE(vp, &auio, ioflg, cred);
	}
	if (aresid)
		*aresid = auio.uio_resid;
	else
		/* No residual reporting requested: short I/O is an error. */
		if (auio.uio_resid && error == 0)
			error = EIO;
	if ((ioflg & IO_NODELOCKED) == 0) {
		if (rw == UIO_WRITE && vp->v_type != VCHR)
			vn_finished_write(mp);
		VOP_UNLOCK(vp, 0);
	}
	return (error);
}
441
442 /*
443 * Package up an I/O request on a vnode into a uio and do it. The I/O
444 * request is split up into smaller chunks and we try to avoid saturating
445 * the buffer cache while potentially holding a vnode locked, so we
446 * check bwillwrite() before calling vn_rdwr(). We also call uio_yield()
447 * to give other processes a chance to lock the vnode (either other processes
448 * core'ing the same binary, or unrelated processes scanning the directory).
449 */
450 int
451 vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred,
452 file_cred, aresid, td)
453 enum uio_rw rw;
454 struct vnode *vp;
455 void *base;
456 size_t len;
457 off_t offset;
458 enum uio_seg segflg;
459 int ioflg;
460 struct ucred *active_cred;
461 struct ucred *file_cred;
462 size_t *aresid;
463 struct thread *td;
464 {
465 int error = 0;
466 int iaresid;
467
468 VFS_ASSERT_GIANT(vp->v_mount);
469
470 do {
471 int chunk;
472
473 /*
474 * Force `offset' to a multiple of MAXBSIZE except possibly
475 * for the first chunk, so that filesystems only need to
476 * write full blocks except possibly for the first and last
477 * chunks.
478 */
479 chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;
480
481 if (chunk > len)
482 chunk = len;
483 if (rw != UIO_READ && vp->v_type == VREG)
484 bwillwrite();
485 iaresid = 0;
486 error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
487 ioflg, active_cred, file_cred, &iaresid, td);
488 len -= chunk; /* aresid calc already includes length */
489 if (error)
490 break;
491 offset += chunk;
492 base = (char *)base + chunk;
493 uio_yield();
494 } while (len);
495 if (aresid)
496 *aresid = len + iaresid;
497 return (error);
498 }
499
500 /*
501 * File table vnode read routine.
502 */
/*
 * File table vnode read routine.  Unless FOF_OFFSET is set, f_offset is
 * used as the starting offset and updated afterwards; concurrent access
 * to f_offset is serialized with the FOFFSET_LOCKED hand-rolled lock in
 * f_vnread_flags rather than the vnode lock.
 */
static int
vn_read(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	struct vnode *vp;
	int error, ioflag;
	struct mtx *mtxp;
	int advice, vfslocked;

	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
	    uio->uio_td, td));
	mtxp = NULL;
	vp = fp->f_vnode;
	ioflag = 0;
	/* Translate file flags to I/O flags. */
	if (fp->f_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if (fp->f_flag & O_DIRECT)
		ioflag |= IO_DIRECT;
	advice = POSIX_FADV_NORMAL;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	/*
	 * According to McKusick the vn lock was protecting f_offset here.
	 * It is now protected by the FOFFSET_LOCKED flag.
	 */
	if ((flags & FOF_OFFSET) == 0 || fp->f_advice != NULL) {
		mtxp = mtx_pool_find(mtxpool_sleep, fp);
		mtx_lock(mtxp);
		if ((flags & FOF_OFFSET) == 0) {
			/*
			 * Wait for any other reader that currently owns
			 * the offset, then take ownership ourselves.
			 */
			while (fp->f_vnread_flags & FOFFSET_LOCKED) {
				fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
				msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
				    "vnread offlock", 0);
			}
			fp->f_vnread_flags |= FOFFSET_LOCKED;
			uio->uio_offset = fp->f_offset;
		}
		/* Apply posix_fadvise() advice covering this range, if any. */
		if (fp->f_advice != NULL &&
		    uio->uio_offset >= fp->f_advice->fa_start &&
		    uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end)
			advice = fp->f_advice->fa_advice;
		mtx_unlock(mtxp);
	}
	vn_lock(vp, LK_SHARED | LK_RETRY);

	switch (advice) {
	case POSIX_FADV_NORMAL:
	case POSIX_FADV_SEQUENTIAL:
		ioflag |= sequential_heuristic(uio, fp);
		break;
	case POSIX_FADV_RANDOM:
		/* Disable read-ahead for random I/O. */
		break;
	case POSIX_FADV_NOREUSE:
		/*
		 * Request the underlying FS to discard the buffers
		 * and pages after the I/O is complete.
		 */
		ioflag |= IO_DIRECT;
		break;
	}

#ifdef MAC
	error = mac_vnode_check_read(active_cred, fp->f_cred, vp);
	if (error == 0)
#endif
		error = VOP_READ(vp, uio, ioflag, fp->f_cred);
	if ((flags & FOF_OFFSET) == 0) {
		/* Publish the new offset and release FOFFSET_LOCKED. */
		fp->f_offset = uio->uio_offset;
		mtx_lock(mtxp);
		if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING)
			wakeup(&fp->f_vnread_flags);
		fp->f_vnread_flags = 0;
		mtx_unlock(mtxp);
	}
	/* Remember where this I/O ended for the sequential heuristic. */
	fp->f_nextoff = uio->uio_offset;
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
586
587 /*
588 * File table vnode write routine.
589 */
/*
 * File table vnode write routine.  Unless FOF_OFFSET is set, f_offset
 * supplies the starting offset and is updated afterwards.  Writes enter
 * write-suspension accounting (except for VCHR) and may use a shared
 * vnode lock when the filesystem permits shared writes.
 */
static int
vn_write(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	struct vnode *vp;
	struct mount *mp;
	int error, ioflag, lock_flags;
	struct mtx *mtxp;
	int advice, vfslocked;

	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
	    uio->uio_td, td));
	vp = fp->f_vnode;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	/* Throttle if the buffer cache is heavily dirtied. */
	if (vp->v_type == VREG)
		bwillwrite();
	/* Translate file flags to I/O flags. */
	ioflag = IO_UNIT;
	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
		ioflag |= IO_APPEND;
	if (fp->f_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if (fp->f_flag & O_DIRECT)
		ioflag |= IO_DIRECT;
	if ((fp->f_flag & O_FSYNC) ||
	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
		ioflag |= IO_SYNC;
	mp = NULL;
	if (vp->v_type != VCHR &&
	    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
		goto unlock;

	/*
	 * A shared lock is enough when the filesystem supports shared
	 * writes and the caller supplied its own offset (so f_offset
	 * does not need protection).
	 */
	if ((MNT_SHARED_WRITES(mp) ||
	    ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) &&
	    (flags & FOF_OFFSET) != 0) {
		lock_flags = LK_SHARED;
	} else {
		lock_flags = LK_EXCLUSIVE;
	}

	vn_lock(vp, lock_flags | LK_RETRY);
	if ((flags & FOF_OFFSET) == 0)
		uio->uio_offset = fp->f_offset;
	advice = POSIX_FADV_NORMAL;
	/* Apply posix_fadvise() advice covering this range, if any. */
	if (fp->f_advice != NULL) {
		mtxp = mtx_pool_find(mtxpool_sleep, fp);
		mtx_lock(mtxp);
		if (fp->f_advice != NULL &&
		    uio->uio_offset >= fp->f_advice->fa_start &&
		    uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end)
			advice = fp->f_advice->fa_advice;
		mtx_unlock(mtxp);
	}
	switch (advice) {
	case POSIX_FADV_NORMAL:
	case POSIX_FADV_SEQUENTIAL:
		ioflag |= sequential_heuristic(uio, fp);
		break;
	case POSIX_FADV_RANDOM:
		/* XXX: Is this correct? */
		break;
	case POSIX_FADV_NOREUSE:
		/*
		 * Request the underlying FS to discard the buffers
		 * and pages after the I/O is complete.
		 */
		ioflag |= IO_DIRECT;
		break;
	}

#ifdef MAC
	error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
	if ((flags & FOF_OFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	/* Remember where this I/O ended for the sequential heuristic. */
	fp->f_nextoff = uio->uio_offset;
	VOP_UNLOCK(vp, 0);
	if (vp->v_type != VCHR)
		vn_finished_write(mp);
unlock:
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
678
679 /*
680 * File table truncate routine.
681 */
/*
 * File table truncate routine: implement ftruncate() on a vnode-backed
 * file by setting the size attribute via VOP_SETATTR().  Directories
 * may not be truncated (EISDIR).
 */
static int
vn_truncate(fp, length, active_cred, td)
	struct file *fp;
	off_t length;
	struct ucred *active_cred;
	struct thread *td;
{
	struct vattr vattr;
	struct mount *mp;
	struct vnode *vp;
	int vfslocked;
	int error;

	vp = fp->f_vnode;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	/* Truncation modifies the filesystem: honour write suspension. */
	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
	if (error) {
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto out;
	}
#ifdef MAC
	error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
	if (error)
		goto out;
#endif
	/* Refuse to truncate a busy text (executable) vnode. */
	error = vn_writechk(vp);
	if (error == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, fp->f_cred);
	}
out:
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
724
725 /*
726 * File table vnode stat routine.
727 */
728 static int
729 vn_statfile(fp, sb, active_cred, td)
730 struct file *fp;
731 struct stat *sb;
732 struct ucred *active_cred;
733 struct thread *td;
734 {
735 struct vnode *vp = fp->f_vnode;
736 int vfslocked;
737 int error;
738
739 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
740 vn_lock(vp, LK_SHARED | LK_RETRY);
741 error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
742 VOP_UNLOCK(vp, 0);
743 VFS_UNLOCK_GIANT(vfslocked);
744
745 return (error);
746 }
747
748 /*
749 * Stat a vnode; implementation for the stat syscall
750 */
751 int
752 vn_stat(vp, sb, active_cred, file_cred, td)
753 struct vnode *vp;
754 register struct stat *sb;
755 struct ucred *active_cred;
756 struct ucred *file_cred;
757 struct thread *td;
758 {
759 struct vattr vattr;
760 register struct vattr *vap;
761 int error;
762 u_short mode;
763
764 #ifdef MAC
765 error = mac_vnode_check_stat(active_cred, file_cred, vp);
766 if (error)
767 return (error);
768 #endif
769
770 vap = &vattr;
771
772 /*
773 * Initialize defaults for new and unusual fields, so that file
774 * systems which don't support these fields don't need to know
775 * about them.
776 */
777 vap->va_birthtime.tv_sec = -1;
778 vap->va_birthtime.tv_nsec = 0;
779 vap->va_fsid = VNOVAL;
780 vap->va_rdev = NODEV;
781
782 error = VOP_GETATTR(vp, vap, active_cred);
783 if (error)
784 return (error);
785
786 /*
787 * Zero the spare stat fields
788 */
789 bzero(sb, sizeof *sb);
790
791 /*
792 * Copy from vattr table
793 */
794 if (vap->va_fsid != VNOVAL)
795 sb->st_dev = vap->va_fsid;
796 else
797 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
798 sb->st_ino = vap->va_fileid;
799 mode = vap->va_mode;
800 switch (vap->va_type) {
801 case VREG:
802 mode |= S_IFREG;
803 break;
804 case VDIR:
805 mode |= S_IFDIR;
806 break;
807 case VBLK:
808 mode |= S_IFBLK;
809 break;
810 case VCHR:
811 mode |= S_IFCHR;
812 break;
813 case VLNK:
814 mode |= S_IFLNK;
815 break;
816 case VSOCK:
817 mode |= S_IFSOCK;
818 break;
819 case VFIFO:
820 mode |= S_IFIFO;
821 break;
822 default:
823 return (EBADF);
824 };
825 sb->st_mode = mode;
826 sb->st_nlink = vap->va_nlink;
827 sb->st_uid = vap->va_uid;
828 sb->st_gid = vap->va_gid;
829 sb->st_rdev = vap->va_rdev;
830 if (vap->va_size > OFF_MAX)
831 return (EOVERFLOW);
832 sb->st_size = vap->va_size;
833 sb->st_atimespec = vap->va_atime;
834 sb->st_mtimespec = vap->va_mtime;
835 sb->st_ctimespec = vap->va_ctime;
836 sb->st_birthtimespec = vap->va_birthtime;
837
838 /*
839 * According to www.opengroup.org, the meaning of st_blksize is
840 * "a filesystem-specific preferred I/O block size for this
841 * object. In some filesystem types, this may vary from file
842 * to file"
843 * Use miminum/default of PAGE_SIZE (e.g. for VCHR).
844 */
845
846 sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
847
848 sb->st_flags = vap->va_flags;
849 if (priv_check(td, PRIV_VFS_GENERATION))
850 sb->st_gen = 0;
851 else
852 sb->st_gen = vap->va_gen;
853
854 sb->st_blocks = vap->va_bytes / S_BLKSIZE;
855 return (0);
856 }
857
858 /*
859 * File table vnode ioctl routine.
860 */
861 static int
862 vn_ioctl(fp, com, data, active_cred, td)
863 struct file *fp;
864 u_long com;
865 void *data;
866 struct ucred *active_cred;
867 struct thread *td;
868 {
869 struct vnode *vp = fp->f_vnode;
870 struct vattr vattr;
871 int vfslocked;
872 int error;
873
874 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
875 error = ENOTTY;
876 switch (vp->v_type) {
877 case VREG:
878 case VDIR:
879 if (com == FIONREAD) {
880 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
881 error = VOP_GETATTR(vp, &vattr, active_cred);
882 VOP_UNLOCK(vp, 0);
883 if (!error)
884 *(int *)data = vattr.va_size - fp->f_offset;
885 }
886 if (com == FIONBIO || com == FIOASYNC) /* XXX */
887 error = 0;
888 else
889 error = VOP_IOCTL(vp, com, data, fp->f_flag,
890 active_cred, td);
891 break;
892
893 default:
894 break;
895 }
896 VFS_UNLOCK_GIANT(vfslocked);
897 return (error);
898 }
899
900 /*
901 * File table vnode poll routine.
902 */
/*
 * File table vnode poll routine: run the MAC poll check (when MAC is
 * compiled in) and then delegate to VOP_POLL().
 */
static int
vn_poll(fp, events, active_cred, td)
	struct file *fp;
	int events;
	struct ucred *active_cred;
	struct thread *td;
{
	struct vnode *vp;
	int vfslocked;
	int error;

	vp = fp->f_vnode;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
#ifdef MAC
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = mac_vnode_check_poll(active_cred, fp->f_cred, vp);
	VOP_UNLOCK(vp, 0);
	/* Without MAC the VOP_POLL() below runs unconditionally. */
	if (!error)
#endif

		error = VOP_POLL(vp, events, fp->f_cred, td);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
927
928 /*
929 * Acquire the requested lock and then check for validity. LK_RETRY
930 * permits vn_lock to return doomed vnodes.
931 */
/*
 * Acquire the requested lock and then check for validity.  LK_RETRY
 * permits vn_lock to return doomed vnodes; without it a doomed vnode is
 * unlocked again and ENOENT is returned.  file/line identify the caller
 * for lock diagnostics (see the vn_lock() macro).
 */
int
_vn_lock(struct vnode *vp, int flags, char *file, int line)
{
	int error;

	VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
	    ("vn_lock called with no locktype."));
	do {
#ifdef DEBUG_VFS_LOCKS
		KASSERT(vp->v_holdcnt != 0,
		    ("vn_lock %p: zero hold count", vp));
#endif
		error = VOP_LOCK1(vp, flags, file, line);
		flags &= ~LK_INTERLOCK;	/* Interlock is always dropped. */
		KASSERT((flags & LK_RETRY) == 0 || error == 0,
		    ("LK_RETRY set with incompatible flags (0x%x) or an error occured (%d)",
		    flags, error));
		/*
		 * Callers specify LK_RETRY if they wish to get dead vnodes.
		 * If RETRY is not set, we return ENOENT instead.
		 */
		if (error == 0 && vp->v_iflag & VI_DOOMED &&
		    (flags & LK_RETRY) == 0) {
			VOP_UNLOCK(vp, 0);
			error = ENOENT;
			break;
		}
	} while (flags & LK_RETRY && error != 0);
	return (error);
}
962
963 /*
964 * File table vnode close routine.
965 */
/*
 * File table vnode close routine: release any flock()-style advisory
 * lock held through this file, invalidate the file's ops vector, and
 * close the vnode via vn_close().
 */
static int
vn_closefile(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct vnode *vp;
	struct flock lf;
	int vfslocked;
	int error;

	vp = fp->f_vnode;

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	if (fp->f_type == DTYPE_VNODE && fp->f_flag & FHASLOCK) {
		/* Release the whole-file advisory lock taken by flock(). */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		(void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
	}

	/* Point the file at badfileops before the vnode goes away. */
	fp->f_ops = &badfileops;

	error = vn_close(vp, fp->f_flag, fp->f_cred, td);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
993
994 /*
995 * Preparing to start a filesystem write operation. If the operation is
996 * permitted, then we bump the count of operations in progress and
997 * proceed. If a suspend request is in progress, we wait until the
998 * suspension is over, and then proceed.
999 */
/*
 * Preparing to start a filesystem write operation.  If the operation is
 * permitted, then we bump the count of operations in progress and
 * proceed.  If a suspend request is in progress, we wait until the
 * suspension is over, and then proceed.  On success *mpp references the
 * mount point the write targets (released by vn_finished_write()).
 */
int
vn_start_write(vp, mpp, flags)
	struct vnode *vp;
	struct mount **mpp;
	int flags;
{
	struct mount *mp;
	int error;

	error = 0;
	/*
	 * If a vnode is provided, get and return the mount point that
	 * to which it will write.
	 */
	if (vp != NULL) {
		if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
			*mpp = NULL;
			/* EOPNOTSUPP means no write mount: not an error. */
			if (error != EOPNOTSUPP)
				return (error);
			return (0);
		}
	}
	if ((mp = *mpp) == NULL)
		return (0);

	/*
	 * VOP_GETWRITEMOUNT() returns with the mp refcount held through
	 * a vfs_ref().
	 * As long as a vnode is not provided we need to acquire a
	 * refcount for the provided mountpoint too, in order to
	 * emulate a vfs_ref().
	 */
	MNT_ILOCK(mp);
	if (vp == NULL)
		MNT_REF(mp);

	/*
	 * Check on status of suspension.  The suspension owner itself
	 * (with TDP_IGNSUSP set) may keep writing during the suspend.
	 */
	if ((curthread->td_pflags & TDP_IGNSUSP) == 0 ||
	    mp->mnt_susp_owner != curthread) {
		while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
			if (flags & V_NOWAIT) {
				error = EWOULDBLOCK;
				goto unlock;
			}
			error = msleep(&mp->mnt_flag, MNT_MTX(mp),
			    (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
			if (error)
				goto unlock;
		}
	}
	/* V_XSLEEP callers only wanted to wait, not to start a write. */
	if (flags & V_XSLEEP)
		goto unlock;
	mp->mnt_writeopcount++;
unlock:
	/* Drop the reference unless a write operation was registered. */
	if (error != 0 || (flags & V_XSLEEP) != 0)
		MNT_REL(mp);
	MNT_IUNLOCK(mp);
	return (error);
}
1061
1062 /*
1063 * Secondary suspension. Used by operations such as vop_inactive
1064 * routines that are needed by the higher level functions. These
1065 * are allowed to proceed until all the higher level functions have
1066 * completed (indicated by mnt_writeopcount dropping to zero). At that
1067 * time, these operations are halted until the suspension is over.
1068 */
/*
 * Secondary suspension.  Used by operations such as vop_inactive
 * routines that are needed by the higher level functions.  These
 * are allowed to proceed until all the higher level functions have
 * completed (indicated by mnt_writeopcount dropping to zero).  At that
 * time, these operations are halted until the suspension is over.
 */
int
vn_start_secondary_write(vp, mpp, flags)
	struct vnode *vp;
	struct mount **mpp;
	int flags;
{
	struct mount *mp;
	int error;

 retry:
	if (vp != NULL) {
		if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
			*mpp = NULL;
			/* EOPNOTSUPP means no write mount: not an error. */
			if (error != EOPNOTSUPP)
				return (error);
			return (0);
		}
	}
	/*
	 * If we are not suspended or have not yet reached suspended
	 * mode, then let the operation proceed.
	 */
	if ((mp = *mpp) == NULL)
		return (0);

	/*
	 * VOP_GETWRITEMOUNT() returns with the mp refcount held through
	 * a vfs_ref().
	 * As long as a vnode is not provided we need to acquire a
	 * refcount for the provided mountpoint too, in order to
	 * emulate a vfs_ref().
	 */
	MNT_ILOCK(mp);
	if (vp == NULL)
		MNT_REF(mp);
	if ((mp->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND2)) == 0) {
		/* Not (yet) suspended: register the secondary write. */
		mp->mnt_secondary_writes++;
		mp->mnt_secondary_accwrites++;
		MNT_IUNLOCK(mp);
		return (0);
	}
	if (flags & V_NOWAIT) {
		MNT_REL(mp);
		MNT_IUNLOCK(mp);
		return (EWOULDBLOCK);
	}
	/*
	 * Wait for the suspension to finish.  PDROP releases the mount
	 * interlock while we sleep; the reference is dropped afterwards
	 * and the whole lookup is retried from the top.
	 */
	error = msleep(&mp->mnt_flag, MNT_MTX(mp),
	    (PUSER - 1) | (flags & PCATCH) | PDROP, "suspfs", 0);
	vfs_rel(mp);
	if (error == 0)
		goto retry;
	return (error);
}
1125
1126 /*
1127 * Filesystem write operation has completed. If we are suspending and this
1128 * operation is the last one, notify the suspender that the suspension is
1129 * now in effect.
1130 */
/*
 * Filesystem write operation has completed.  If we are suspending and
 * this operation is the last one, notify the suspender that the
 * suspension is now in effect.  Drops the mount reference taken by
 * vn_start_write(); mp may be NULL (no write mount), in which case this
 * is a no-op.
 */
void
vn_finished_write(mp)
	struct mount *mp;
{
	if (mp == NULL)
		return;
	MNT_ILOCK(mp);
	MNT_REL(mp);
	mp->mnt_writeopcount--;
	if (mp->mnt_writeopcount < 0)
		panic("vn_finished_write: neg cnt");
	/* Last writer out wakes the thread waiting in vfs_write_suspend(). */
	if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
	    mp->mnt_writeopcount <= 0)
		wakeup(&mp->mnt_writeopcount);
	MNT_IUNLOCK(mp);
}
1147
1148
1149 /*
1150 * Filesystem secondary write operation has completed. If we are
1151 * suspending and this operation is the last one, notify the suspender
1152 * that the suspension is now in effect.
1153 */
/*
 * Filesystem secondary write operation has completed.  If we are
 * suspending and this operation is the last one, notify the suspender
 * that the suspension is now in effect.  Drops the mount reference
 * taken by vn_start_secondary_write(); mp may be NULL.
 */
void
vn_finished_secondary_write(mp)
	struct mount *mp;
{
	if (mp == NULL)
		return;
	MNT_ILOCK(mp);
	MNT_REL(mp);
	mp->mnt_secondary_writes--;
	if (mp->mnt_secondary_writes < 0)
		panic("vn_finished_secondary_write: neg cnt");
	/* Last secondary writer out wakes any waiting suspender. */
	if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
	    mp->mnt_secondary_writes <= 0)
		wakeup(&mp->mnt_secondary_writes);
	MNT_IUNLOCK(mp);
}
1170
1171
1172
1173 /*
1174 * Request a filesystem to suspend write operations.
1175 */
/*
 * Request a filesystem to suspend write operations.  Marks the mount
 * MNTK_SUSPEND, waits for in-flight writes to drain, then syncs the
 * filesystem with MNT_SUSPEND.  Returns EALREADY if this thread already
 * owns the suspension; on sync failure the suspension is undone.
 */
int
vfs_write_suspend(mp)
	struct mount *mp;
{
	int error;

	MNT_ILOCK(mp);
	if (mp->mnt_susp_owner == curthread) {
		MNT_IUNLOCK(mp);
		return (EALREADY);
	}
	/* Wait out any suspension requested by another thread first. */
	while (mp->mnt_kern_flag & MNTK_SUSPEND)
		msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0);
	mp->mnt_kern_flag |= MNTK_SUSPEND;
	mp->mnt_susp_owner = curthread;
	/*
	 * Wait for pending write operations to finish; the last one
	 * wakes us from vn_finished_write().  PDROP releases the
	 * interlock in either branch.
	 */
	if (mp->mnt_writeopcount > 0)
		(void) msleep(&mp->mnt_writeopcount,
		    MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0);
	else
		MNT_IUNLOCK(mp);
	if ((error = VFS_SYNC(mp, MNT_SUSPEND)) != 0)
		vfs_write_resume(mp);
	return (error);
}
1200
1201 /*
1202 * Request a filesystem to resume write operations.
1203 */
/*
 * Request a filesystem to resume write operations.  Clears all
 * suspension flags, wakes threads blocked in vn_start_write() and
 * vfs_write_suspend(), and lets the filesystem clean up suspend state
 * via VFS_SUSP_CLEAN().  A no-op if the mount is not suspended.
 */
void
vfs_write_resume(mp)
	struct mount *mp;
{

	MNT_ILOCK(mp);
	if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
		KASSERT(mp->mnt_susp_owner == curthread, ("mnt_susp_owner"));
		mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPEND2 |
		    MNTK_SUSPENDED);
		mp->mnt_susp_owner = NULL;
		/* Wake writers and other would-be suspenders. */
		wakeup(&mp->mnt_writeopcount);
		wakeup(&mp->mnt_flag);
		curthread->td_pflags &= ~TDP_IGNSUSP;
		MNT_IUNLOCK(mp);
		VFS_SUSP_CLEAN(mp);
	} else
		MNT_IUNLOCK(mp);
}
1223
1224 /*
1225 * Implement kqueues for files by translating it to vnode operation.
1226 */
1227 static int
1228 vn_kqfilter(struct file *fp, struct knote *kn)
1229 {
1230 int vfslocked;
1231 int error;
1232
1233 vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
1234 error = VOP_KQFILTER(fp->f_vnode, kn);
1235 VFS_UNLOCK_GIANT(vfslocked);
1236
1237 return error;
1238 }
1239
1240 /*
1241 * Simplified in-kernel wrapper calls for extended attribute access.
1242 * Both calls pass in a NULL credential, authorizing as "kernel" access.
1243 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
1244 */
1245 int
1246 vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
1247 const char *attrname, int *buflen, char *buf, struct thread *td)
1248 {
1249 struct uio auio;
1250 struct iovec iov;
1251 int error;
1252
1253 iov.iov_len = *buflen;
1254 iov.iov_base = buf;
1255
1256 auio.uio_iov = &iov;
1257 auio.uio_iovcnt = 1;
1258 auio.uio_rw = UIO_READ;
1259 auio.uio_segflg = UIO_SYSSPACE;
1260 auio.uio_td = td;
1261 auio.uio_offset = 0;
1262 auio.uio_resid = *buflen;
1263
1264 if ((ioflg & IO_NODELOCKED) == 0)
1265 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1266
1267 ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
1268
1269 /* authorize attribute retrieval as kernel */
1270 error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
1271 td);
1272
1273 if ((ioflg & IO_NODELOCKED) == 0)
1274 VOP_UNLOCK(vp, 0);
1275
1276 if (error == 0) {
1277 *buflen = *buflen - auio.uio_resid;
1278 }
1279
1280 return (error);
1281 }
1282
1283 /*
1284 * XXX failure mode if partially written?
1285 */
1286 int
1287 vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
1288 const char *attrname, int buflen, char *buf, struct thread *td)
1289 {
1290 struct uio auio;
1291 struct iovec iov;
1292 struct mount *mp;
1293 int error;
1294
1295 iov.iov_len = buflen;
1296 iov.iov_base = buf;
1297
1298 auio.uio_iov = &iov;
1299 auio.uio_iovcnt = 1;
1300 auio.uio_rw = UIO_WRITE;
1301 auio.uio_segflg = UIO_SYSSPACE;
1302 auio.uio_td = td;
1303 auio.uio_offset = 0;
1304 auio.uio_resid = buflen;
1305
1306 if ((ioflg & IO_NODELOCKED) == 0) {
1307 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
1308 return (error);
1309 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1310 }
1311
1312 ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
1313
1314 /* authorize attribute setting as kernel */
1315 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);
1316
1317 if ((ioflg & IO_NODELOCKED) == 0) {
1318 vn_finished_write(mp);
1319 VOP_UNLOCK(vp, 0);
1320 }
1321
1322 return (error);
1323 }
1324
1325 int
1326 vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
1327 const char *attrname, struct thread *td)
1328 {
1329 struct mount *mp;
1330 int error;
1331
1332 if ((ioflg & IO_NODELOCKED) == 0) {
1333 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
1334 return (error);
1335 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1336 }
1337
1338 ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
1339
1340 /* authorize attribute removal as kernel */
1341 error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td);
1342 if (error == EOPNOTSUPP)
1343 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
1344 NULL, td);
1345
1346 if ((ioflg & IO_NODELOCKED) == 0) {
1347 vn_finished_write(mp);
1348 VOP_UNLOCK(vp, 0);
1349 }
1350
1351 return (error);
1352 }
1353
/*
 * Fetch the vnode for inode number 'ino' on the mount holding 'vp',
 * returning it locked with 'lkflags' in *rvp.  'vp' must be locked on
 * entry and is returned relocked with its original lock type; it may
 * be reclaimed while temporarily unlocked, in which case ENOENT is
 * returned and *rvp is released.
 */
int
vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp)
{
	struct mount *mp;
	int ltype, error;

	mp = vp->v_mount;
	/* Remember the caller's lock type so it can be restored. */
	ltype = VOP_ISLOCKED(vp);
	KASSERT(ltype == LK_EXCLUSIVE || ltype == LK_SHARED,
	    ("vn_vget_ino: vp not locked"));
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error != 0) {
		/*
		 * Could not busy the mount without sleeping.  Take a
		 * reference so mp cannot go away, drop the vnode lock
		 * to avoid deadlocking against an unmount, and retry
		 * the busy operation with sleeping permitted.
		 */
		vfs_ref(mp);
		VOP_UNLOCK(vp, 0);
		error = vfs_busy(mp, 0);
		vn_lock(vp, ltype | LK_RETRY);
		vfs_rel(mp);
		if (error != 0)
			return (ENOENT);
		/* vp may have been reclaimed while it was unlocked. */
		if (vp->v_iflag & VI_DOOMED) {
			vfs_unbusy(mp);
			return (ENOENT);
		}
	}
	/* Drop the vnode lock across VFS_VGET() to respect lock order. */
	VOP_UNLOCK(vp, 0);
	error = VFS_VGET(mp, ino, lkflags, rvp);
	vfs_unbusy(mp);
	vn_lock(vp, ltype | LK_RETRY);
	/* Recheck after relocking; release *rvp if vp was reclaimed. */
	if (vp->v_iflag & VI_DOOMED) {
		if (error == 0)
			vput(*rvp);
		error = ENOENT;
	}
	return (error);
}
1389
1390 int
1391 vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio, const struct thread *td)
1392 {
1393 if (vp->v_type != VREG || td == NULL)
1394 return (0);
1395
1396 PROC_LOCK(td->td_proc);
1397 if (uio->uio_offset + uio->uio_resid >
1398 lim_cur(td->td_proc, RLIMIT_FSIZE)) {
1399 psignal(td->td_proc, SIGXFSZ);
1400 PROC_UNLOCK(td->td_proc);
1401 return (EFBIG);
1402 }
1403 PROC_UNLOCK(td->td_proc);
1404
1405 return (0);
1406 }
1407
1408 void
1409 vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end)
1410 {
1411 vm_object_t object;
1412
1413 if ((object = vp->v_object) == NULL)
1414 return;
1415 VM_OBJECT_LOCK(object);
1416 vm_object_page_remove(object, start, end, 0);
1417 VM_OBJECT_UNLOCK(object);
1418 }
Cache object: 3f5c4c5d14558d102e9c4b209f0da9f5
|