1 /* $OpenBSD: ffs_vnops.c,v 1.100 2022/06/26 05:20:43 visa Exp $ */
2 /* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */
3
4 /*
5 * Copyright (c) 1982, 1986, 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)ffs_vnops.c 8.10 (Berkeley) 8/10/94
33 */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/resourcevar.h>
38 #include <sys/kernel.h>
39 #include <sys/stat.h>
40 #include <sys/buf.h>
41 #include <sys/mount.h>
42 #include <sys/vnode.h>
43 #include <sys/malloc.h>
44 #include <sys/signalvar.h>
45 #include <sys/pool.h>
46 #include <sys/event.h>
47 #include <sys/specdev.h>
48
49 #include <miscfs/fifofs/fifo.h>
50
51 #include <ufs/ufs/quota.h>
52 #include <ufs/ufs/inode.h>
53 #include <ufs/ufs/dir.h>
54 #include <ufs/ufs/ufs_extern.h>
55 #include <ufs/ufs/ufsmount.h>
56
57 #include <ufs/ffs/fs.h>
58 #include <ufs/ffs/ffs_extern.h>
59
/*
 * Vnode operations vector for regular FFS files, directories and
 * symlinks.  Most entries are shared with the generic UFS layer;
 * only read, write, fsync and reclaim are FFS-specific.
 */
const struct vops ffs_vops = {
	.vop_lookup = ufs_lookup,
	.vop_create = ufs_create,
	.vop_mknod = ufs_mknod,
	.vop_open = ufs_open,
	.vop_close = ufs_close,
	.vop_access = ufs_access,
	.vop_getattr = ufs_getattr,
	.vop_setattr = ufs_setattr,
	.vop_read = ffs_read,
	.vop_write = ffs_write,
	.vop_ioctl = ufs_ioctl,
	.vop_kqfilter = ufs_kqfilter,
	.vop_revoke = vop_generic_revoke,
	.vop_fsync = ffs_fsync,
	.vop_remove = ufs_remove,
	.vop_link = ufs_link,
	.vop_rename = ufs_rename,
	.vop_mkdir = ufs_mkdir,
	.vop_rmdir = ufs_rmdir,
	.vop_symlink = ufs_symlink,
	.vop_readdir = ufs_readdir,
	.vop_readlink = ufs_readlink,
	.vop_abortop = vop_generic_abortop,
	.vop_inactive = ufs_inactive,
	.vop_reclaim = ffs_reclaim,
	.vop_lock = ufs_lock,
	.vop_unlock = ufs_unlock,
	.vop_bmap = ufs_bmap,
	.vop_strategy = ufs_strategy,
	.vop_print = ufs_print,
	.vop_islocked = ufs_islocked,
	.vop_pathconf = ufs_pathconf,
	.vop_advlock = ufs_advlock,
	.vop_bwrite = vop_generic_bwrite
};
96
/*
 * Vnode operations vector for device special files living on an FFS
 * filesystem.  Attribute, locking and lifecycle operations go to the
 * UFS/FFS layer (so inode metadata stays correct); actual I/O and
 * device-specific operations are handed to the spec layer.
 */
const struct vops ffs_specvops = {
	.vop_close = ufsspec_close,
	.vop_access = ufs_access,
	.vop_getattr = ufs_getattr,
	.vop_setattr = ufs_setattr,
	.vop_read = ufsspec_read,
	.vop_write = ufsspec_write,
	.vop_fsync = ffs_fsync,
	.vop_inactive = ufs_inactive,
	.vop_reclaim = ffs_reclaim,
	.vop_lock = ufs_lock,
	.vop_unlock = ufs_unlock,
	.vop_print = ufs_print,
	.vop_islocked = ufs_islocked,

	/* XXX: Keep in sync with spec_vops */
	.vop_lookup = vop_generic_lookup,
	.vop_create = vop_generic_badop,
	.vop_mknod = vop_generic_badop,
	.vop_open = spec_open,
	.vop_ioctl = spec_ioctl,
	.vop_kqfilter = spec_kqfilter,
	.vop_revoke = vop_generic_revoke,
	.vop_remove = vop_generic_badop,
	.vop_link = vop_generic_badop,
	.vop_rename = vop_generic_badop,
	.vop_mkdir = vop_generic_badop,
	.vop_rmdir = vop_generic_badop,
	.vop_symlink = vop_generic_badop,
	.vop_readdir = vop_generic_badop,
	.vop_readlink = vop_generic_badop,
	.vop_abortop = vop_generic_badop,
	.vop_bmap = vop_generic_bmap,
	.vop_strategy = spec_strategy,
	.vop_pathconf = spec_pathconf,
	.vop_advlock = spec_advlock,
	.vop_bwrite = vop_generic_bwrite,
};
135
#ifdef FIFO
/*
 * Vnode operations vector for FIFOs (named pipes) living on an FFS
 * filesystem.  As with special files, metadata/lifecycle operations
 * go through UFS/FFS while data movement is delegated to the fifofs
 * layer.
 */
const struct vops ffs_fifovops = {
	.vop_close = ufsfifo_close,
	.vop_access = ufs_access,
	.vop_getattr = ufs_getattr,
	.vop_setattr = ufs_setattr,
	.vop_read = ufsfifo_read,
	.vop_write = ufsfifo_write,
	.vop_fsync = ffs_fsync,
	.vop_inactive = ufs_inactive,
	.vop_reclaim = ffsfifo_reclaim,
	.vop_lock = ufs_lock,
	.vop_unlock = ufs_unlock,
	.vop_print = ufs_print,
	.vop_islocked = ufs_islocked,
	.vop_bwrite = vop_generic_bwrite,

	/* XXX: Keep in sync with fifo_vops */
	.vop_lookup = vop_generic_lookup,
	.vop_create = vop_generic_badop,
	.vop_mknod = vop_generic_badop,
	.vop_open = fifo_open,
	.vop_ioctl = fifo_ioctl,
	.vop_kqfilter = fifo_kqfilter,
	.vop_revoke = vop_generic_revoke,
	.vop_remove = vop_generic_badop,
	.vop_link = vop_generic_badop,
	.vop_rename = vop_generic_badop,
	.vop_mkdir = vop_generic_badop,
	.vop_rmdir = vop_generic_badop,
	.vop_symlink = vop_generic_badop,
	.vop_readdir = vop_generic_badop,
	.vop_readlink = vop_generic_badop,
	.vop_abortop = vop_generic_badop,
	.vop_bmap = vop_generic_bmap,
	.vop_strategy = vop_generic_badop,
	.vop_pathconf = fifo_pathconf,
	.vop_advlock = fifo_advlock
};
#endif /* FIFO */
176
177 /*
178 * Vnode op for reading.
179 */
180 int
181 ffs_read(void *v)
182 {
183 struct vop_read_args *ap = v;
184 struct vnode *vp;
185 struct inode *ip;
186 struct uio *uio;
187 struct fs *fs;
188 struct buf *bp;
189 daddr_t lbn, nextlbn;
190 off_t bytesinfile;
191 int size, xfersize, blkoffset;
192 mode_t mode;
193 int error;
194
195 vp = ap->a_vp;
196 ip = VTOI(vp);
197 mode = DIP(ip, mode);
198 uio = ap->a_uio;
199
200 #ifdef DIAGNOSTIC
201 if (uio->uio_rw != UIO_READ)
202 panic("ffs_read: mode");
203
204 if (vp->v_type == VLNK) {
205 if (DIP(ip, size) < ip->i_ump->um_maxsymlinklen ||
206 (ip->i_ump->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0))
207 panic("ffs_read: short symlink");
208 } else if (vp->v_type != VREG && vp->v_type != VDIR)
209 panic("ffs_read: type %d", vp->v_type);
210 #endif
211 fs = ip->i_fs;
212 if (uio->uio_offset < 0)
213 return (EINVAL);
214 if (uio->uio_resid == 0)
215 return (0);
216
217 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
218 if ((bytesinfile = DIP(ip, size) - uio->uio_offset) <= 0)
219 break;
220 lbn = lblkno(fs, uio->uio_offset);
221 nextlbn = lbn + 1;
222 size = fs->fs_bsize; /* WAS blksize(fs, ip, lbn); */
223 blkoffset = blkoff(fs, uio->uio_offset);
224 xfersize = fs->fs_bsize - blkoffset;
225 if (uio->uio_resid < xfersize)
226 xfersize = uio->uio_resid;
227 if (bytesinfile < xfersize)
228 xfersize = bytesinfile;
229
230 if (lblktosize(fs, nextlbn) >= DIP(ip, size))
231 error = bread(vp, lbn, size, &bp);
232 else if (lbn - 1 == ip->i_ci.ci_lastr ||
233 uio->uio_resid > xfersize) {
234 error = bread_cluster(vp, lbn, size, &bp);
235 } else
236 error = bread(vp, lbn, size, &bp);
237
238 if (error)
239 break;
240 ip->i_ci.ci_lastr = lbn;
241
242 /*
243 * We should only get non-zero b_resid when an I/O error
244 * has occurred, which should cause us to break above.
245 * However, if the short read did not cause an error,
246 * then we want to ensure that we do not uiomove bad
247 * or uninitialized data.
248 */
249 size -= bp->b_resid;
250 if (size < xfersize) {
251 if (size == 0)
252 break;
253 xfersize = size;
254 }
255 error = uiomove(bp->b_data + blkoffset, xfersize, uio);
256 if (error)
257 break;
258 brelse(bp);
259 }
260 if (bp != NULL)
261 brelse(bp);
262 if (!(vp->v_mount->mnt_flag & MNT_NOATIME) ||
263 (ip->i_flag & (IN_CHANGE | IN_UPDATE))) {
264 ip->i_flag |= IN_ACCESS;
265 }
266 return (error);
267 }
268
269 /*
270 * Vnode op for writing.
271 */
272 int
273 ffs_write(void *v)
274 {
275 struct vop_write_args *ap = v;
276 struct vnode *vp;
277 struct uio *uio;
278 struct inode *ip;
279 struct fs *fs;
280 struct buf *bp;
281 daddr_t lbn;
282 off_t osize;
283 int blkoffset, error, extended, flags, ioflag, size, xfersize;
284 size_t resid;
285 ssize_t overrun;
286
287 extended = 0;
288 ioflag = ap->a_ioflag;
289 uio = ap->a_uio;
290 vp = ap->a_vp;
291 ip = VTOI(vp);
292
293 #ifdef DIAGNOSTIC
294 if (uio->uio_rw != UIO_WRITE)
295 panic("ffs_write: mode");
296 #endif
297
298 /*
299 * If writing 0 bytes, succeed and do not change
300 * update time or file offset (standards compliance)
301 */
302 if (uio->uio_resid == 0)
303 return (0);
304
305 switch (vp->v_type) {
306 case VREG:
307 if (ioflag & IO_APPEND)
308 uio->uio_offset = DIP(ip, size);
309 if ((DIP(ip, flags) & APPEND) && uio->uio_offset != DIP(ip, size))
310 return (EPERM);
311 /* FALLTHROUGH */
312 case VLNK:
313 break;
314 case VDIR:
315 if ((ioflag & IO_SYNC) == 0)
316 panic("ffs_write: nonsync dir write");
317 break;
318 default:
319 panic("ffs_write: type %d", vp->v_type);
320 }
321
322 fs = ip->i_fs;
323 if (uio->uio_offset < 0 ||
324 (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
325 return (EFBIG);
326
327 /* do the filesize rlimit check */
328 if ((error = vn_fsizechk(vp, uio, ioflag, &overrun)))
329 return (error);
330
331 resid = uio->uio_resid;
332 osize = DIP(ip, size);
333 flags = ioflag & IO_SYNC ? B_SYNC : 0;
334
335 for (error = 0; uio->uio_resid > 0;) {
336 lbn = lblkno(fs, uio->uio_offset);
337 blkoffset = blkoff(fs, uio->uio_offset);
338 xfersize = fs->fs_bsize - blkoffset;
339 if (uio->uio_resid < xfersize)
340 xfersize = uio->uio_resid;
341 if (fs->fs_bsize > xfersize)
342 flags |= B_CLRBUF;
343 else
344 flags &= ~B_CLRBUF;
345
346 if ((error = UFS_BUF_ALLOC(ip, uio->uio_offset, xfersize,
347 ap->a_cred, flags, &bp)) != 0)
348 break;
349 if (uio->uio_offset + xfersize > DIP(ip, size)) {
350 DIP_ASSIGN(ip, size, uio->uio_offset + xfersize);
351 uvm_vnp_setsize(vp, DIP(ip, size));
352 extended = 1;
353 }
354 (void)uvm_vnp_uncache(vp);
355
356 size = blksize(fs, ip, lbn) - bp->b_resid;
357 if (size < xfersize)
358 xfersize = size;
359
360 error = uiomove(bp->b_data + blkoffset, xfersize, uio);
361 /*
362 * If the buffer is not already filled and we encounter an
363 * error while trying to fill it, we have to clear out any
364 * garbage data from the pages instantiated for the buffer.
365 * If we do not, a failed uiomove() during a write can leave
366 * the prior contents of the pages exposed to a userland mmap.
367 *
368 * Note that we don't need to clear buffers that were
369 * allocated with the B_CLRBUF flag set.
370 */
371 if (error != 0 && !(flags & B_CLRBUF))
372 memset(bp->b_data + blkoffset, 0, xfersize);
373
374 if (ioflag & IO_NOCACHE)
375 bp->b_flags |= B_NOCACHE;
376
377 if (ioflag & IO_SYNC)
378 (void)bwrite(bp);
379 else if (xfersize + blkoffset == fs->fs_bsize) {
380 bawrite(bp);
381 } else
382 bdwrite(bp);
383
384 if (error || xfersize == 0)
385 break;
386 ip->i_flag |= IN_CHANGE | IN_UPDATE;
387 }
388 /*
389 * If we successfully wrote any data, and we are not the superuser
390 * we clear the setuid and setgid bits as a precaution against
391 * tampering.
392 */
393 if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0 &&
394 !vnoperm(vp))
395 DIP_ASSIGN(ip, mode, DIP(ip, mode) & ~(ISUID | ISGID));
396 if (resid > uio->uio_resid)
397 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
398 if (error) {
399 if (ioflag & IO_UNIT) {
400 (void)UFS_TRUNCATE(ip, osize,
401 ioflag & IO_SYNC, ap->a_cred);
402 uio->uio_offset -= resid - uio->uio_resid;
403 uio->uio_resid = resid;
404 }
405 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
406 error = UFS_UPDATE(ip, 1);
407 }
408 /* correct the result for writes clamped by vn_fsizechk() */
409 uio->uio_resid += overrun;
410 return (error);
411 }
412
413 /*
414 * Synch an open file.
415 */
416 int
417 ffs_fsync(void *v)
418 {
419 struct vop_fsync_args *ap = v;
420 struct vnode *vp = ap->a_vp;
421 struct buf *bp, *nbp;
422 int s, error, passes, skipmeta;
423
424 if (vp->v_type == VBLK &&
425 vp->v_specmountpoint != NULL &&
426 (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP))
427 softdep_fsync_mountdev(vp, ap->a_waitfor);
428
429 /*
430 * Flush all dirty buffers associated with a vnode.
431 */
432 passes = NIADDR + 1;
433 skipmeta = 0;
434 if (ap->a_waitfor == MNT_WAIT)
435 skipmeta = 1;
436 s = splbio();
437 loop:
438 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
439 bp->b_flags &= ~B_SCANNED;
440 }
441 LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) {
442 /*
443 * Reasons to skip this buffer: it has already been considered
444 * on this pass, this pass is the first time through on a
445 * synchronous flush request and the buffer being considered
446 * is metadata, the buffer has dependencies that will cause
447 * it to be redirtied and it has not already been deferred,
448 * or it is already being written.
449 */
450 if (bp->b_flags & (B_BUSY | B_SCANNED))
451 continue;
452 if ((bp->b_flags & B_DELWRI) == 0)
453 panic("ffs_fsync: not dirty");
454 if (skipmeta && bp->b_lblkno < 0)
455 continue;
456 if (ap->a_waitfor != MNT_WAIT &&
457 LIST_FIRST(&bp->b_dep) != NULL &&
458 (bp->b_flags & B_DEFERRED) == 0 &&
459 buf_countdeps(bp, 0, 1)) {
460 bp->b_flags |= B_DEFERRED;
461 continue;
462 }
463
464 bremfree(bp);
465 buf_acquire(bp);
466 bp->b_flags |= B_SCANNED;
467 splx(s);
468 /*
469 * On our final pass through, do all I/O synchronously
470 * so that we can find out if our flush is failing
471 * because of write errors.
472 */
473 if (passes > 0 || ap->a_waitfor != MNT_WAIT)
474 (void) bawrite(bp);
475 else if ((error = bwrite(bp)) != 0)
476 return (error);
477 s = splbio();
478 /*
479 * Since we may have slept during the I/O, we need
480 * to start from a known point.
481 */
482 nbp = LIST_FIRST(&vp->v_dirtyblkhd);
483 }
484 if (skipmeta) {
485 skipmeta = 0;
486 goto loop;
487 }
488 if (ap->a_waitfor == MNT_WAIT) {
489 vwaitforio(vp, 0, "ffs_fsync", INFSLP);
490
491 /*
492 * Ensure that any filesystem metadata associated
493 * with the vnode has been written.
494 */
495 splx(s);
496 if ((error = softdep_sync_metadata(ap)) != 0)
497 return (error);
498 s = splbio();
499 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
500 /*
501 * Block devices associated with filesystems may
502 * have new I/O requests posted for them even if
503 * the vnode is locked, so no amount of trying will
504 * get them clean. Thus we give block devices a
505 * good effort, then just give up. For all other file
506 * types, go around and try again until it is clean.
507 */
508 if (passes > 0) {
509 passes -= 1;
510 goto loop;
511 }
512 #ifdef DIAGNOSTIC
513 if (vp->v_type != VBLK)
514 vprint("ffs_fsync: dirty", vp);
515 #endif
516 }
517 }
518 splx(s);
519 return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
520 }
521
522 /*
523 * Reclaim an inode so that it can be used for other purposes.
524 */
525 int
526 ffs_reclaim(void *v)
527 {
528 struct vop_reclaim_args *ap = v;
529 struct vnode *vp = ap->a_vp;
530 struct inode *ip = VTOI(vp);
531 int error;
532
533 if ((error = ufs_reclaim(vp)) != 0)
534 return (error);
535
536 if (ip->i_din1 != NULL) {
537 #ifdef FFS2
538 if (ip->i_ump->um_fstype == UM_UFS2)
539 pool_put(&ffs_dinode2_pool, ip->i_din2);
540 else
541 #endif
542 pool_put(&ffs_dinode1_pool, ip->i_din1);
543 }
544
545 pool_put(&ffs_ino_pool, ip);
546
547 vp->v_data = NULL;
548
549 return (0);
550 }
551
#ifdef FIFO
/*
 * Reclaim a FIFO vnode: release the fifofs state first, then hand
 * the underlying inode to the regular FFS reclaim path.
 */
int
ffsfifo_reclaim(void *v)
{
	int error;

	fifo_reclaim(v);
	error = ffs_reclaim(v);
	return (error);
}
#endif /* FIFO */