1 /*-
2 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * from: @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
60 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
61 * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95
62 */
63
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
66
67 #include <sys/param.h>
68 #include <sys/bio.h>
69 #include <sys/systm.h>
70 #include <sys/buf.h>
71 #include <sys/conf.h>
72 #include <sys/extattr.h>
73 #include <sys/kernel.h>
74 #include <sys/limits.h>
75 #include <sys/malloc.h>
76 #include <sys/mount.h>
77 #include <sys/priv.h>
78 #include <sys/proc.h>
79 #include <sys/resourcevar.h>
80 #include <sys/signalvar.h>
81 #include <sys/stat.h>
82 #include <sys/vmmeter.h>
83 #include <sys/vnode.h>
84
85 #include <vm/vm.h>
86 #include <vm/vm_extern.h>
87 #include <vm/vm_object.h>
88 #include <vm/vm_page.h>
89 #include <vm/vm_pager.h>
90 #include <vm/vnode_pager.h>
91
92 #include <ufs/ufs/extattr.h>
93 #include <ufs/ufs/quota.h>
94 #include <ufs/ufs/inode.h>
95 #include <ufs/ufs/ufs_extern.h>
96 #include <ufs/ufs/ufsmount.h>
97
98 #include <ufs/ffs/fs.h>
99 #include <ufs/ffs/ffs_extern.h>
100 #include "opt_directio.h"
101 #include "opt_ffs.h"
102
#ifdef DIRECTIO
/* Raw (uncached) read path; sets *workdone when it handled the transfer. */
extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
/* Core vnode operations implemented in this file. */
static vop_fsync_t ffs_fsync;
static vop_lock1_t ffs_lock;
static vop_getpages_t ffs_getpages;
static vop_read_t ffs_read;
static vop_write_t ffs_write;
/* Extended attribute area I/O helpers (UFS2 only). */
static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
struct ucred *cred);
/* Extended attribute vnode operations. */
static vop_strategy_t ffsext_strategy;
static vop_closeextattr_t ffs_closeextattr;
static vop_deleteextattr_t ffs_deleteextattr;
static vop_getextattr_t ffs_getextattr;
static vop_listextattr_t ffs_listextattr;
static vop_openextattr_t ffs_openextattr;
static vop_setextattr_t ffs_setextattr;
static vop_vptofh_t ffs_vptofh;
122
123
/* Global vfs data structures for ufs. */
/* Vnode operations for UFS1 filesystems (no extended attribute support). */
struct vop_vector ffs_vnodeops1 = {
	.vop_default = &ufs_vnodeops,
	.vop_fsync = ffs_fsync,
	.vop_getpages = ffs_getpages,
	.vop_lock1 = ffs_lock,
	.vop_read = ffs_read,
	.vop_reallocblks = ffs_reallocblks,
	.vop_write = ffs_write,
	.vop_vptofh = ffs_vptofh,
};

/* Fifo operations for UFS1 filesystems. */
/*
 * NOTE(review): unlike ffs_fifoops2 below, this table installs no
 * .vop_lock1 (ffs_lock), so fifo vnodes on UFS1 fall through to the
 * default lock op — confirm this asymmetry is intentional.
 */
struct vop_vector ffs_fifoops1 = {
	.vop_default = &ufs_fifoops,
	.vop_fsync = ffs_fsync,
	.vop_reallocblks = ffs_reallocblks, /* XXX: really ??? */
	.vop_vptofh = ffs_vptofh,
};
142
/* Global vfs data structures for ufs. */
/* Vnode operations for UFS2 filesystems, including extended attributes. */
struct vop_vector ffs_vnodeops2 = {
	.vop_default = &ufs_vnodeops,
	.vop_fsync = ffs_fsync,
	.vop_getpages = ffs_getpages,
	.vop_lock1 = ffs_lock,
	.vop_read = ffs_read,
	.vop_reallocblks = ffs_reallocblks,
	.vop_write = ffs_write,
	.vop_closeextattr = ffs_closeextattr,
	.vop_deleteextattr = ffs_deleteextattr,
	.vop_getextattr = ffs_getextattr,
	.vop_listextattr = ffs_listextattr,
	.vop_openextattr = ffs_openextattr,
	.vop_setextattr = ffs_setextattr,
	.vop_vptofh = ffs_vptofh,
};

/* Fifo operations for UFS2 filesystems, including extended attributes. */
struct vop_vector ffs_fifoops2 = {
	.vop_default = &ufs_fifoops,
	.vop_fsync = ffs_fsync,
	.vop_lock1 = ffs_lock,
	.vop_reallocblks = ffs_reallocblks,
	.vop_strategy = ffsext_strategy,
	.vop_closeextattr = ffs_closeextattr,
	.vop_deleteextattr = ffs_deleteextattr,
	.vop_getextattr = ffs_getextattr,
	.vop_listextattr = ffs_listextattr,
	.vop_openextattr = ffs_openextattr,
	.vop_setextattr = ffs_setextattr,
	.vop_vptofh = ffs_vptofh,
};
175
176 /*
177 * Synch an open file.
178 */
179 /* ARGSUSED */
180 static int
181 ffs_fsync(struct vop_fsync_args *ap)
182 {
183 int error;
184
185 error = ffs_syncvnode(ap->a_vp, ap->a_waitfor);
186 if (error)
187 return (error);
188 if (ap->a_waitfor == MNT_WAIT &&
189 (ap->a_vp->v_mount->mnt_flag & MNT_SOFTDEP))
190 error = softdep_fsync(ap->a_vp);
191 return (error);
192 }
193
/*
 * Flush all dirty buffers of a vnode and update its inode.
 *
 * For MNT_WAIT requests the data buffers are flushed first (metadata is
 * skipped on the initial pass), then the metadata, then soft-dependency
 * metadata is synced; up to NIADDR + 1 additional passes are made if new
 * dirty buffers keep appearing.  Returns 0 or the first write error.
 * Called with the vnode locked; uses the vnode interlock and splbio to
 * protect the dirty buffer list.
 */
int
ffs_syncvnode(struct vnode *vp, int waitfor)
{
	struct inode *ip = VTOI(vp);
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (waitfor == MNT_WAIT);
	/* First logical block past EOF; buffers at or beyond it are stale. */
	lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;
	s = splbio();
	VI_LOCK(vp);
loop:
	/* Clear the per-pass scan markers before walking the dirty list. */
	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		/* Negative lbn means indirect-block (metadata) buffer. */
		if ((skipmeta == 1 && bp->b_lblkno < 0))
			continue;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		VI_UNLOCK(vp);
		if (!wait && !LIST_EMPTY(&bp->b_dep) &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			/* Writing now would just redirty it; defer it. */
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			VI_LOCK(vp);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediately.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {

			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				/* Final pass: synchronous so we see errors. */
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else
			vfs_bio_awrite(bp);

		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		VI_LOCK(vp);
		nbp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		/* Wait for any writes in progress to drain. */
		bufobj_wwait(&vp->v_bufobj, 3, 0);
		VI_UNLOCK(vp);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(vp)) != 0)
			return (error);
		s = splbio();

		VI_LOCK(vp);
		if (vp->v_bufobj.bo_dirty.bv_cnt > 0) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef INVARIANTS
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	VI_UNLOCK(vp);
	splx(s);
	/* Finally push the inode itself to disk. */
	return (ffs_update(vp, wait));
}
338
/*
 * Acquire the vnode lock, coping with snapshot lock mutation.
 *
 * A vnode may switch between using its private lock and a shared
 * snapshot lock (v_vnlock can change identity while we sleep).  For
 * SHARED/UPGRADE/EXCLUSIVE requests we therefore loop: take the lock
 * that v_vnlock currently points at, and if the pointer changed while
 * we slept, release the now-wrong lock and retry on the new one.
 * Other lock operations are passed straight to the generic UFS code.
 */
static int
ffs_lock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
		char *file;
		int line;
	} */ *ap;
{
#ifndef NO_FFS_SNAPSHOT
	struct vnode *vp;
	int flags;
	struct lock *lkp;
	int result;

	switch (ap->a_flags & LK_TYPE_MASK) {
	case LK_SHARED:
	case LK_UPGRADE:
	case LK_EXCLUSIVE:
		vp = ap->a_vp;
		flags = ap->a_flags;
		for (;;) {
			/*
			 * vnode interlock must be held to ensure that
			 * the possibly external lock isn't freed,
			 * e.g. when mutating from snapshot file vnode
			 * to regular file vnode.
			 */
			if ((flags & LK_INTERLOCK) == 0) {
				VI_LOCK(vp);
				flags |= LK_INTERLOCK;
			}
#ifdef DEBUG_VFS_LOCKS
			KASSERT(vp->v_holdcnt != 0,
			    ("ffs_lock %p: zero hold count", vp));
#endif
			lkp = vp->v_vnlock;
			result = _lockmgr(lkp, flags, VI_MTX(vp), ap->a_td, ap->a_file, ap->a_line);
			/* Done if we got the lock we asked for, or failed. */
			if (lkp == vp->v_vnlock || result != 0)
				break;
			/*
			 * Apparent success, except that the vnode
			 * mutated between snapshot file vnode and
			 * regular file vnode while this process
			 * slept. The lock currently held is not the
			 * right lock. Release it, and try to get the
			 * new lock.
			 */
			(void) _lockmgr(lkp, LK_RELEASE, VI_MTX(vp), ap->a_td, ap->a_file, ap->a_line);
			/* An upgrade already released the shared lock. */
			if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
			flags &= ~LK_INTERLOCK;
		}
		break;
	default:
		result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
	}
	return (result);
#else
	return (VOP_LOCK1_APV(&ufs_vnodeops, ap));
#endif
}
402
403 /*
404 * Vnode op for reading.
405 */
406 /* ARGSUSED */
407 static int
408 ffs_read(ap)
409 struct vop_read_args /* {
410 struct vnode *a_vp;
411 struct uio *a_uio;
412 int a_ioflag;
413 struct ucred *a_cred;
414 } */ *ap;
415 {
416 struct vnode *vp;
417 struct inode *ip;
418 struct uio *uio;
419 struct fs *fs;
420 struct buf *bp;
421 ufs_lbn_t lbn, nextlbn;
422 off_t bytesinfile;
423 long size, xfersize, blkoffset;
424 int error, orig_resid;
425 int seqcount;
426 int ioflag;
427
428 vp = ap->a_vp;
429 uio = ap->a_uio;
430 ioflag = ap->a_ioflag;
431 if (ap->a_ioflag & IO_EXT)
432 #ifdef notyet
433 return (ffs_extread(vp, uio, ioflag));
434 #else
435 panic("ffs_read+IO_EXT");
436 #endif
437 #ifdef DIRECTIO
438 if ((ioflag & IO_DIRECT) != 0) {
439 int workdone;
440
441 error = ffs_rawread(vp, uio, &workdone);
442 if (error != 0 || workdone != 0)
443 return error;
444 }
445 #endif
446
447 seqcount = ap->a_ioflag >> IO_SEQSHIFT;
448 ip = VTOI(vp);
449
450 #ifdef INVARIANTS
451 if (uio->uio_rw != UIO_READ)
452 panic("ffs_read: mode");
453
454 if (vp->v_type == VLNK) {
455 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
456 panic("ffs_read: short symlink");
457 } else if (vp->v_type != VREG && vp->v_type != VDIR)
458 panic("ffs_read: type %d", vp->v_type);
459 #endif
460 orig_resid = uio->uio_resid;
461 KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
462 if (orig_resid == 0)
463 return (0);
464 KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
465 fs = ip->i_fs;
466 if (uio->uio_offset < ip->i_size &&
467 uio->uio_offset >= fs->fs_maxfilesize)
468 return (EOVERFLOW);
469
470 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
471 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
472 break;
473 lbn = lblkno(fs, uio->uio_offset);
474 nextlbn = lbn + 1;
475
476 /*
477 * size of buffer. The buffer representing the
478 * end of the file is rounded up to the size of
479 * the block type ( fragment or full block,
480 * depending ).
481 */
482 size = blksize(fs, ip, lbn);
483 blkoffset = blkoff(fs, uio->uio_offset);
484
485 /*
486 * The amount we want to transfer in this iteration is
487 * one FS block less the amount of the data before
488 * our startpoint (duh!)
489 */
490 xfersize = fs->fs_bsize - blkoffset;
491
492 /*
493 * But if we actually want less than the block,
494 * or the file doesn't have a whole block more of data,
495 * then use the lesser number.
496 */
497 if (uio->uio_resid < xfersize)
498 xfersize = uio->uio_resid;
499 if (bytesinfile < xfersize)
500 xfersize = bytesinfile;
501
502 if (lblktosize(fs, nextlbn) >= ip->i_size) {
503 /*
504 * Don't do readahead if this is the end of the file.
505 */
506 error = bread(vp, lbn, size, NOCRED, &bp);
507 } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
508 /*
509 * Otherwise if we are allowed to cluster,
510 * grab as much as we can.
511 *
512 * XXX This may not be a win if we are not
513 * doing sequential access.
514 */
515 error = cluster_read(vp, ip->i_size, lbn,
516 size, NOCRED, blkoffset + uio->uio_resid, seqcount, &bp);
517 } else if (seqcount > 1) {
518 /*
519 * If we are NOT allowed to cluster, then
520 * if we appear to be acting sequentially,
521 * fire off a request for a readahead
522 * as well as a read. Note that the 4th and 5th
523 * arguments point to arrays of the size specified in
524 * the 6th argument.
525 */
526 int nextsize = blksize(fs, ip, nextlbn);
527 error = breadn(vp, lbn,
528 size, &nextlbn, &nextsize, 1, NOCRED, &bp);
529 } else {
530 /*
531 * Failing all of the above, just read what the
532 * user asked for. Interestingly, the same as
533 * the first option above.
534 */
535 error = bread(vp, lbn, size, NOCRED, &bp);
536 }
537 if (error) {
538 brelse(bp);
539 bp = NULL;
540 break;
541 }
542
543 /*
544 * If IO_DIRECT then set B_DIRECT for the buffer. This
545 * will cause us to attempt to release the buffer later on
546 * and will cause the buffer cache to attempt to free the
547 * underlying pages.
548 */
549 if (ioflag & IO_DIRECT)
550 bp->b_flags |= B_DIRECT;
551
552 /*
553 * We should only get non-zero b_resid when an I/O error
554 * has occurred, which should cause us to break above.
555 * However, if the short read did not cause an error,
556 * then we want to ensure that we do not uiomove bad
557 * or uninitialized data.
558 */
559 size -= bp->b_resid;
560 if (size < xfersize) {
561 if (size == 0)
562 break;
563 xfersize = size;
564 }
565
566 error = uiomove((char *)bp->b_data + blkoffset,
567 (int)xfersize, uio);
568 if (error)
569 break;
570
571 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
572 (LIST_EMPTY(&bp->b_dep))) {
573 /*
574 * If there are no dependencies, and it's VMIO,
575 * then we don't need the buf, mark it available
576 * for freeing. The VM has the data.
577 */
578 bp->b_flags |= B_RELBUF;
579 brelse(bp);
580 } else {
581 /*
582 * Otherwise let whoever
583 * made the request take care of
584 * freeing it. We just queue
585 * it onto another list.
586 */
587 bqrelse(bp);
588 }
589 }
590
591 /*
592 * This can only happen in the case of an error
593 * because the loop above resets bp to NULL on each iteration
594 * and on normal completion has not set a new value into it.
595 * so it must have come from a 'break' statement
596 */
597 if (bp != NULL) {
598 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
599 (LIST_EMPTY(&bp->b_dep))) {
600 bp->b_flags |= B_RELBUF;
601 brelse(bp);
602 } else {
603 bqrelse(bp);
604 }
605 }
606
607 if ((error == 0 || uio->uio_resid != orig_resid) &&
608 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
609 VI_LOCK(vp);
610 ip->i_flag |= IN_ACCESS;
611 VI_UNLOCK(vp);
612 }
613 return (error);
614 }
615
616 /*
617 * Vnode op for writing.
618 */
619 static int
620 ffs_write(ap)
621 struct vop_write_args /* {
622 struct vnode *a_vp;
623 struct uio *a_uio;
624 int a_ioflag;
625 struct ucred *a_cred;
626 } */ *ap;
627 {
628 struct vnode *vp;
629 struct uio *uio;
630 struct inode *ip;
631 struct fs *fs;
632 struct buf *bp;
633 struct thread *td;
634 ufs_lbn_t lbn;
635 off_t osize;
636 int seqcount;
637 int blkoffset, error, flags, ioflag, resid, size, xfersize;
638
639 vp = ap->a_vp;
640 uio = ap->a_uio;
641 ioflag = ap->a_ioflag;
642 if (ap->a_ioflag & IO_EXT)
643 #ifdef notyet
644 return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
645 #else
646 panic("ffs_write+IO_EXT");
647 #endif
648
649 seqcount = ap->a_ioflag >> IO_SEQSHIFT;
650 ip = VTOI(vp);
651
652 #ifdef INVARIANTS
653 if (uio->uio_rw != UIO_WRITE)
654 panic("ffs_write: mode");
655 #endif
656
657 switch (vp->v_type) {
658 case VREG:
659 if (ioflag & IO_APPEND)
660 uio->uio_offset = ip->i_size;
661 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
662 return (EPERM);
663 /* FALLTHROUGH */
664 case VLNK:
665 break;
666 case VDIR:
667 panic("ffs_write: dir write");
668 break;
669 default:
670 panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
671 (int)uio->uio_offset,
672 (int)uio->uio_resid
673 );
674 }
675
676 KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
677 KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
678 fs = ip->i_fs;
679 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
680 return (EFBIG);
681 /*
682 * Maybe this should be above the vnode op call, but so long as
683 * file servers have no limits, I don't think it matters.
684 */
685 td = uio->uio_td;
686 if (vp->v_type == VREG && td != NULL) {
687 PROC_LOCK(td->td_proc);
688 if (uio->uio_offset + uio->uio_resid >
689 lim_cur(td->td_proc, RLIMIT_FSIZE)) {
690 psignal(td->td_proc, SIGXFSZ);
691 PROC_UNLOCK(td->td_proc);
692 return (EFBIG);
693 }
694 PROC_UNLOCK(td->td_proc);
695 }
696
697 resid = uio->uio_resid;
698 osize = ip->i_size;
699 if (seqcount > BA_SEQMAX)
700 flags = BA_SEQMAX << BA_SEQSHIFT;
701 else
702 flags = seqcount << BA_SEQSHIFT;
703 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
704 flags |= IO_SYNC;
705
706 for (error = 0; uio->uio_resid > 0;) {
707 lbn = lblkno(fs, uio->uio_offset);
708 blkoffset = blkoff(fs, uio->uio_offset);
709 xfersize = fs->fs_bsize - blkoffset;
710 if (uio->uio_resid < xfersize)
711 xfersize = uio->uio_resid;
712 if (uio->uio_offset + xfersize > ip->i_size)
713 vnode_pager_setsize(vp, uio->uio_offset + xfersize);
714
715 /*
716 * We must perform a read-before-write if the transfer size
717 * does not cover the entire buffer.
718 */
719 if (fs->fs_bsize > xfersize)
720 flags |= BA_CLRBUF;
721 else
722 flags &= ~BA_CLRBUF;
723 /* XXX is uio->uio_offset the right thing here? */
724 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
725 ap->a_cred, flags, &bp);
726 if (error != 0)
727 break;
728 /*
729 * If the buffer is not valid we have to clear out any
730 * garbage data from the pages instantiated for the buffer.
731 * If we do not, a failed uiomove() during a write can leave
732 * the prior contents of the pages exposed to a userland
733 * mmap(). XXX deal with uiomove() errors a better way.
734 */
735 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
736 vfs_bio_clrbuf(bp);
737 if (ioflag & IO_DIRECT)
738 bp->b_flags |= B_DIRECT;
739 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
740 bp->b_flags |= B_NOCACHE;
741
742 if (uio->uio_offset + xfersize > ip->i_size) {
743 ip->i_size = uio->uio_offset + xfersize;
744 DIP_SET(ip, i_size, ip->i_size);
745 }
746
747 size = blksize(fs, ip, lbn) - bp->b_resid;
748 if (size < xfersize)
749 xfersize = size;
750
751 error =
752 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
753 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
754 (LIST_EMPTY(&bp->b_dep))) {
755 bp->b_flags |= B_RELBUF;
756 }
757
758 /*
759 * If IO_SYNC each buffer is written synchronously. Otherwise
760 * if we have a severe page deficiency write the buffer
761 * asynchronously. Otherwise try to cluster, and if that
762 * doesn't do it then either do an async write (if O_DIRECT),
763 * or a delayed write (if not).
764 */
765 if (ioflag & IO_SYNC) {
766 (void)bwrite(bp);
767 } else if (vm_page_count_severe() ||
768 buf_dirty_count_severe() ||
769 (ioflag & IO_ASYNC)) {
770 bp->b_flags |= B_CLUSTEROK;
771 bawrite(bp);
772 } else if (xfersize + blkoffset == fs->fs_bsize) {
773 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
774 bp->b_flags |= B_CLUSTEROK;
775 cluster_write(vp, bp, ip->i_size, seqcount);
776 } else {
777 bawrite(bp);
778 }
779 } else if (ioflag & IO_DIRECT) {
780 bp->b_flags |= B_CLUSTEROK;
781 bawrite(bp);
782 } else {
783 bp->b_flags |= B_CLUSTEROK;
784 bdwrite(bp);
785 }
786 if (error || xfersize == 0)
787 break;
788 ip->i_flag |= IN_CHANGE | IN_UPDATE;
789 }
790 /*
791 * If we successfully wrote any data, and we are not the superuser
792 * we clear the setuid and setgid bits as a precaution against
793 * tampering.
794 */
795 if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
796 ap->a_cred) {
797 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) {
798 ip->i_mode &= ~(ISUID | ISGID);
799 DIP_SET(ip, i_mode, ip->i_mode);
800 }
801 }
802 if (error) {
803 if (ioflag & IO_UNIT) {
804 (void)ffs_truncate(vp, osize,
805 IO_NORMAL | (ioflag & IO_SYNC),
806 ap->a_cred, uio->uio_td);
807 uio->uio_offset -= resid - uio->uio_resid;
808 uio->uio_resid = resid;
809 }
810 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
811 error = ffs_update(vp, 1);
812 return (error);
813 }
814
815 /*
816 * get page routine
817 */
818 static int
819 ffs_getpages(ap)
820 struct vop_getpages_args *ap;
821 {
822 int i;
823 vm_page_t mreq;
824 int pcount;
825
826 pcount = round_page(ap->a_count) / PAGE_SIZE;
827 mreq = ap->a_m[ap->a_reqpage];
828
829 /*
830 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
831 * then the entire page is valid. Since the page may be mapped,
832 * user programs might reference data beyond the actual end of file
833 * occuring within the page. We have to zero that data.
834 */
835 VM_OBJECT_LOCK(mreq->object);
836 if (mreq->valid) {
837 if (mreq->valid != VM_PAGE_BITS_ALL)
838 vm_page_zero_invalid(mreq, TRUE);
839 vm_page_lock_queues();
840 for (i = 0; i < pcount; i++) {
841 if (i != ap->a_reqpage) {
842 vm_page_free(ap->a_m[i]);
843 }
844 }
845 vm_page_unlock_queues();
846 VM_OBJECT_UNLOCK(mreq->object);
847 return VM_PAGER_OK;
848 }
849 VM_OBJECT_UNLOCK(mreq->object);
850
851 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
852 ap->a_count,
853 ap->a_reqpage);
854 }
855
856
857 /*
858 * Extended attribute area reading.
859 */
860 static int
861 ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
862 {
863 struct inode *ip;
864 struct ufs2_dinode *dp;
865 struct fs *fs;
866 struct buf *bp;
867 ufs_lbn_t lbn, nextlbn;
868 off_t bytesinfile;
869 long size, xfersize, blkoffset;
870 int error, orig_resid;
871
872 ip = VTOI(vp);
873 fs = ip->i_fs;
874 dp = ip->i_din2;
875
876 #ifdef INVARIANTS
877 if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
878 panic("ffs_extread: mode");
879
880 #endif
881 orig_resid = uio->uio_resid;
882 KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
883 if (orig_resid == 0)
884 return (0);
885 KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));
886
887 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
888 if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
889 break;
890 lbn = lblkno(fs, uio->uio_offset);
891 nextlbn = lbn + 1;
892
893 /*
894 * size of buffer. The buffer representing the
895 * end of the file is rounded up to the size of
896 * the block type ( fragment or full block,
897 * depending ).
898 */
899 size = sblksize(fs, dp->di_extsize, lbn);
900 blkoffset = blkoff(fs, uio->uio_offset);
901
902 /*
903 * The amount we want to transfer in this iteration is
904 * one FS block less the amount of the data before
905 * our startpoint (duh!)
906 */
907 xfersize = fs->fs_bsize - blkoffset;
908
909 /*
910 * But if we actually want less than the block,
911 * or the file doesn't have a whole block more of data,
912 * then use the lesser number.
913 */
914 if (uio->uio_resid < xfersize)
915 xfersize = uio->uio_resid;
916 if (bytesinfile < xfersize)
917 xfersize = bytesinfile;
918
919 if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
920 /*
921 * Don't do readahead if this is the end of the info.
922 */
923 error = bread(vp, -1 - lbn, size, NOCRED, &bp);
924 } else {
925 /*
926 * If we have a second block, then
927 * fire off a request for a readahead
928 * as well as a read. Note that the 4th and 5th
929 * arguments point to arrays of the size specified in
930 * the 6th argument.
931 */
932 int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
933
934 nextlbn = -1 - nextlbn;
935 error = breadn(vp, -1 - lbn,
936 size, &nextlbn, &nextsize, 1, NOCRED, &bp);
937 }
938 if (error) {
939 brelse(bp);
940 bp = NULL;
941 break;
942 }
943
944 /*
945 * If IO_DIRECT then set B_DIRECT for the buffer. This
946 * will cause us to attempt to release the buffer later on
947 * and will cause the buffer cache to attempt to free the
948 * underlying pages.
949 */
950 if (ioflag & IO_DIRECT)
951 bp->b_flags |= B_DIRECT;
952
953 /*
954 * We should only get non-zero b_resid when an I/O error
955 * has occurred, which should cause us to break above.
956 * However, if the short read did not cause an error,
957 * then we want to ensure that we do not uiomove bad
958 * or uninitialized data.
959 */
960 size -= bp->b_resid;
961 if (size < xfersize) {
962 if (size == 0)
963 break;
964 xfersize = size;
965 }
966
967 error = uiomove((char *)bp->b_data + blkoffset,
968 (int)xfersize, uio);
969 if (error)
970 break;
971
972 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
973 (LIST_EMPTY(&bp->b_dep))) {
974 /*
975 * If there are no dependencies, and it's VMIO,
976 * then we don't need the buf, mark it available
977 * for freeing. The VM has the data.
978 */
979 bp->b_flags |= B_RELBUF;
980 brelse(bp);
981 } else {
982 /*
983 * Otherwise let whoever
984 * made the request take care of
985 * freeing it. We just queue
986 * it onto another list.
987 */
988 bqrelse(bp);
989 }
990 }
991
992 /*
993 * This can only happen in the case of an error
994 * because the loop above resets bp to NULL on each iteration
995 * and on normal completion has not set a new value into it.
996 * so it must have come from a 'break' statement
997 */
998 if (bp != NULL) {
999 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1000 (LIST_EMPTY(&bp->b_dep))) {
1001 bp->b_flags |= B_RELBUF;
1002 brelse(bp);
1003 } else {
1004 bqrelse(bp);
1005 }
1006 }
1007
1008 if ((error == 0 || uio->uio_resid != orig_resid) &&
1009 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
1010 VI_LOCK(vp);
1011 ip->i_flag |= IN_ACCESS;
1012 VI_UNLOCK(vp);
1013 }
1014 return (error);
1015 }
1016
1017 /*
1018 * Extended attribute area writing.
1019 */
1020 static int
1021 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
1022 {
1023 struct inode *ip;
1024 struct ufs2_dinode *dp;
1025 struct fs *fs;
1026 struct buf *bp;
1027 ufs_lbn_t lbn;
1028 off_t osize;
1029 int blkoffset, error, flags, resid, size, xfersize;
1030
1031 ip = VTOI(vp);
1032 fs = ip->i_fs;
1033 dp = ip->i_din2;
1034
1035 KASSERT(!(ip->i_flag & IN_SPACECOUNTED), ("inode %u: inode is dead",
1036 ip->i_number));
1037
1038 #ifdef INVARIANTS
1039 if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
1040 panic("ffs_extwrite: mode");
1041 #endif
1042
1043 if (ioflag & IO_APPEND)
1044 uio->uio_offset = dp->di_extsize;
1045 KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
1046 KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
1047 if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
1048 return (EFBIG);
1049
1050 resid = uio->uio_resid;
1051 osize = dp->di_extsize;
1052 flags = IO_EXT;
1053 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
1054 flags |= IO_SYNC;
1055
1056 for (error = 0; uio->uio_resid > 0;) {
1057 lbn = lblkno(fs, uio->uio_offset);
1058 blkoffset = blkoff(fs, uio->uio_offset);
1059 xfersize = fs->fs_bsize - blkoffset;
1060 if (uio->uio_resid < xfersize)
1061 xfersize = uio->uio_resid;
1062
1063 /*
1064 * We must perform a read-before-write if the transfer size
1065 * does not cover the entire buffer.
1066 */
1067 if (fs->fs_bsize > xfersize)
1068 flags |= BA_CLRBUF;
1069 else
1070 flags &= ~BA_CLRBUF;
1071 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
1072 ucred, flags, &bp);
1073 if (error != 0)
1074 break;
1075 /*
1076 * If the buffer is not valid we have to clear out any
1077 * garbage data from the pages instantiated for the buffer.
1078 * If we do not, a failed uiomove() during a write can leave
1079 * the prior contents of the pages exposed to a userland
1080 * mmap(). XXX deal with uiomove() errors a better way.
1081 */
1082 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
1083 vfs_bio_clrbuf(bp);
1084 if (ioflag & IO_DIRECT)
1085 bp->b_flags |= B_DIRECT;
1086
1087 if (uio->uio_offset + xfersize > dp->di_extsize)
1088 dp->di_extsize = uio->uio_offset + xfersize;
1089
1090 size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
1091 if (size < xfersize)
1092 xfersize = size;
1093
1094 error =
1095 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
1096 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1097 (LIST_EMPTY(&bp->b_dep))) {
1098 bp->b_flags |= B_RELBUF;
1099 }
1100
1101 /*
1102 * If IO_SYNC each buffer is written synchronously. Otherwise
1103 * if we have a severe page deficiency write the buffer
1104 * asynchronously. Otherwise try to cluster, and if that
1105 * doesn't do it then either do an async write (if O_DIRECT),
1106 * or a delayed write (if not).
1107 */
1108 if (ioflag & IO_SYNC) {
1109 (void)bwrite(bp);
1110 } else if (vm_page_count_severe() ||
1111 buf_dirty_count_severe() ||
1112 xfersize + blkoffset == fs->fs_bsize ||
1113 (ioflag & (IO_ASYNC | IO_DIRECT)))
1114 bawrite(bp);
1115 else
1116 bdwrite(bp);
1117 if (error || xfersize == 0)
1118 break;
1119 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1120 }
1121 /*
1122 * If we successfully wrote any data, and we are not the superuser
1123 * we clear the setuid and setgid bits as a precaution against
1124 * tampering.
1125 */
1126 if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
1127 if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID, 0)) {
1128 ip->i_mode &= ~(ISUID | ISGID);
1129 dp->di_mode = ip->i_mode;
1130 }
1131 }
1132 if (error) {
1133 if (ioflag & IO_UNIT) {
1134 (void)ffs_truncate(vp, osize,
1135 IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
1136 uio->uio_offset -= resid - uio->uio_resid;
1137 uio->uio_resid = resid;
1138 }
1139 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
1140 error = ffs_update(vp, 1);
1141 return (error);
1142 }
1143
1144
1145 /*
1146 * Vnode operating to retrieve a named extended attribute.
1147 *
1148 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
1149 * the length of the EA, and possibly the pointer to the entry and to the data.
1150 */
1151 static int
1152 ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac)
1153 {
1154 u_char *p, *pe, *pn, *p0;
1155 int eapad1, eapad2, ealength, ealen, nlen;
1156 uint32_t ul;
1157
1158 pe = ptr + length;
1159 nlen = strlen(name);
1160
1161 for (p = ptr; p < pe; p = pn) {
1162 p0 = p;
1163 bcopy(p, &ul, sizeof(ul));
1164 pn = p + ul;
1165 /* make sure this entry is complete */
1166 if (pn > pe)
1167 break;
1168 p += sizeof(uint32_t);
1169 if (*p != nspace)
1170 continue;
1171 p++;
1172 eapad2 = *p++;
1173 if (*p != nlen)
1174 continue;
1175 p++;
1176 if (bcmp(p, name, nlen))
1177 continue;
1178 ealength = sizeof(uint32_t) + 3 + nlen;
1179 eapad1 = 8 - (ealength % 8);
1180 if (eapad1 == 8)
1181 eapad1 = 0;
1182 ealength += eapad1;
1183 ealen = ul - ealength - eapad2;
1184 p += nlen + eapad1;
1185 if (eap != NULL)
1186 *eap = p0;
1187 if (eac != NULL)
1188 *eac = p;
1189 return (ealen);
1190 }
1191 return(-1);
1192 }
1193
1194 static int
1195 ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
1196 {
1197 struct inode *ip;
1198 struct ufs2_dinode *dp;
1199 struct fs *fs;
1200 struct uio luio;
1201 struct iovec liovec;
1202 int easize, error;
1203 u_char *eae;
1204
1205 ip = VTOI(vp);
1206 fs = ip->i_fs;
1207 dp = ip->i_din2;
1208 easize = dp->di_extsize;
1209 if ((uoff_t)easize + extra > NXADDR * fs->fs_bsize)
1210 return (EFBIG);
1211
1212 eae = malloc(easize + extra, M_TEMP, M_WAITOK);
1213
1214 liovec.iov_base = eae;
1215 liovec.iov_len = easize;
1216 luio.uio_iov = &liovec;
1217 luio.uio_iovcnt = 1;
1218 luio.uio_offset = 0;
1219 luio.uio_resid = easize;
1220 luio.uio_segflg = UIO_SYSSPACE;
1221 luio.uio_rw = UIO_READ;
1222 luio.uio_td = td;
1223
1224 error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
1225 if (error) {
1226 free(eae, M_TEMP);
1227 return(error);
1228 }
1229 *p = eae;
1230 return (0);
1231 }
1232
1233 static int
1234 ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
1235 {
1236 struct inode *ip;
1237 struct ufs2_dinode *dp;
1238 int error;
1239
1240 ip = VTOI(vp);
1241
1242 if (ip->i_ea_area != NULL)
1243 return (EBUSY);
1244 dp = ip->i_din2;
1245 error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
1246 if (error)
1247 return (error);
1248 ip->i_ea_len = dp->di_extsize;
1249 ip->i_ea_error = 0;
1250 return (0);
1251 }
1252
1253 /*
1254 * Vnode extattr transaction commit/abort
1255 */
1256 static int
1257 ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
1258 {
1259 struct inode *ip;
1260 struct uio luio;
1261 struct iovec liovec;
1262 int error;
1263 struct ufs2_dinode *dp;
1264
1265 ip = VTOI(vp);
1266 if (ip->i_ea_area == NULL)
1267 return (EINVAL);
1268 dp = ip->i_din2;
1269 error = ip->i_ea_error;
1270 if (commit && error == 0) {
1271 if (cred == NOCRED)
1272 cred = vp->v_mount->mnt_cred;
1273 liovec.iov_base = ip->i_ea_area;
1274 liovec.iov_len = ip->i_ea_len;
1275 luio.uio_iov = &liovec;
1276 luio.uio_iovcnt = 1;
1277 luio.uio_offset = 0;
1278 luio.uio_resid = ip->i_ea_len;
1279 luio.uio_segflg = UIO_SYSSPACE;
1280 luio.uio_rw = UIO_WRITE;
1281 luio.uio_td = td;
1282 /* XXX: I'm not happy about truncating to zero size */
1283 if (ip->i_ea_len < dp->di_extsize)
1284 error = ffs_truncate(vp, 0, IO_EXT, cred, td);
1285 error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
1286 }
1287 free(ip->i_ea_area, M_TEMP);
1288 ip->i_ea_area = NULL;
1289 ip->i_ea_len = 0;
1290 ip->i_ea_error = 0;
1291 return (error);
1292 }
1293
1294 /*
1295 * Vnode extattr strategy routine for fifos.
1296 *
1297 * We need to check for a read or write of the external attributes.
1298 * Otherwise we just fall through and do the usual thing.
1299 */
1300 static int
1301 ffsext_strategy(struct vop_strategy_args *ap)
1302 /*
1303 struct vop_strategy_args {
1304 struct vnodeop_desc *a_desc;
1305 struct vnode *a_vp;
1306 struct buf *a_bp;
1307 };
1308 */
1309 {
1310 struct vnode *vp;
1311 daddr_t lbn;
1312
1313 vp = ap->a_vp;
1314 lbn = ap->a_bp->b_lblkno;
1315 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
1316 lbn < 0 && lbn >= -NXADDR)
1317 return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
1318 if (vp->v_type == VFIFO)
1319 return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
1320 panic("spec nodes went here");
1321 }
1322
1323 /*
1324 * Vnode extattr transaction commit/abort
1325 */
1326 static int
1327 ffs_openextattr(struct vop_openextattr_args *ap)
1328 /*
1329 struct vop_openextattr_args {
1330 struct vnodeop_desc *a_desc;
1331 struct vnode *a_vp;
1332 IN struct ucred *a_cred;
1333 IN struct thread *a_td;
1334 };
1335 */
1336 {
1337 struct inode *ip;
1338 struct fs *fs;
1339
1340 ip = VTOI(ap->a_vp);
1341 fs = ip->i_fs;
1342
1343 if (ap->a_vp->v_type == VCHR)
1344 return (EOPNOTSUPP);
1345
1346 return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
1347 }
1348
1349
1350 /*
1351 * Vnode extattr transaction commit/abort
1352 */
1353 static int
1354 ffs_closeextattr(struct vop_closeextattr_args *ap)
1355 /*
1356 struct vop_closeextattr_args {
1357 struct vnodeop_desc *a_desc;
1358 struct vnode *a_vp;
1359 int a_commit;
1360 IN struct ucred *a_cred;
1361 IN struct thread *a_td;
1362 };
1363 */
1364 {
1365 struct inode *ip;
1366 struct fs *fs;
1367
1368 ip = VTOI(ap->a_vp);
1369 fs = ip->i_fs;
1370
1371 if (ap->a_vp->v_type == VCHR)
1372 return (EOPNOTSUPP);
1373
1374 if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY))
1375 return (EROFS);
1376
1377 return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
1378 }
1379
1380 /*
1381 * Vnode operation to remove a named attribute.
1382 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	/* Special device vnodes do not carry extended attributes. */
	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		/*
		 * Record the failure on an already-open transaction so
		 * that its eventual commit is turned into an abort.
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	/*
	 * If no EA transaction is open on the inode, run a private one
	 * (stand_alone) for the duration of this call.
	 */
	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	/* Deletion replaces the record with nothing: all lengths zero. */
	ealength = eapad1 = ealen = eapad2 = 0;

	/* Work on a private copy of the EA area. */
	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &p, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return(ENOATTR);
	}
	/* ul is the total on-disk length of the record being removed. */
	bcopy(p, &ul, sizeof ul);
	/* i is the offset of the first byte past the record. */
	i = p - eae + ul;
	/* Slide the tail of the area down over the deleted record. */
	if (ul != ealength) {
		bcopy(p + ul, p + ealength, easize - i);
		easize += (ealength - ul);
	}
	/*
	 * NOTE(review): a pure deletion only shrinks easize, so this
	 * check looks unreachable here; it mirrors ffs_setextattr().
	 */
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	/* Install the new area on the inode and free the old one. */
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}
1468
1469 /*
1470 * Vnode operation to retrieve a named extended attribute.
1471 */
1472 static int
1473 ffs_getextattr(struct vop_getextattr_args *ap)
1474 /*
1475 vop_getextattr {
1476 IN struct vnode *a_vp;
1477 IN int a_attrnamespace;
1478 IN const char *a_name;
1479 INOUT struct uio *a_uio;
1480 OUT size_t *a_size;
1481 IN struct ucred *a_cred;
1482 IN struct thread *a_td;
1483 };
1484 */
1485 {
1486 struct inode *ip;
1487 struct fs *fs;
1488 u_char *eae, *p;
1489 unsigned easize;
1490 int error, ealen, stand_alone;
1491
1492 ip = VTOI(ap->a_vp);
1493 fs = ip->i_fs;
1494
1495 if (ap->a_vp->v_type == VCHR)
1496 return (EOPNOTSUPP);
1497
1498 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1499 ap->a_cred, ap->a_td, IREAD);
1500 if (error)
1501 return (error);
1502
1503 if (ip->i_ea_area == NULL) {
1504 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1505 if (error)
1506 return (error);
1507 stand_alone = 1;
1508 } else {
1509 stand_alone = 0;
1510 }
1511 eae = ip->i_ea_area;
1512 easize = ip->i_ea_len;
1513
1514 ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1515 NULL, &p);
1516 if (ealen >= 0) {
1517 error = 0;
1518 if (ap->a_size != NULL)
1519 *ap->a_size = ealen;
1520 else if (ap->a_uio != NULL)
1521 error = uiomove(p, ealen, ap->a_uio);
1522 } else
1523 error = ENOATTR;
1524 if (stand_alone)
1525 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1526 return(error);
1527 }
1528
1529 /*
1530 * Vnode operation to retrieve extended attributes on a vnode.
1531 */
1532 static int
1533 ffs_listextattr(struct vop_listextattr_args *ap)
1534 /*
1535 vop_listextattr {
1536 IN struct vnode *a_vp;
1537 IN int a_attrnamespace;
1538 INOUT struct uio *a_uio;
1539 OUT size_t *a_size;
1540 IN struct ucred *a_cred;
1541 IN struct thread *a_td;
1542 };
1543 */
1544 {
1545 struct inode *ip;
1546 struct fs *fs;
1547 u_char *eae, *p, *pe, *pn;
1548 unsigned easize;
1549 uint32_t ul;
1550 int error, ealen, stand_alone;
1551
1552 ip = VTOI(ap->a_vp);
1553 fs = ip->i_fs;
1554
1555 if (ap->a_vp->v_type == VCHR)
1556 return (EOPNOTSUPP);
1557
1558 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1559 ap->a_cred, ap->a_td, IREAD);
1560 if (error)
1561 return (error);
1562
1563 if (ip->i_ea_area == NULL) {
1564 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1565 if (error)
1566 return (error);
1567 stand_alone = 1;
1568 } else {
1569 stand_alone = 0;
1570 }
1571 eae = ip->i_ea_area;
1572 easize = ip->i_ea_len;
1573
1574 error = 0;
1575 if (ap->a_size != NULL)
1576 *ap->a_size = 0;
1577 pe = eae + easize;
1578 for(p = eae; error == 0 && p < pe; p = pn) {
1579 bcopy(p, &ul, sizeof(ul));
1580 pn = p + ul;
1581 if (pn > pe)
1582 break;
1583 p += sizeof(ul);
1584 if (*p++ != ap->a_attrnamespace)
1585 continue;
1586 p++; /* pad2 */
1587 ealen = *p;
1588 if (ap->a_size != NULL) {
1589 *ap->a_size += ealen + 1;
1590 } else if (ap->a_uio != NULL) {
1591 error = uiomove(p, ealen + 1, ap->a_uio);
1592 }
1593 }
1594 if (stand_alone)
1595 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1596 return(error);
1597 }
1598
1599 /*
1600 * Vnode operation to set a named attribute.
1601 */
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	/* Special device vnodes do not carry extended attributes. */
	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	/* XXX Now unsupported API to delete EAs using NULL uio. */
	if (ap->a_uio == NULL)
		return (EOPNOTSUPP);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		/*
		 * Record the failure on an already-open transaction so
		 * that its eventual commit is turned into an abort.
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	/*
	 * If no EA transaction is open on the inode, run a private one
	 * (stand_alone) for the duration of this call.
	 */
	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	/*
	 * Compute the full record length: a 32-bit length word, three
	 * one-byte fields (namespace, content pad, name length), the
	 * name padded out to an 8-byte boundary (eapad1), then the
	 * content padded out to an 8-byte boundary (eapad2).
	 */
	ealen = ap->a_uio->uio_resid;
	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
	eapad1 = 8 - (ealength % 8);
	if (eapad1 == 8)
		eapad1 = 0;
	eapad2 = 8 - (ealen % 8);
	if (eapad2 == 8)
		eapad2 = 0;
	ealength += eapad1 + ealen + eapad2;

	/* Copy the EA area with room for the new record appended. */
	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize,
	    ap->a_attrnamespace, ap->a_name, &p, NULL);
	if (olen == -1) {
		/* new, append at end */
		p = eae + easize;
		easize += ealength;
	} else {
		/* Replacing: resize the existing record in place. */
		bcopy(p, &ul, sizeof ul);
		i = p - eae + ul;
		if (ul != ealength) {
			bcopy(p + ul, p + ealength, easize - i);
			easize += (ealength - ul);
		}
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	/* Serialize the record header, name, and name padding at p. */
	bcopy(&ealength, p, sizeof(ealength));
	p += sizeof(ealength);
	*p++ = ap->a_attrnamespace;
	*p++ = eapad2;
	*p++ = strlen(ap->a_name);
	strcpy(p, ap->a_name);
	p += strlen(ap->a_name);
	bzero(p, eapad1);
	p += eapad1;
	/* Copy the content in from the caller. */
	error = uiomove(p, ealen, ap->a_uio);
	if (error) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return(error);
	}
	p += ealen;
	bzero(p, eapad2);

	/* Install the new area on the inode and free the old one. */
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}
1720
1721 /*
1722 * Vnode pointer to File handle
1723 */
1724 static int
1725 ffs_vptofh(struct vop_vptofh_args *ap)
1726 /*
1727 vop_vptofh {
1728 IN struct vnode *a_vp;
1729 IN struct fid *a_fhp;
1730 };
1731 */
1732 {
1733 struct inode *ip;
1734 struct ufid *ufhp;
1735
1736 ip = VTOI(ap->a_vp);
1737 ufhp = (struct ufid *)ap->a_fhp;
1738 ufhp->ufid_len = sizeof(struct ufid);
1739 ufhp->ufid_ino = ip->i_number;
1740 ufhp->ufid_gen = ip->i_gen;
1741 return (0);
1742 }
Cache object: a6c5512df37b636827bf023b1f750cb2
|