1 /*-
2 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * from: @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
60 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
61 * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95
62 */
63
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
66
67 #include <sys/param.h>
68 #include <sys/bio.h>
69 #include <sys/systm.h>
70 #include <sys/buf.h>
71 #include <sys/conf.h>
72 #include <sys/extattr.h>
73 #include <sys/kernel.h>
74 #include <sys/limits.h>
75 #include <sys/malloc.h>
76 #include <sys/mount.h>
77 #include <sys/proc.h>
78 #include <sys/resourcevar.h>
79 #include <sys/signalvar.h>
80 #include <sys/stat.h>
81 #include <sys/vmmeter.h>
82 #include <sys/vnode.h>
83
84 #include <vm/vm.h>
85 #include <vm/vm_extern.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_page.h>
88 #include <vm/vm_pager.h>
89 #include <vm/vnode_pager.h>
90
91 #include <ufs/ufs/extattr.h>
92 #include <ufs/ufs/quota.h>
93 #include <ufs/ufs/inode.h>
94 #include <ufs/ufs/ufs_extern.h>
95 #include <ufs/ufs/ufsmount.h>
96
97 #include <ufs/ffs/fs.h>
98 #include <ufs/ffs/ffs_extern.h>
99 #include "opt_directio.h"
100 #include "opt_ffs.h"
101
#ifdef DIRECTIO
/* Raw (uncached) read path, compiled in only with "options DIRECTIO". */
extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
/* FFS-specific vnode operations defined later in this file. */
static vop_fsync_t ffs_fsync;
static vop_lock_t ffs_lock;
static vop_getpages_t ffs_getpages;
static vop_read_t ffs_read;
static vop_write_t ffs_write;
/* Extended attribute area I/O helpers (UFS2 only). */
static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
    struct ucred *cred);
static vop_strategy_t ffsext_strategy;
static vop_closeextattr_t ffs_closeextattr;
static vop_deleteextattr_t ffs_deleteextattr;
static vop_getextattr_t ffs_getextattr;
static vop_listextattr_t ffs_listextattr;
static vop_openextattr_t ffs_openextattr;
static vop_setextattr_t ffs_setextattr;
120
121
/* Global vfs data structures for ufs. */
/*
 * Operation vectors used for the "1" variants; these omit the extended
 * attribute operations present in the "2" vectors below.  Unimplemented
 * operations fall through to the ufs_vnodeops/ufs_fifoops defaults.
 */
struct vop_vector ffs_vnodeops1 = {
	.vop_default = &ufs_vnodeops,
	.vop_fsync = ffs_fsync,
	.vop_getpages = ffs_getpages,
	.vop_lock = ffs_lock,
	.vop_read = ffs_read,
	.vop_reallocblks = ffs_reallocblks,
	.vop_write = ffs_write,
};

struct vop_vector ffs_fifoops1 = {
	.vop_default = &ufs_fifoops,
	.vop_fsync = ffs_fsync,
	.vop_reallocblks = ffs_reallocblks, /* XXX: really ??? */
};
138
/* Global vfs data structures for ufs. */
/*
 * Operation vectors for the "2" variants: same as the "1" vectors plus
 * the extended attribute operations.  Unimplemented operations fall
 * through to the ufs_vnodeops/ufs_fifoops defaults.
 */
struct vop_vector ffs_vnodeops2 = {
	.vop_default = &ufs_vnodeops,
	.vop_fsync = ffs_fsync,
	.vop_getpages = ffs_getpages,
	.vop_lock = ffs_lock,
	.vop_read = ffs_read,
	.vop_reallocblks = ffs_reallocblks,
	.vop_write = ffs_write,
	.vop_closeextattr = ffs_closeextattr,
	.vop_deleteextattr = ffs_deleteextattr,
	.vop_getextattr = ffs_getextattr,
	.vop_listextattr = ffs_listextattr,
	.vop_openextattr = ffs_openextattr,
	.vop_setextattr = ffs_setextattr,
};

struct vop_vector ffs_fifoops2 = {
	.vop_default = &ufs_fifoops,
	.vop_fsync = ffs_fsync,
	.vop_lock = ffs_lock,
	.vop_reallocblks = ffs_reallocblks,
	.vop_strategy = ffsext_strategy,
	.vop_closeextattr = ffs_closeextattr,
	.vop_deleteextattr = ffs_deleteextattr,
	.vop_getextattr = ffs_getextattr,
	.vop_listextattr = ffs_listextattr,
	.vop_openextattr = ffs_openextattr,
	.vop_setextattr = ffs_setextattr,
};
169
170 /*
171 * Synch an open file.
172 */
173 /* ARGSUSED */
174 static int
175 ffs_fsync(struct vop_fsync_args *ap)
176 {
177 int error;
178
179 error = ffs_syncvnode(ap->a_vp, ap->a_waitfor);
180 if (error)
181 return (error);
182 if (ap->a_waitfor == MNT_WAIT &&
183 (ap->a_vp->v_mount->mnt_flag & MNT_SOFTDEP))
184 error = softdep_fsync(ap->a_vp);
185 return (error);
186 }
187
/*
 * Flush all dirty buffers associated with a vnode, then push its inode
 * to disk with ffs_update().
 *
 * vp      - vnode to flush.
 * waitfor - MNT_WAIT to wait for writes to complete; otherwise the
 *           flush is started asynchronously where possible.
 *
 * Returns 0 on success, or the error from a synchronous bwrite(),
 * softdep_sync_metadata(), or the final ffs_update().
 */
int
ffs_syncvnode(struct vnode *vp, int waitfor)
{
	struct inode *ip = VTOI(vp);
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (waitfor == MNT_WAIT);
	/* First logical block beyond the end of the file. */
	lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;	/* first pass handles data blocks only */
	s = splbio();
	VI_LOCK(vp);
loop:
	/* Clear the per-pass marker on every dirty buffer. */
	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		/* Metadata buffers have negative logical block numbers. */
		if ((skipmeta == 1 && bp->b_lblkno < 0))
			continue;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		VI_UNLOCK(vp);
		if (!wait && !LIST_EMPTY(&bp->b_dep) &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			/* Would be redirtied by its dependencies; defer it. */
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			VI_LOCK(vp);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediately.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {

			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else
			vfs_bio_awrite(bp);

		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		VI_LOCK(vp);
		nbp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		/* Wait for all writes in flight on this buffer object. */
		bufobj_wwait(&vp->v_bufobj, 3, 0);
		VI_UNLOCK(vp);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(vp)) != 0)
			return (error);
		s = splbio();

		VI_LOCK(vp);
		if (vp->v_bufobj.bo_dirty.bv_cnt > 0) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef INVARIANTS
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	VI_UNLOCK(vp);
	splx(s);
	return (ffs_update(vp, wait));
}
332
/*
 * Lock a vnode, retrying if the lock is replaced while we sleep.  A
 * vnode's v_vnlock can change while we wait on it (e.g. when mutating
 * between a snapshot file vnode and a regular file vnode), so after
 * acquiring the lock we verify it is still the vnode's current lock.
 * Lock requests other than SHARED/UPGRADE/EXCLUSIVE are passed through
 * to the default ufs implementation.
 */
static int
ffs_lock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
#ifndef NO_FFS_SNAPSHOT
	struct vnode *vp;
	int flags;
	struct lock *lkp;
	int result;

	switch (ap->a_flags & LK_TYPE_MASK) {
	case LK_SHARED:
	case LK_UPGRADE:
	case LK_EXCLUSIVE:
		vp = ap->a_vp;
		flags = ap->a_flags;
		for (;;) {
			/*
			 * vnode interlock must be held to ensure that
			 * the possibly external lock isn't freed,
			 * e.g. when mutating from snapshot file vnode
			 * to regular file vnode.
			 */
			if ((flags & LK_INTERLOCK) == 0) {
				VI_LOCK(vp);
				flags |= LK_INTERLOCK;
			}
			lkp = vp->v_vnlock;
			result = lockmgr(lkp, flags, VI_MTX(vp), ap->a_td);
			/* Done if the lock we took is still current. */
			if (lkp == vp->v_vnlock || result != 0)
				break;
			/*
			 * Apparent success, except that the vnode
			 * mutated between snapshot file vnode and
			 * regular file vnode while this process
			 * slept. The lock currently held is not the
			 * right lock. Release it, and try to get the
			 * new lock.
			 */
			(void) lockmgr(lkp, LK_RELEASE, VI_MTX(vp), ap->a_td);
			/*
			 * An upgrade cannot be retried once the shared
			 * lock is gone; ask for exclusive instead.
			 */
			if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
			flags &= ~LK_INTERLOCK;
		}
		break;
	default:
		result = VOP_LOCK_APV(&ufs_vnodeops, ap);
	}
	return (result);
#else
	return (VOP_LOCK_APV(&ufs_vnodeops, ap));
#endif
}
390
391 /*
392 * Vnode op for reading.
393 */
394 /* ARGSUSED */
395 static int
396 ffs_read(ap)
397 struct vop_read_args /* {
398 struct vnode *a_vp;
399 struct uio *a_uio;
400 int a_ioflag;
401 struct ucred *a_cred;
402 } */ *ap;
403 {
404 struct vnode *vp;
405 struct inode *ip;
406 struct uio *uio;
407 struct fs *fs;
408 struct buf *bp;
409 ufs_lbn_t lbn, nextlbn;
410 off_t bytesinfile;
411 long size, xfersize, blkoffset;
412 int error, orig_resid;
413 int seqcount;
414 int ioflag;
415
416 vp = ap->a_vp;
417 uio = ap->a_uio;
418 ioflag = ap->a_ioflag;
419 if (ap->a_ioflag & IO_EXT)
420 #ifdef notyet
421 return (ffs_extread(vp, uio, ioflag));
422 #else
423 panic("ffs_read+IO_EXT");
424 #endif
425 #ifdef DIRECTIO
426 if ((ioflag & IO_DIRECT) != 0) {
427 int workdone;
428
429 error = ffs_rawread(vp, uio, &workdone);
430 if (error != 0 || workdone != 0)
431 return error;
432 }
433 #endif
434
435 seqcount = ap->a_ioflag >> IO_SEQSHIFT;
436 ip = VTOI(vp);
437
438 #ifdef INVARIANTS
439 if (uio->uio_rw != UIO_READ)
440 panic("ffs_read: mode");
441
442 if (vp->v_type == VLNK) {
443 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
444 panic("ffs_read: short symlink");
445 } else if (vp->v_type != VREG && vp->v_type != VDIR)
446 panic("ffs_read: type %d", vp->v_type);
447 #endif
448 orig_resid = uio->uio_resid;
449 KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
450 if (orig_resid == 0)
451 return (0);
452 KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
453 fs = ip->i_fs;
454 if (uio->uio_offset < ip->i_size &&
455 uio->uio_offset >= fs->fs_maxfilesize)
456 return (EOVERFLOW);
457
458 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
459 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
460 break;
461 lbn = lblkno(fs, uio->uio_offset);
462 nextlbn = lbn + 1;
463
464 /*
465 * size of buffer. The buffer representing the
466 * end of the file is rounded up to the size of
467 * the block type ( fragment or full block,
468 * depending ).
469 */
470 size = blksize(fs, ip, lbn);
471 blkoffset = blkoff(fs, uio->uio_offset);
472
473 /*
474 * The amount we want to transfer in this iteration is
475 * one FS block less the amount of the data before
476 * our startpoint (duh!)
477 */
478 xfersize = fs->fs_bsize - blkoffset;
479
480 /*
481 * But if we actually want less than the block,
482 * or the file doesn't have a whole block more of data,
483 * then use the lesser number.
484 */
485 if (uio->uio_resid < xfersize)
486 xfersize = uio->uio_resid;
487 if (bytesinfile < xfersize)
488 xfersize = bytesinfile;
489
490 if (lblktosize(fs, nextlbn) >= ip->i_size) {
491 /*
492 * Don't do readahead if this is the end of the file.
493 */
494 error = bread(vp, lbn, size, NOCRED, &bp);
495 } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
496 /*
497 * Otherwise if we are allowed to cluster,
498 * grab as much as we can.
499 *
500 * XXX This may not be a win if we are not
501 * doing sequential access.
502 */
503 error = cluster_read(vp, ip->i_size, lbn,
504 size, NOCRED, blkoffset + uio->uio_resid, seqcount, &bp);
505 } else if (seqcount > 1) {
506 /*
507 * If we are NOT allowed to cluster, then
508 * if we appear to be acting sequentially,
509 * fire off a request for a readahead
510 * as well as a read. Note that the 4th and 5th
511 * arguments point to arrays of the size specified in
512 * the 6th argument.
513 */
514 int nextsize = blksize(fs, ip, nextlbn);
515 error = breadn(vp, lbn,
516 size, &nextlbn, &nextsize, 1, NOCRED, &bp);
517 } else {
518 /*
519 * Failing all of the above, just read what the
520 * user asked for. Interestingly, the same as
521 * the first option above.
522 */
523 error = bread(vp, lbn, size, NOCRED, &bp);
524 }
525 if (error) {
526 brelse(bp);
527 bp = NULL;
528 break;
529 }
530
531 /*
532 * If IO_DIRECT then set B_DIRECT for the buffer. This
533 * will cause us to attempt to release the buffer later on
534 * and will cause the buffer cache to attempt to free the
535 * underlying pages.
536 */
537 if (ioflag & IO_DIRECT)
538 bp->b_flags |= B_DIRECT;
539
540 /*
541 * We should only get non-zero b_resid when an I/O error
542 * has occurred, which should cause us to break above.
543 * However, if the short read did not cause an error,
544 * then we want to ensure that we do not uiomove bad
545 * or uninitialized data.
546 */
547 size -= bp->b_resid;
548 if (size < xfersize) {
549 if (size == 0)
550 break;
551 xfersize = size;
552 }
553
554 error = uiomove((char *)bp->b_data + blkoffset,
555 (int)xfersize, uio);
556 if (error)
557 break;
558
559 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
560 (LIST_EMPTY(&bp->b_dep))) {
561 /*
562 * If there are no dependencies, and it's VMIO,
563 * then we don't need the buf, mark it available
564 * for freeing. The VM has the data.
565 */
566 bp->b_flags |= B_RELBUF;
567 brelse(bp);
568 } else {
569 /*
570 * Otherwise let whoever
571 * made the request take care of
572 * freeing it. We just queue
573 * it onto another list.
574 */
575 bqrelse(bp);
576 }
577 }
578
579 /*
580 * This can only happen in the case of an error
581 * because the loop above resets bp to NULL on each iteration
582 * and on normal completion has not set a new value into it.
583 * so it must have come from a 'break' statement
584 */
585 if (bp != NULL) {
586 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
587 (LIST_EMPTY(&bp->b_dep))) {
588 bp->b_flags |= B_RELBUF;
589 brelse(bp);
590 } else {
591 bqrelse(bp);
592 }
593 }
594
595 if ((error == 0 || uio->uio_resid != orig_resid) &&
596 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
597 VI_LOCK(vp);
598 ip->i_flag |= IN_ACCESS;
599 VI_UNLOCK(vp);
600 }
601 return (error);
602 }
603
604 /*
605 * Vnode op for writing.
606 */
607 static int
608 ffs_write(ap)
609 struct vop_write_args /* {
610 struct vnode *a_vp;
611 struct uio *a_uio;
612 int a_ioflag;
613 struct ucred *a_cred;
614 } */ *ap;
615 {
616 struct vnode *vp;
617 struct uio *uio;
618 struct inode *ip;
619 struct fs *fs;
620 struct buf *bp;
621 struct thread *td;
622 ufs_lbn_t lbn;
623 off_t osize;
624 int seqcount;
625 int blkoffset, error, flags, ioflag, resid, size, xfersize;
626
627 vp = ap->a_vp;
628 uio = ap->a_uio;
629 ioflag = ap->a_ioflag;
630 if (ap->a_ioflag & IO_EXT)
631 #ifdef notyet
632 return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
633 #else
634 panic("ffs_write+IO_EXT");
635 #endif
636
637 seqcount = ap->a_ioflag >> IO_SEQSHIFT;
638 ip = VTOI(vp);
639
640 #ifdef INVARIANTS
641 if (uio->uio_rw != UIO_WRITE)
642 panic("ffs_write: mode");
643 #endif
644
645 switch (vp->v_type) {
646 case VREG:
647 if (ioflag & IO_APPEND)
648 uio->uio_offset = ip->i_size;
649 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
650 return (EPERM);
651 /* FALLTHROUGH */
652 case VLNK:
653 break;
654 case VDIR:
655 panic("ffs_write: dir write");
656 break;
657 default:
658 panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
659 (int)uio->uio_offset,
660 (int)uio->uio_resid
661 );
662 }
663
664 KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
665 KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
666 fs = ip->i_fs;
667 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
668 return (EFBIG);
669 /*
670 * Maybe this should be above the vnode op call, but so long as
671 * file servers have no limits, I don't think it matters.
672 */
673 td = uio->uio_td;
674 if (vp->v_type == VREG && td != NULL) {
675 PROC_LOCK(td->td_proc);
676 if (uio->uio_offset + uio->uio_resid >
677 lim_cur(td->td_proc, RLIMIT_FSIZE)) {
678 psignal(td->td_proc, SIGXFSZ);
679 PROC_UNLOCK(td->td_proc);
680 return (EFBIG);
681 }
682 PROC_UNLOCK(td->td_proc);
683 }
684
685 resid = uio->uio_resid;
686 osize = ip->i_size;
687 if (seqcount > BA_SEQMAX)
688 flags = BA_SEQMAX << BA_SEQSHIFT;
689 else
690 flags = seqcount << BA_SEQSHIFT;
691 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
692 flags |= IO_SYNC;
693
694 for (error = 0; uio->uio_resid > 0;) {
695 lbn = lblkno(fs, uio->uio_offset);
696 blkoffset = blkoff(fs, uio->uio_offset);
697 xfersize = fs->fs_bsize - blkoffset;
698 if (uio->uio_resid < xfersize)
699 xfersize = uio->uio_resid;
700 if (uio->uio_offset + xfersize > ip->i_size)
701 vnode_pager_setsize(vp, uio->uio_offset + xfersize);
702
703 /*
704 * We must perform a read-before-write if the transfer size
705 * does not cover the entire buffer.
706 */
707 if (fs->fs_bsize > xfersize)
708 flags |= BA_CLRBUF;
709 else
710 flags &= ~BA_CLRBUF;
711 /* XXX is uio->uio_offset the right thing here? */
712 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
713 ap->a_cred, flags, &bp);
714 if (error != 0)
715 break;
716 /*
717 * If the buffer is not valid we have to clear out any
718 * garbage data from the pages instantiated for the buffer.
719 * If we do not, a failed uiomove() during a write can leave
720 * the prior contents of the pages exposed to a userland
721 * mmap(). XXX deal with uiomove() errors a better way.
722 */
723 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
724 vfs_bio_clrbuf(bp);
725 if (ioflag & IO_DIRECT)
726 bp->b_flags |= B_DIRECT;
727 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
728 bp->b_flags |= B_NOCACHE;
729
730 if (uio->uio_offset + xfersize > ip->i_size) {
731 ip->i_size = uio->uio_offset + xfersize;
732 DIP_SET(ip, i_size, ip->i_size);
733 }
734
735 size = blksize(fs, ip, lbn) - bp->b_resid;
736 if (size < xfersize)
737 xfersize = size;
738
739 error =
740 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
741 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
742 (LIST_EMPTY(&bp->b_dep))) {
743 bp->b_flags |= B_RELBUF;
744 }
745
746 /*
747 * If IO_SYNC each buffer is written synchronously. Otherwise
748 * if we have a severe page deficiency write the buffer
749 * asynchronously. Otherwise try to cluster, and if that
750 * doesn't do it then either do an async write (if O_DIRECT),
751 * or a delayed write (if not).
752 */
753 if (ioflag & IO_SYNC) {
754 (void)bwrite(bp);
755 } else if (vm_page_count_severe() ||
756 buf_dirty_count_severe() ||
757 (ioflag & IO_ASYNC)) {
758 bp->b_flags |= B_CLUSTEROK;
759 bawrite(bp);
760 } else if (xfersize + blkoffset == fs->fs_bsize) {
761 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
762 bp->b_flags |= B_CLUSTEROK;
763 cluster_write(vp, bp, ip->i_size, seqcount);
764 } else {
765 bawrite(bp);
766 }
767 } else if (ioflag & IO_DIRECT) {
768 bp->b_flags |= B_CLUSTEROK;
769 bawrite(bp);
770 } else {
771 bp->b_flags |= B_CLUSTEROK;
772 bdwrite(bp);
773 }
774 if (error || xfersize == 0)
775 break;
776 ip->i_flag |= IN_CHANGE | IN_UPDATE;
777 }
778 /*
779 * If we successfully wrote any data, and we are not the superuser
780 * we clear the setuid and setgid bits as a precaution against
781 * tampering.
782 */
783 if (resid > uio->uio_resid && ap->a_cred &&
784 suser_cred(ap->a_cred, SUSER_ALLOWJAIL)) {
785 ip->i_mode &= ~(ISUID | ISGID);
786 DIP_SET(ip, i_mode, ip->i_mode);
787 }
788 if (error) {
789 if (ioflag & IO_UNIT) {
790 (void)ffs_truncate(vp, osize,
791 IO_NORMAL | (ioflag & IO_SYNC),
792 ap->a_cred, uio->uio_td);
793 uio->uio_offset -= resid - uio->uio_resid;
794 uio->uio_resid = resid;
795 }
796 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
797 error = ffs_update(vp, 1);
798 return (error);
799 }
800
801 /*
802 * get page routine
803 */
804 static int
805 ffs_getpages(ap)
806 struct vop_getpages_args *ap;
807 {
808 int i;
809 vm_page_t mreq;
810 int pcount;
811
812 pcount = round_page(ap->a_count) / PAGE_SIZE;
813 mreq = ap->a_m[ap->a_reqpage];
814
815 /*
816 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
817 * then the entire page is valid. Since the page may be mapped,
818 * user programs might reference data beyond the actual end of file
819 * occuring within the page. We have to zero that data.
820 */
821 VM_OBJECT_LOCK(mreq->object);
822 if (mreq->valid) {
823 if (mreq->valid != VM_PAGE_BITS_ALL)
824 vm_page_zero_invalid(mreq, TRUE);
825 vm_page_lock_queues();
826 for (i = 0; i < pcount; i++) {
827 if (i != ap->a_reqpage) {
828 vm_page_free(ap->a_m[i]);
829 }
830 }
831 vm_page_unlock_queues();
832 VM_OBJECT_UNLOCK(mreq->object);
833 return VM_PAGER_OK;
834 }
835 VM_OBJECT_UNLOCK(mreq->object);
836
837 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
838 ap->a_count,
839 ap->a_reqpage);
840 }
841
842
843 /*
844 * Extended attribute area reading.
845 */
846 static int
847 ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
848 {
849 struct inode *ip;
850 struct ufs2_dinode *dp;
851 struct fs *fs;
852 struct buf *bp;
853 ufs_lbn_t lbn, nextlbn;
854 off_t bytesinfile;
855 long size, xfersize, blkoffset;
856 int error, orig_resid;
857
858 ip = VTOI(vp);
859 fs = ip->i_fs;
860 dp = ip->i_din2;
861
862 #ifdef INVARIANTS
863 if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
864 panic("ffs_extread: mode");
865
866 #endif
867 orig_resid = uio->uio_resid;
868 KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
869 if (orig_resid == 0)
870 return (0);
871 KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));
872
873 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
874 if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
875 break;
876 lbn = lblkno(fs, uio->uio_offset);
877 nextlbn = lbn + 1;
878
879 /*
880 * size of buffer. The buffer representing the
881 * end of the file is rounded up to the size of
882 * the block type ( fragment or full block,
883 * depending ).
884 */
885 size = sblksize(fs, dp->di_extsize, lbn);
886 blkoffset = blkoff(fs, uio->uio_offset);
887
888 /*
889 * The amount we want to transfer in this iteration is
890 * one FS block less the amount of the data before
891 * our startpoint (duh!)
892 */
893 xfersize = fs->fs_bsize - blkoffset;
894
895 /*
896 * But if we actually want less than the block,
897 * or the file doesn't have a whole block more of data,
898 * then use the lesser number.
899 */
900 if (uio->uio_resid < xfersize)
901 xfersize = uio->uio_resid;
902 if (bytesinfile < xfersize)
903 xfersize = bytesinfile;
904
905 if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
906 /*
907 * Don't do readahead if this is the end of the info.
908 */
909 error = bread(vp, -1 - lbn, size, NOCRED, &bp);
910 } else {
911 /*
912 * If we have a second block, then
913 * fire off a request for a readahead
914 * as well as a read. Note that the 4th and 5th
915 * arguments point to arrays of the size specified in
916 * the 6th argument.
917 */
918 int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
919
920 nextlbn = -1 - nextlbn;
921 error = breadn(vp, -1 - lbn,
922 size, &nextlbn, &nextsize, 1, NOCRED, &bp);
923 }
924 if (error) {
925 brelse(bp);
926 bp = NULL;
927 break;
928 }
929
930 /*
931 * If IO_DIRECT then set B_DIRECT for the buffer. This
932 * will cause us to attempt to release the buffer later on
933 * and will cause the buffer cache to attempt to free the
934 * underlying pages.
935 */
936 if (ioflag & IO_DIRECT)
937 bp->b_flags |= B_DIRECT;
938
939 /*
940 * We should only get non-zero b_resid when an I/O error
941 * has occurred, which should cause us to break above.
942 * However, if the short read did not cause an error,
943 * then we want to ensure that we do not uiomove bad
944 * or uninitialized data.
945 */
946 size -= bp->b_resid;
947 if (size < xfersize) {
948 if (size == 0)
949 break;
950 xfersize = size;
951 }
952
953 error = uiomove((char *)bp->b_data + blkoffset,
954 (int)xfersize, uio);
955 if (error)
956 break;
957
958 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
959 (LIST_EMPTY(&bp->b_dep))) {
960 /*
961 * If there are no dependencies, and it's VMIO,
962 * then we don't need the buf, mark it available
963 * for freeing. The VM has the data.
964 */
965 bp->b_flags |= B_RELBUF;
966 brelse(bp);
967 } else {
968 /*
969 * Otherwise let whoever
970 * made the request take care of
971 * freeing it. We just queue
972 * it onto another list.
973 */
974 bqrelse(bp);
975 }
976 }
977
978 /*
979 * This can only happen in the case of an error
980 * because the loop above resets bp to NULL on each iteration
981 * and on normal completion has not set a new value into it.
982 * so it must have come from a 'break' statement
983 */
984 if (bp != NULL) {
985 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
986 (LIST_EMPTY(&bp->b_dep))) {
987 bp->b_flags |= B_RELBUF;
988 brelse(bp);
989 } else {
990 bqrelse(bp);
991 }
992 }
993
994 if ((error == 0 || uio->uio_resid != orig_resid) &&
995 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
996 VI_LOCK(vp);
997 ip->i_flag |= IN_ACCESS;
998 VI_UNLOCK(vp);
999 }
1000 return (error);
1001 }
1002
1003 /*
1004 * Extended attribute area writing.
1005 */
1006 static int
1007 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
1008 {
1009 struct inode *ip;
1010 struct ufs2_dinode *dp;
1011 struct fs *fs;
1012 struct buf *bp;
1013 ufs_lbn_t lbn;
1014 off_t osize;
1015 int blkoffset, error, flags, resid, size, xfersize;
1016
1017 ip = VTOI(vp);
1018 fs = ip->i_fs;
1019 dp = ip->i_din2;
1020
1021 KASSERT(!(ip->i_flag & IN_SPACECOUNTED), ("inode %u: inode is dead",
1022 ip->i_number));
1023
1024 #ifdef INVARIANTS
1025 if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
1026 panic("ffs_extwrite: mode");
1027 #endif
1028
1029 if (ioflag & IO_APPEND)
1030 uio->uio_offset = dp->di_extsize;
1031 KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
1032 KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
1033 if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
1034 return (EFBIG);
1035
1036 resid = uio->uio_resid;
1037 osize = dp->di_extsize;
1038 flags = IO_EXT;
1039 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
1040 flags |= IO_SYNC;
1041
1042 for (error = 0; uio->uio_resid > 0;) {
1043 lbn = lblkno(fs, uio->uio_offset);
1044 blkoffset = blkoff(fs, uio->uio_offset);
1045 xfersize = fs->fs_bsize - blkoffset;
1046 if (uio->uio_resid < xfersize)
1047 xfersize = uio->uio_resid;
1048
1049 /*
1050 * We must perform a read-before-write if the transfer size
1051 * does not cover the entire buffer.
1052 */
1053 if (fs->fs_bsize > xfersize)
1054 flags |= BA_CLRBUF;
1055 else
1056 flags &= ~BA_CLRBUF;
1057 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
1058 ucred, flags, &bp);
1059 if (error != 0)
1060 break;
1061 /*
1062 * If the buffer is not valid we have to clear out any
1063 * garbage data from the pages instantiated for the buffer.
1064 * If we do not, a failed uiomove() during a write can leave
1065 * the prior contents of the pages exposed to a userland
1066 * mmap(). XXX deal with uiomove() errors a better way.
1067 */
1068 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
1069 vfs_bio_clrbuf(bp);
1070 if (ioflag & IO_DIRECT)
1071 bp->b_flags |= B_DIRECT;
1072
1073 if (uio->uio_offset + xfersize > dp->di_extsize)
1074 dp->di_extsize = uio->uio_offset + xfersize;
1075
1076 size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
1077 if (size < xfersize)
1078 xfersize = size;
1079
1080 error =
1081 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
1082 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1083 (LIST_EMPTY(&bp->b_dep))) {
1084 bp->b_flags |= B_RELBUF;
1085 }
1086
1087 /*
1088 * If IO_SYNC each buffer is written synchronously. Otherwise
1089 * if we have a severe page deficiency write the buffer
1090 * asynchronously. Otherwise try to cluster, and if that
1091 * doesn't do it then either do an async write (if O_DIRECT),
1092 * or a delayed write (if not).
1093 */
1094 if (ioflag & IO_SYNC) {
1095 (void)bwrite(bp);
1096 } else if (vm_page_count_severe() ||
1097 buf_dirty_count_severe() ||
1098 xfersize + blkoffset == fs->fs_bsize ||
1099 (ioflag & (IO_ASYNC | IO_DIRECT)))
1100 bawrite(bp);
1101 else
1102 bdwrite(bp);
1103 if (error || xfersize == 0)
1104 break;
1105 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1106 }
1107 /*
1108 * If we successfully wrote any data, and we are not the superuser
1109 * we clear the setuid and setgid bits as a precaution against
1110 * tampering.
1111 */
1112 if (resid > uio->uio_resid && ucred &&
1113 suser_cred(ucred, SUSER_ALLOWJAIL)) {
1114 ip->i_mode &= ~(ISUID | ISGID);
1115 dp->di_mode = ip->i_mode;
1116 }
1117 if (error) {
1118 if (ioflag & IO_UNIT) {
1119 (void)ffs_truncate(vp, osize,
1120 IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
1121 uio->uio_offset -= resid - uio->uio_resid;
1122 uio->uio_resid = resid;
1123 }
1124 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
1125 error = ffs_update(vp, 1);
1126 return (error);
1127 }
1128
1129
1130 /*
1131 * Vnode operating to retrieve a named extended attribute.
1132 *
1133 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
1134 * the length of the EA, and possibly the pointer to the entry and to the data.
1135 */
1136 static int
1137 ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac)
1138 {
1139 u_char *p, *pe, *pn, *p0;
1140 int eapad1, eapad2, ealength, ealen, nlen;
1141 uint32_t ul;
1142
1143 pe = ptr + length;
1144 nlen = strlen(name);
1145
1146 for (p = ptr; p < pe; p = pn) {
1147 p0 = p;
1148 bcopy(p, &ul, sizeof(ul));
1149 pn = p + ul;
1150 /* make sure this entry is complete */
1151 if (pn > pe)
1152 break;
1153 p += sizeof(uint32_t);
1154 if (*p != nspace)
1155 continue;
1156 p++;
1157 eapad2 = *p++;
1158 if (*p != nlen)
1159 continue;
1160 p++;
1161 if (bcmp(p, name, nlen))
1162 continue;
1163 ealength = sizeof(uint32_t) + 3 + nlen;
1164 eapad1 = 8 - (ealength % 8);
1165 if (eapad1 == 8)
1166 eapad1 = 0;
1167 ealength += eapad1;
1168 ealen = ul - ealength - eapad2;
1169 p += nlen + eapad1;
1170 if (eap != NULL)
1171 *eap = p0;
1172 if (eac != NULL)
1173 *eac = p;
1174 return (ealen);
1175 }
1176 return(-1);
1177 }
1178
1179 static int
1180 ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
1181 {
1182 struct inode *ip;
1183 struct ufs2_dinode *dp;
1184 struct fs *fs;
1185 struct uio luio;
1186 struct iovec liovec;
1187 int easize, error;
1188 u_char *eae;
1189
1190 ip = VTOI(vp);
1191 fs = ip->i_fs;
1192 dp = ip->i_din2;
1193 easize = dp->di_extsize;
1194 if ((uoff_t)easize + extra > NXADDR * fs->fs_bsize)
1195 return (EFBIG);
1196
1197 eae = malloc(easize + extra, M_TEMP, M_WAITOK);
1198
1199 liovec.iov_base = eae;
1200 liovec.iov_len = easize;
1201 luio.uio_iov = &liovec;
1202 luio.uio_iovcnt = 1;
1203 luio.uio_offset = 0;
1204 luio.uio_resid = easize;
1205 luio.uio_segflg = UIO_SYSSPACE;
1206 luio.uio_rw = UIO_READ;
1207 luio.uio_td = td;
1208
1209 error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
1210 if (error) {
1211 free(eae, M_TEMP);
1212 return(error);
1213 }
1214 *p = eae;
1215 return (0);
1216 }
1217
1218 static int
1219 ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
1220 {
1221 struct inode *ip;
1222 struct ufs2_dinode *dp;
1223 int error;
1224
1225 ip = VTOI(vp);
1226
1227 if (ip->i_ea_area != NULL)
1228 return (EBUSY);
1229 dp = ip->i_din2;
1230 error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
1231 if (error)
1232 return (error);
1233 ip->i_ea_len = dp->di_extsize;
1234 ip->i_ea_error = 0;
1235 return (0);
1236 }
1237
1238 /*
1239 * Vnode extattr transaction commit/abort
1240 */
1241 static int
1242 ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
1243 {
1244 struct inode *ip;
1245 struct uio luio;
1246 struct iovec liovec;
1247 int error;
1248 struct ufs2_dinode *dp;
1249
1250 ip = VTOI(vp);
1251 if (ip->i_ea_area == NULL)
1252 return (EINVAL);
1253 dp = ip->i_din2;
1254 error = ip->i_ea_error;
1255 if (commit && error == 0) {
1256 if (cred == NOCRED)
1257 cred = vp->v_mount->mnt_cred;
1258 liovec.iov_base = ip->i_ea_area;
1259 liovec.iov_len = ip->i_ea_len;
1260 luio.uio_iov = &liovec;
1261 luio.uio_iovcnt = 1;
1262 luio.uio_offset = 0;
1263 luio.uio_resid = ip->i_ea_len;
1264 luio.uio_segflg = UIO_SYSSPACE;
1265 luio.uio_rw = UIO_WRITE;
1266 luio.uio_td = td;
1267 /* XXX: I'm not happy about truncating to zero size */
1268 if (ip->i_ea_len < dp->di_extsize)
1269 error = ffs_truncate(vp, 0, IO_EXT, cred, td);
1270 error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
1271 }
1272 free(ip->i_ea_area, M_TEMP);
1273 ip->i_ea_area = NULL;
1274 ip->i_ea_len = 0;
1275 ip->i_ea_error = 0;
1276 return (error);
1277 }
1278
1279 /*
1280 * Vnode extattr strategy routine for fifos.
1281 *
1282 * We need to check for a read or write of the external attributes.
1283 * Otherwise we just fall through and do the usual thing.
1284 */
1285 static int
1286 ffsext_strategy(struct vop_strategy_args *ap)
1287 /*
1288 struct vop_strategy_args {
1289 struct vnodeop_desc *a_desc;
1290 struct vnode *a_vp;
1291 struct buf *a_bp;
1292 };
1293 */
1294 {
1295 struct vnode *vp;
1296 daddr_t lbn;
1297
1298 vp = ap->a_vp;
1299 lbn = ap->a_bp->b_lblkno;
1300 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
1301 lbn < 0 && lbn >= -NXADDR)
1302 return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
1303 if (vp->v_type == VFIFO)
1304 return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
1305 panic("spec nodes went here");
1306 }
1307
1308 /*
1309 * Vnode extattr transaction commit/abort
1310 */
1311 static int
1312 ffs_openextattr(struct vop_openextattr_args *ap)
1313 /*
1314 struct vop_openextattr_args {
1315 struct vnodeop_desc *a_desc;
1316 struct vnode *a_vp;
1317 IN struct ucred *a_cred;
1318 IN struct thread *a_td;
1319 };
1320 */
1321 {
1322 struct inode *ip;
1323 struct fs *fs;
1324
1325 ip = VTOI(ap->a_vp);
1326 fs = ip->i_fs;
1327
1328 if (ap->a_vp->v_type == VCHR)
1329 return (EOPNOTSUPP);
1330
1331 return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
1332 }
1333
1334
1335 /*
1336 * Vnode extattr transaction commit/abort
1337 */
1338 static int
1339 ffs_closeextattr(struct vop_closeextattr_args *ap)
1340 /*
1341 struct vop_closeextattr_args {
1342 struct vnodeop_desc *a_desc;
1343 struct vnode *a_vp;
1344 int a_commit;
1345 IN struct ucred *a_cred;
1346 IN struct thread *a_td;
1347 };
1348 */
1349 {
1350 struct inode *ip;
1351 struct fs *fs;
1352
1353 ip = VTOI(ap->a_vp);
1354 fs = ip->i_fs;
1355
1356 if (ap->a_vp->v_type == VCHR)
1357 return (EOPNOTSUPP);
1358
1359 if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY))
1360 return (EROFS);
1361
1362 return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
1363 }
1364
1365 /*
1366 * Vnode operation to remove a named attribute.
1367 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;	/* 1 if we opened the EA transaction here */

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	/* EAs are not supported on special files. */
	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		/*
		 * Poison an already-open EA transaction so that a later
		 * commit fails rather than writing a partial update.
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	/* Open a transient EA transaction if one is not already open. */
	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	/* Deleting: the replacement record has zero length. */
	ealength = eapad1 = ealen = eapad2 = 0;

	/* Work on a private copy of the EA area. */
	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &p, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return(ENOATTR);
	}
	/* ul is the total length of the record being removed. */
	bcopy(p, &ul, sizeof ul);
	i = p - eae + ul;	/* offset of the byte after the record */
	if (ul != ealength) {
		/*
		 * Slide the tail of the area down over the record
		 * (ealength is 0 here, so this removes it entirely).
		 * bcopy() handles the overlapping copy.
		 */
		bcopy(p + ul, p + ealength, easize - i);
		easize += (ealength - ul);
	}
	/* A delete only shrinks the area, but keep the guard for safety. */
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	/* Install the edited copy and release the old area. */
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	/* Commit immediately if we opened the transaction ourselves. */
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}
1453
1454 /*
1455 * Vnode operation to retrieve a named extended attribute.
1456 */
1457 static int
1458 ffs_getextattr(struct vop_getextattr_args *ap)
1459 /*
1460 vop_getextattr {
1461 IN struct vnode *a_vp;
1462 IN int a_attrnamespace;
1463 IN const char *a_name;
1464 INOUT struct uio *a_uio;
1465 OUT size_t *a_size;
1466 IN struct ucred *a_cred;
1467 IN struct thread *a_td;
1468 };
1469 */
1470 {
1471 struct inode *ip;
1472 struct fs *fs;
1473 u_char *eae, *p;
1474 unsigned easize;
1475 int error, ealen, stand_alone;
1476
1477 ip = VTOI(ap->a_vp);
1478 fs = ip->i_fs;
1479
1480 if (ap->a_vp->v_type == VCHR)
1481 return (EOPNOTSUPP);
1482
1483 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1484 ap->a_cred, ap->a_td, IREAD);
1485 if (error)
1486 return (error);
1487
1488 if (ip->i_ea_area == NULL) {
1489 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1490 if (error)
1491 return (error);
1492 stand_alone = 1;
1493 } else {
1494 stand_alone = 0;
1495 }
1496 eae = ip->i_ea_area;
1497 easize = ip->i_ea_len;
1498
1499 ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1500 NULL, &p);
1501 if (ealen >= 0) {
1502 error = 0;
1503 if (ap->a_size != NULL)
1504 *ap->a_size = ealen;
1505 else if (ap->a_uio != NULL)
1506 error = uiomove(p, ealen, ap->a_uio);
1507 } else
1508 error = ENOATTR;
1509 if (stand_alone)
1510 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1511 return(error);
1512 }
1513
1514 /*
1515 * Vnode operation to retrieve extended attributes on a vnode.
1516 */
1517 static int
1518 ffs_listextattr(struct vop_listextattr_args *ap)
1519 /*
1520 vop_listextattr {
1521 IN struct vnode *a_vp;
1522 IN int a_attrnamespace;
1523 INOUT struct uio *a_uio;
1524 OUT size_t *a_size;
1525 IN struct ucred *a_cred;
1526 IN struct thread *a_td;
1527 };
1528 */
1529 {
1530 struct inode *ip;
1531 struct fs *fs;
1532 u_char *eae, *p, *pe, *pn;
1533 unsigned easize;
1534 uint32_t ul;
1535 int error, ealen, stand_alone;
1536
1537 ip = VTOI(ap->a_vp);
1538 fs = ip->i_fs;
1539
1540 if (ap->a_vp->v_type == VCHR)
1541 return (EOPNOTSUPP);
1542
1543 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1544 ap->a_cred, ap->a_td, IREAD);
1545 if (error)
1546 return (error);
1547
1548 if (ip->i_ea_area == NULL) {
1549 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1550 if (error)
1551 return (error);
1552 stand_alone = 1;
1553 } else {
1554 stand_alone = 0;
1555 }
1556 eae = ip->i_ea_area;
1557 easize = ip->i_ea_len;
1558
1559 error = 0;
1560 if (ap->a_size != NULL)
1561 *ap->a_size = 0;
1562 pe = eae + easize;
1563 for(p = eae; error == 0 && p < pe; p = pn) {
1564 bcopy(p, &ul, sizeof(ul));
1565 pn = p + ul;
1566 if (pn > pe)
1567 break;
1568 p += sizeof(ul);
1569 if (*p++ != ap->a_attrnamespace)
1570 continue;
1571 p++; /* pad2 */
1572 ealen = *p;
1573 if (ap->a_size != NULL) {
1574 *ap->a_size += ealen + 1;
1575 } else if (ap->a_uio != NULL) {
1576 error = uiomove(p, ealen + 1, ap->a_uio);
1577 }
1578 }
1579 if (stand_alone)
1580 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1581 return(error);
1582 }
1583
1584 /*
1585 * Vnode operation to set a named attribute.
1586 */
1587 static int
1588 ffs_setextattr(struct vop_setextattr_args *ap)
1589 /*
1590 vop_setextattr {
1591 IN struct vnode *a_vp;
1592 IN int a_attrnamespace;
1593 IN const char *a_name;
1594 INOUT struct uio *a_uio;
1595 IN struct ucred *a_cred;
1596 IN struct thread *a_td;
1597 };
1598 */
1599 {
1600 struct inode *ip;
1601 struct fs *fs;
1602 uint32_t ealength, ul;
1603 int ealen, olen, eapad1, eapad2, error, i, easize;
1604 u_char *eae, *p;
1605 int stand_alone;
1606
1607 ip = VTOI(ap->a_vp);
1608 fs = ip->i_fs;
1609
1610 if (ap->a_vp->v_type == VCHR)
1611 return (EOPNOTSUPP);
1612
1613 if (strlen(ap->a_name) == 0)
1614 return (EINVAL);
1615
1616 /* XXX Now unsupported API to delete EAs using NULL uio. */
1617 if (ap->a_uio == NULL)
1618 return (EOPNOTSUPP);
1619
1620 if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
1621 return (EROFS);
1622
1623 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1624 ap->a_cred, ap->a_td, IWRITE);
1625 if (error) {
1626 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
1627 ip->i_ea_error = error;
1628 return (error);
1629 }
1630
1631 if (ip->i_ea_area == NULL) {
1632 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1633 if (error)
1634 return (error);
1635 stand_alone = 1;
1636 } else {
1637 stand_alone = 0;
1638 }
1639
1640 ealen = ap->a_uio->uio_resid;
1641 ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
1642 eapad1 = 8 - (ealength % 8);
1643 if (eapad1 == 8)
1644 eapad1 = 0;
1645 eapad2 = 8 - (ealen % 8);
1646 if (eapad2 == 8)
1647 eapad2 = 0;
1648 ealength += eapad1 + ealen + eapad2;
1649
1650 eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
1651 bcopy(ip->i_ea_area, eae, ip->i_ea_len);
1652 easize = ip->i_ea_len;
1653
1654 olen = ffs_findextattr(eae, easize,
1655 ap->a_attrnamespace, ap->a_name, &p, NULL);
1656 if (olen == -1) {
1657 /* new, append at end */
1658 p = eae + easize;
1659 easize += ealength;
1660 } else {
1661 bcopy(p, &ul, sizeof ul);
1662 i = p - eae + ul;
1663 if (ul != ealength) {
1664 bcopy(p + ul, p + ealength, easize - i);
1665 easize += (ealength - ul);
1666 }
1667 }
1668 if (easize > NXADDR * fs->fs_bsize) {
1669 free(eae, M_TEMP);
1670 if (stand_alone)
1671 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1672 else if (ip->i_ea_error == 0)
1673 ip->i_ea_error = ENOSPC;
1674 return(ENOSPC);
1675 }
1676 bcopy(&ealength, p, sizeof(ealength));
1677 p += sizeof(ealength);
1678 *p++ = ap->a_attrnamespace;
1679 *p++ = eapad2;
1680 *p++ = strlen(ap->a_name);
1681 strcpy(p, ap->a_name);
1682 p += strlen(ap->a_name);
1683 bzero(p, eapad1);
1684 p += eapad1;
1685 error = uiomove(p, ealen, ap->a_uio);
1686 if (error) {
1687 free(eae, M_TEMP);
1688 if (stand_alone)
1689 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1690 else if (ip->i_ea_error == 0)
1691 ip->i_ea_error = error;
1692 return(error);
1693 }
1694 p += ealen;
1695 bzero(p, eapad2);
1696
1697 p = ip->i_ea_area;
1698 ip->i_ea_area = eae;
1699 ip->i_ea_len = easize;
1700 free(p, M_TEMP);
1701 if (stand_alone)
1702 error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
1703 return(error);
1704 }