1 /*-
2 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * from: @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
60 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
61 * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95
62 */
63
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD: releng/6.2/sys/ufs/ffs/ffs_vnops.c 164062 2006-11-07 16:56:11Z kib $");
66
67 #include <sys/param.h>
68 #include <sys/bio.h>
69 #include <sys/systm.h>
70 #include <sys/buf.h>
71 #include <sys/conf.h>
72 #include <sys/extattr.h>
73 #include <sys/kernel.h>
74 #include <sys/limits.h>
75 #include <sys/malloc.h>
76 #include <sys/mount.h>
77 #include <sys/proc.h>
78 #include <sys/resourcevar.h>
79 #include <sys/signalvar.h>
80 #include <sys/stat.h>
81 #include <sys/vmmeter.h>
82 #include <sys/vnode.h>
83
84 #include <vm/vm.h>
85 #include <vm/vm_extern.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_page.h>
88 #include <vm/vm_pager.h>
89 #include <vm/vnode_pager.h>
90
91 #include <ufs/ufs/extattr.h>
92 #include <ufs/ufs/quota.h>
93 #include <ufs/ufs/inode.h>
94 #include <ufs/ufs/ufs_extern.h>
95 #include <ufs/ufs/ufsmount.h>
96
97 #include <ufs/ffs/fs.h>
98 #include <ufs/ffs/ffs_extern.h>
99 #include "opt_directio.h"
100 #include "opt_ffs.h"
101
102 #ifdef DIRECTIO
103 extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
104 #endif
105 static vop_fsync_t ffs_fsync;
106 static vop_lock_t ffs_lock;
107 static vop_getpages_t ffs_getpages;
108 static vop_read_t ffs_read;
109 static vop_write_t ffs_write;
110 static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
111 static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
112 struct ucred *cred);
113 static vop_strategy_t ffsext_strategy;
114 static vop_closeextattr_t ffs_closeextattr;
115 static vop_deleteextattr_t ffs_deleteextattr;
116 static vop_getextattr_t ffs_getextattr;
117 static vop_listextattr_t ffs_listextattr;
118 static vop_openextattr_t ffs_openextattr;
119 static vop_setextattr_t ffs_setextattr;
120
121
/* Global vfs data structures for FFS on UFS1 (no extended attributes). */
/*
 * Vnode operations overridden by FFS for UFS1 file vnodes.  Any operation
 * not listed here falls through to the generic UFS implementation via
 * .vop_default.
 */
struct vop_vector ffs_vnodeops1 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
};
132
/*
 * Fifo operations overridden by FFS for UFS1 fifo vnodes; everything else
 * is delegated to ufs_fifoops via .vop_default.
 */
struct vop_vector ffs_fifoops1 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_reallocblks =	ffs_reallocblks, /* XXX: really ??? */
};
138
/* Global vfs data structures for FFS on UFS2 (with extended attribute support). */
/*
 * Vnode operations overridden by FFS for UFS2 file vnodes.  In addition to
 * the UFS1 set, the extended attribute operations are wired up here.
 * Unlisted operations fall through to ufs_vnodeops via .vop_default.
 */
struct vop_vector ffs_vnodeops2 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
};
155
/*
 * Fifo operations overridden by FFS for UFS2 fifo vnodes, including the
 * extended attribute operations and the ffsext_strategy routine that
 * redirects ext-area I/O.  Unlisted operations fall through to
 * ufs_fifoops via .vop_default.
 */
struct vop_vector ffs_fifoops2 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_lock =		ffs_lock,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_strategy =		ffsext_strategy,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
};
169
170 /*
171 * Synch an open file.
172 */
173 /* ARGSUSED */
174 static int
175 ffs_fsync(struct vop_fsync_args *ap)
176 {
177 int error;
178
179 error = ffs_syncvnode(ap->a_vp, ap->a_waitfor);
180 if (error)
181 return (error);
182 if (ap->a_waitfor == MNT_WAIT &&
183 (ap->a_vp->v_mount->mnt_flag & MNT_SOFTDEP))
184 error = softdep_fsync(ap->a_vp);
185 return (error);
186 }
187
/*
 * Flush all dirty buffers associated with a vnode, then update the on-disk
 * inode via ffs_update().
 *
 * vp      - the vnode to flush
 * waitfor - MNT_WAIT for a synchronous flush (wait for all I/O to drain and
 *	     soft-dependency metadata to be written), anything else for a
 *	     best-effort asynchronous flush.
 *
 * Returns 0 on success or an errno from a failed write or inode update.
 */
int
ffs_syncvnode(struct vnode *vp, int waitfor)
{
	struct inode *ip = VTOI(vp);
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (waitfor == MNT_WAIT);
	/* Logical block just past end-of-file; buffers at or beyond it
	 * cover truncated data and can be discarded below. */
	lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	/* One extra pass beyond the number of indirect block levels. */
	passes = NIADDR + 1;
	skipmeta = 0;
	/* Synchronous flush: write data blocks first, metadata (negative
	 * lbns) on a second pass. */
	if (wait)
		skipmeta = 1;
	s = splbio();
	VI_LOCK(vp);
loop:
	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if ((skipmeta == 1 && bp->b_lblkno < 0))
			continue;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		VI_UNLOCK(vp);
		if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			VI_LOCK(vp);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediatly.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {

			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else
			vfs_bio_awrite(bp);

		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		VI_LOCK(vp);
		nbp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		/* Wait for all writes on this bufobj to complete. */
		bufobj_wwait(&vp->v_bufobj, 3, 0);
		VI_UNLOCK(vp);

		/*
		 * Ensure that any filesystem metatdata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(vp)) != 0)
			return (error);
		s = splbio();

		VI_LOCK(vp);
		if (vp->v_bufobj.bo_dirty.bv_cnt > 0) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	VI_UNLOCK(vp);
	splx(s);
	/* Finally push the inode itself; wait for it if MNT_WAIT. */
	return (ffs_update(vp, wait));
}
332
/*
 * Acquire the vnode lock.  Unlike the generic routine, this one must cope
 * with the lock identity (vp->v_vnlock) changing while the caller sleeps:
 * a vnode can mutate between a snapshot file vnode (which shares an
 * external lock) and a regular file vnode.  Requests that do not acquire
 * the lock are passed straight to the generic UFS implementation.
 */
static int
ffs_lock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
#ifndef NO_FFS_SNAPSHOT
	struct vnode *vp;
	int flags;
	struct lock *lkp;
	int result;

	switch (ap->a_flags & LK_TYPE_MASK) {
	case LK_SHARED:
	case LK_UPGRADE:
	case LK_EXCLUSIVE:
		vp = ap->a_vp;
		flags = ap->a_flags;
		for (;;) {
			/*
			 * vnode interlock must be held to ensure that
			 * the possibly external lock isn't freed,
			 * e.g. when mutating from snapshot file vnode
			 * to regular file vnode.
			 */
			if ((flags & LK_INTERLOCK) == 0) {
				VI_LOCK(vp);
				flags |= LK_INTERLOCK;
			}
			lkp = vp->v_vnlock;
			result = lockmgr(lkp, flags, VI_MTX(vp), ap->a_td);
			/* Done if the lock identity is unchanged or the
			 * acquisition failed outright. */
			if (lkp == vp->v_vnlock || result != 0)
				break;
			/*
			 * Apparent success, except that the vnode
			 * mutated between snapshot file vnode and
			 * regular file vnode while this process
			 * slept. The lock currently held is not the
			 * right lock. Release it, and try to get the
			 * new lock.
			 */
			(void) lockmgr(lkp, LK_RELEASE, VI_MTX(vp), ap->a_td);
			/* An upgrade cannot be retried on the new lock
			 * (we no longer hold it shared); ask for an
			 * exclusive lock instead. */
			if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
			flags &= ~LK_INTERLOCK;
		}
		break;
	default:
		result = VOP_LOCK_APV(&ufs_vnodeops, ap);
	}
	return (result);
#else
	return (VOP_LOCK_APV(&ufs_vnodeops, ap));
#endif
}
390
391 /*
392 * Vnode op for reading.
393 */
394 /* ARGSUSED */
/*
 * Vnode op for reading the normal data area of a file.
 *
 * Walks the request one filesystem block at a time, reading each block
 * (with clustering or readahead where allowed) and copying it out with
 * uiomove().  IO_EXT requests are not supported here (panics; see
 * ffs_extread).  With DIRECTIO compiled in, IO_DIRECT requests are first
 * offered to ffs_rawread().  On any transfer, IN_ACCESS is set unless the
 * mount has MNT_NOATIME.  Returns 0 or an errno.
 */
static int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	int seqcount;
	int ioflag;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return error;
	}
#endif

	/* Sequential-access heuristic encoded in the upper ioflag bits. */
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->fs_maxfilesize)
		return (EOVERFLOW);

	/* bp is reset to NULL each iteration; a non-NULL bp after the loop
	 * can only come from a 'break' and is released below. */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
				size, NOCRED, blkoffset + uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	/* Mark the inode accessed if anything was transferred (or on
	 * success), unless access-time updates are disabled. */
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
		VI_LOCK(vp);
		ip->i_flag |= IN_ACCESS;
		VI_UNLOCK(vp);
	}
	return (error);
}
603
604 /*
605 * Vnode op for writing.
606 */
/*
 * Vnode op for writing the normal data area of a file.
 *
 * Allocates backing blocks with UFS_BALLOC and copies user data in one
 * filesystem block at a time, choosing sync/async/clustered/delayed
 * writes per ioflag and memory pressure.  IO_EXT requests are not
 * supported here (panics; see ffs_extwrite).  Enforces the process
 * RLIMIT_FSIZE limit (raising SIGXFSZ) and the filesystem maxfilesize.
 * Clears ISUID/ISGID after a successful non-superuser write, and on
 * error with IO_UNIT truncates back to the original size and restores
 * the uio.  Returns 0 or an errno.
 */
static int
ffs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	struct thread *td;
	ufs_lbn_t lbn;
	off_t osize;
	int seqcount;
	int blkoffset, error, flags, ioflag, resid, size, xfersize;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
		panic("ffs_write+IO_EXT");
#endif

	/* Sequential-access heuristic encoded in the upper ioflag bits. */
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		/* Append-only files may only be written at EOF. */
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("ffs_write: dir write");
		break;
	default:
		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
			(int)uio->uio_offset,
			(int)uio->uio_resid
		);
	}

	KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
	KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	td = uio->uio_td;
	if (vp->v_type == VREG && td != NULL) {
		PROC_LOCK(td->td_proc);
		if (uio->uio_offset + uio->uio_resid >
		    lim_cur(td->td_proc, RLIMIT_FSIZE)) {
			psignal(td->td_proc, SIGXFSZ);
			PROC_UNLOCK(td->td_proc);
			return (EFBIG);
		}
		PROC_UNLOCK(td->td_proc);
	}

	/* Remember starting resid/size for the error-unwind path below. */
	resid = uio->uio_resid;
	osize = ip->i_size;
	if (seqcount > BA_SEQMAX)
		flags = BA_SEQMAX << BA_SEQSHIFT;
	else
		flags = seqcount << BA_SEQSHIFT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		/* Grow the VM object before the data lands in it. */
		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

                /*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
                 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
/* XXX is uio->uio_offset the right thing here? */
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
			bp->b_flags |= B_NOCACHE;

		/* Extend the file size before copying so readers see a
		 * consistent i_size. */
		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP_SET(ip, i_size, ip->i_size);
		}

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(vp, bp, ip->i_size, seqcount);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred &&
	    suser_cred(ap->a_cred, SUSER_ALLOWJAIL)) {
		ip->i_mode &= ~(ISUID | ISGID);
		DIP_SET(ip, i_mode, ip->i_mode);
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_NORMAL | (ioflag & IO_SYNC),
			    ap->a_cred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}
800
801 /*
802 * get page routine
803 */
804 static int
805 ffs_getpages(ap)
806 struct vop_getpages_args *ap;
807 {
808 int i;
809 vm_page_t mreq;
810 int pcount;
811
812 pcount = round_page(ap->a_count) / PAGE_SIZE;
813 mreq = ap->a_m[ap->a_reqpage];
814
815 /*
816 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
817 * then the entire page is valid. Since the page may be mapped,
818 * user programs might reference data beyond the actual end of file
819 * occuring within the page. We have to zero that data.
820 */
821 VM_OBJECT_LOCK(mreq->object);
822 if (mreq->valid) {
823 if (mreq->valid != VM_PAGE_BITS_ALL)
824 vm_page_zero_invalid(mreq, TRUE);
825 vm_page_lock_queues();
826 for (i = 0; i < pcount; i++) {
827 if (i != ap->a_reqpage) {
828 vm_page_free(ap->a_m[i]);
829 }
830 }
831 vm_page_unlock_queues();
832 VM_OBJECT_UNLOCK(mreq->object);
833 return VM_PAGER_OK;
834 }
835 VM_OBJECT_UNLOCK(mreq->object);
836
837 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
838 ap->a_count,
839 ap->a_reqpage);
840 }
841
842
843 /*
844 * Extended attribute area reading.
845 */
/*
 * Extended attribute area reading (UFS2 only).
 *
 * Mirrors ffs_read() but operates on the inode's external attribute area,
 * whose size is dp->di_extsize and whose blocks are addressed with
 * negative logical block numbers (-1 - lbn).  Sets IN_ACCESS on a
 * transfer unless the mount has MNT_NOATIME.  Returns 0 or an errno.
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");

#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));

	/* bp is reset to NULL each iteration; a non-NULL bp after the loop
	 * can only come from a 'break' and is released below. */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
					(int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	/* Mark the inode accessed if anything was transferred (or on
	 * success), unless access-time updates are disabled. */
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
		VI_LOCK(vp);
		ip->i_flag |= IN_ACCESS;
		VI_UNLOCK(vp);
	}
	return (error);
}
1002
1003 /*
1004 * Extended attribute area writing.
1005 */
/*
 * Extended attribute area writing (UFS2 only).
 *
 * Mirrors ffs_write() but operates on the inode's external attribute
 * area.  The area is limited to NXADDR blocks; IO_APPEND writes start at
 * di_extsize.  UFS_BALLOC is always called with IO_EXT so allocation
 * happens in the ext block pointers.  Clears ISUID/ISGID after a
 * successful non-superuser write, and on error with IO_UNIT truncates
 * the ext area back to its original size.  Returns 0 or an errno.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	int blkoffset, error, flags, resid, size, xfersize;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
	/* The ext area can hold at most NXADDR direct blocks. */
	if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
		return (EFBIG);

	/* Remember starting resid/size for the error-unwind path below. */
	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

                /*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
                 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/* Extend the ext-area size before copying the data in. */
		if (uio->uio_offset + xfersize > dp->di_extsize)
			dp->di_extsize = uio->uio_offset + xfersize;

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ucred &&
	    suser_cred(ucred, SUSER_ALLOWJAIL)) {
		ip->i_mode &= ~(ISUID | ISGID);
		dp->di_mode = ip->i_mode;
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}
1125
1126
1127 /*
1128 * Vnode operating to retrieve a named extended attribute.
1129 *
1130 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
1131 * the length of the EA, and possibly the pointer to the entry and to the data.
1132 */
1133 static int
1134 ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac)
1135 {
1136 u_char *p, *pe, *pn, *p0;
1137 int eapad1, eapad2, ealength, ealen, nlen;
1138 uint32_t ul;
1139
1140 pe = ptr + length;
1141 nlen = strlen(name);
1142
1143 for (p = ptr; p < pe; p = pn) {
1144 p0 = p;
1145 bcopy(p, &ul, sizeof(ul));
1146 pn = p + ul;
1147 /* make sure this entry is complete */
1148 if (pn > pe)
1149 break;
1150 p += sizeof(uint32_t);
1151 if (*p != nspace)
1152 continue;
1153 p++;
1154 eapad2 = *p++;
1155 if (*p != nlen)
1156 continue;
1157 p++;
1158 if (bcmp(p, name, nlen))
1159 continue;
1160 ealength = sizeof(uint32_t) + 3 + nlen;
1161 eapad1 = 8 - (ealength % 8);
1162 if (eapad1 == 8)
1163 eapad1 = 0;
1164 ealength += eapad1;
1165 ealen = ul - ealength - eapad2;
1166 p += nlen + eapad1;
1167 if (eap != NULL)
1168 *eap = p0;
1169 if (eac != NULL)
1170 *eac = p;
1171 return (ealen);
1172 }
1173 return(-1);
1174 }
1175
1176 static int
1177 ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
1178 {
1179 struct inode *ip;
1180 struct ufs2_dinode *dp;
1181 struct uio luio;
1182 struct iovec liovec;
1183 int easize, error;
1184 u_char *eae;
1185
1186 ip = VTOI(vp);
1187 dp = ip->i_din2;
1188 easize = dp->di_extsize;
1189
1190 eae = malloc(easize + extra, M_TEMP, M_WAITOK);
1191
1192 liovec.iov_base = eae;
1193 liovec.iov_len = easize;
1194 luio.uio_iov = &liovec;
1195 luio.uio_iovcnt = 1;
1196 luio.uio_offset = 0;
1197 luio.uio_resid = easize;
1198 luio.uio_segflg = UIO_SYSSPACE;
1199 luio.uio_rw = UIO_READ;
1200 luio.uio_td = td;
1201
1202 error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
1203 if (error) {
1204 free(eae, M_TEMP);
1205 return(error);
1206 }
1207 *p = eae;
1208 return (0);
1209 }
1210
1211 static int
1212 ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
1213 {
1214 struct inode *ip;
1215 struct ufs2_dinode *dp;
1216 int error;
1217
1218 ip = VTOI(vp);
1219
1220 if (ip->i_ea_area != NULL)
1221 return (EBUSY);
1222 dp = ip->i_din2;
1223 error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
1224 if (error)
1225 return (error);
1226 ip->i_ea_len = dp->di_extsize;
1227 ip->i_ea_error = 0;
1228 return (0);
1229 }
1230
1231 /*
1232 * Vnode extattr transaction commit/abort
1233 */
1234 static int
1235 ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
1236 {
1237 struct inode *ip;
1238 struct uio luio;
1239 struct iovec liovec;
1240 int error;
1241 struct ufs2_dinode *dp;
1242
1243 ip = VTOI(vp);
1244 if (ip->i_ea_area == NULL)
1245 return (EINVAL);
1246 dp = ip->i_din2;
1247 error = ip->i_ea_error;
1248 if (commit && error == 0) {
1249 if (cred == NOCRED)
1250 cred = vp->v_mount->mnt_cred;
1251 liovec.iov_base = ip->i_ea_area;
1252 liovec.iov_len = ip->i_ea_len;
1253 luio.uio_iov = &liovec;
1254 luio.uio_iovcnt = 1;
1255 luio.uio_offset = 0;
1256 luio.uio_resid = ip->i_ea_len;
1257 luio.uio_segflg = UIO_SYSSPACE;
1258 luio.uio_rw = UIO_WRITE;
1259 luio.uio_td = td;
1260 /* XXX: I'm not happy about truncating to zero size */
1261 if (ip->i_ea_len < dp->di_extsize)
1262 error = ffs_truncate(vp, 0, IO_EXT, cred, td);
1263 error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
1264 }
1265 free(ip->i_ea_area, M_TEMP);
1266 ip->i_ea_area = NULL;
1267 ip->i_ea_len = 0;
1268 ip->i_ea_error = 0;
1269 return (error);
1270 }
1271
1272 /*
1273 * Vnode extattr strategy routine for fifos.
1274 *
1275 * We need to check for a read or write of the external attributes.
1276 * Otherwise we just fall through and do the usual thing.
1277 */
1278 static int
1279 ffsext_strategy(struct vop_strategy_args *ap)
1280 /*
1281 struct vop_strategy_args {
1282 struct vnodeop_desc *a_desc;
1283 struct vnode *a_vp;
1284 struct buf *a_bp;
1285 };
1286 */
1287 {
1288 struct vnode *vp;
1289 daddr_t lbn;
1290
1291 vp = ap->a_vp;
1292 lbn = ap->a_bp->b_lblkno;
1293 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
1294 lbn < 0 && lbn >= -NXADDR)
1295 return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
1296 if (vp->v_type == VFIFO)
1297 return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
1298 panic("spec nodes went here");
1299 }
1300
1301 /*
1302 * Vnode extattr transaction commit/abort
1303 */
1304 static int
1305 ffs_openextattr(struct vop_openextattr_args *ap)
1306 /*
1307 struct vop_openextattr_args {
1308 struct vnodeop_desc *a_desc;
1309 struct vnode *a_vp;
1310 IN struct ucred *a_cred;
1311 IN struct thread *a_td;
1312 };
1313 */
1314 {
1315 struct inode *ip;
1316 struct fs *fs;
1317
1318 ip = VTOI(ap->a_vp);
1319 fs = ip->i_fs;
1320
1321 if (ap->a_vp->v_type == VCHR)
1322 return (EOPNOTSUPP);
1323
1324 return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
1325 }
1326
1327
1328 /*
1329 * Vnode extattr transaction commit/abort
1330 */
1331 static int
1332 ffs_closeextattr(struct vop_closeextattr_args *ap)
1333 /*
1334 struct vop_closeextattr_args {
1335 struct vnodeop_desc *a_desc;
1336 struct vnode *a_vp;
1337 int a_commit;
1338 IN struct ucred *a_cred;
1339 IN struct thread *a_td;
1340 };
1341 */
1342 {
1343 struct inode *ip;
1344 struct fs *fs;
1345
1346 ip = VTOI(ap->a_vp);
1347 fs = ip->i_fs;
1348
1349 if (ap->a_vp->v_type == VCHR)
1350 return (EOPNOTSUPP);
1351
1352 return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
1353 }
1354
/*
 * Vnode operation to remove a named attribute.
 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	/* EAs are not maintained for special files. */
	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		/*
		 * Record the failure so an enclosing EA transaction is
		 * aborted when it is closed (ffs_close_ea reports it).
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	/*
	 * If no EA transaction is open on the vnode, run a private
	 * (stand_alone) one for the duration of this call and commit
	 * or abort it before returning.
	 */
	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	/* A deletion is a replacement with a zero-length record. */
	ealength = eapad1 = ealen = eapad2 = 0;

	/* Edit a private copy of the cached EA area. */
	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &p, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return(ENOATTR);
	}
	/* ul is the full on-disk length of the record being removed. */
	bcopy(p, &ul, sizeof ul);
	/* i is the offset of the byte just past the record. */
	i = p - eae + ul;
	/* Slide the tail of the area down over the deleted record. */
	if (ul != ealength) {
		bcopy(p + ul, p + ealength, easize - i);
		easize += (ealength - ul);
	}
	/* Cannot trigger for a deletion: the area only shrinks. */
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	/* Install the edited copy and free the old area. */
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}
1440
1441 /*
1442 * Vnode operation to retrieve a named extended attribute.
1443 */
1444 static int
1445 ffs_getextattr(struct vop_getextattr_args *ap)
1446 /*
1447 vop_getextattr {
1448 IN struct vnode *a_vp;
1449 IN int a_attrnamespace;
1450 IN const char *a_name;
1451 INOUT struct uio *a_uio;
1452 OUT size_t *a_size;
1453 IN struct ucred *a_cred;
1454 IN struct thread *a_td;
1455 };
1456 */
1457 {
1458 struct inode *ip;
1459 struct fs *fs;
1460 u_char *eae, *p;
1461 unsigned easize;
1462 int error, ealen, stand_alone;
1463
1464 ip = VTOI(ap->a_vp);
1465 fs = ip->i_fs;
1466
1467 if (ap->a_vp->v_type == VCHR)
1468 return (EOPNOTSUPP);
1469
1470 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1471 ap->a_cred, ap->a_td, IREAD);
1472 if (error)
1473 return (error);
1474
1475 if (ip->i_ea_area == NULL) {
1476 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1477 if (error)
1478 return (error);
1479 stand_alone = 1;
1480 } else {
1481 stand_alone = 0;
1482 }
1483 eae = ip->i_ea_area;
1484 easize = ip->i_ea_len;
1485
1486 ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1487 NULL, &p);
1488 if (ealen >= 0) {
1489 error = 0;
1490 if (ap->a_size != NULL)
1491 *ap->a_size = ealen;
1492 else if (ap->a_uio != NULL)
1493 error = uiomove(p, ealen, ap->a_uio);
1494 } else
1495 error = ENOATTR;
1496 if (stand_alone)
1497 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1498 return(error);
1499 }
1500
1501 /*
1502 * Vnode operation to retrieve extended attributes on a vnode.
1503 */
1504 static int
1505 ffs_listextattr(struct vop_listextattr_args *ap)
1506 /*
1507 vop_listextattr {
1508 IN struct vnode *a_vp;
1509 IN int a_attrnamespace;
1510 INOUT struct uio *a_uio;
1511 OUT size_t *a_size;
1512 IN struct ucred *a_cred;
1513 IN struct thread *a_td;
1514 };
1515 */
1516 {
1517 struct inode *ip;
1518 struct fs *fs;
1519 u_char *eae, *p, *pe, *pn;
1520 unsigned easize;
1521 uint32_t ul;
1522 int error, ealen, stand_alone;
1523
1524 ip = VTOI(ap->a_vp);
1525 fs = ip->i_fs;
1526
1527 if (ap->a_vp->v_type == VCHR)
1528 return (EOPNOTSUPP);
1529
1530 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1531 ap->a_cred, ap->a_td, IREAD);
1532 if (error)
1533 return (error);
1534
1535 if (ip->i_ea_area == NULL) {
1536 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1537 if (error)
1538 return (error);
1539 stand_alone = 1;
1540 } else {
1541 stand_alone = 0;
1542 }
1543 eae = ip->i_ea_area;
1544 easize = ip->i_ea_len;
1545
1546 error = 0;
1547 if (ap->a_size != NULL)
1548 *ap->a_size = 0;
1549 pe = eae + easize;
1550 for(p = eae; error == 0 && p < pe; p = pn) {
1551 bcopy(p, &ul, sizeof(ul));
1552 pn = p + ul;
1553 if (pn > pe)
1554 break;
1555 p += sizeof(ul);
1556 if (*p++ != ap->a_attrnamespace)
1557 continue;
1558 p++; /* pad2 */
1559 ealen = *p;
1560 if (ap->a_size != NULL) {
1561 *ap->a_size += ealen + 1;
1562 } else if (ap->a_uio != NULL) {
1563 error = uiomove(p, ealen + 1, ap->a_uio);
1564 }
1565 }
1566 if (stand_alone)
1567 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1568 return(error);
1569 }
1570
/*
 * Vnode operation to set a named attribute.
 */
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	/* EAs are not maintained for special files. */
	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	/* XXX Now unsupported API to delete EAs using NULL uio. */
	if (ap->a_uio == NULL)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		/*
		 * Record the failure so an enclosing EA transaction is
		 * aborted when it is closed (ffs_close_ea reports it).
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	/*
	 * If no EA transaction is open on the vnode, run a private
	 * (stand_alone) one for the duration of this call.
	 */
	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	/*
	 * Compute the full record size: fixed header (record length,
	 * namespace, pad2, name length), the name padded (eapad1) so
	 * the content starts on an 8-byte boundary, then the content
	 * itself padded (eapad2) out to an 8-byte boundary.
	 */
	ealen = ap->a_uio->uio_resid;
	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
	eapad1 = 8 - (ealength % 8);
	if (eapad1 == 8)
		eapad1 = 0;
	eapad2 = 8 - (ealen % 8);
	if (eapad2 == 8)
		eapad2 = 0;
	ealength += eapad1 + ealen + eapad2;

	/*
	 * Edit a private copy sized for the worst case (a brand-new
	 * record appended to the existing area).
	 */
	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize,
	    ap->a_attrnamespace, ap->a_name, &p, NULL);
	if (olen == -1) {
		/* new, append at end */
		p = eae + easize;
		easize += ealength;
	} else {
		/*
		 * Replace in place: ul is the old record's length; slide
		 * the tail so the hole exactly fits the new record.
		 */
		bcopy(p, &ul, sizeof ul);
		i = p - eae + ul;
		if (ul != ealength) {
			bcopy(p + ul, p + ealength, easize - i);
			easize += (ealength - ul);
		}
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	/* Write the record header, name, and header padding. */
	bcopy(&ealength, p, sizeof(ealength));
	p += sizeof(ealength);
	*p++ = ap->a_attrnamespace;
	*p++ = eapad2;
	*p++ = strlen(ap->a_name);
	/*
	 * NOTE(review): strcpy also stores a NUL after the name; it is
	 * normally overwritten by the padding/content written below, but
	 * when eapad1 == 0 and ealen == 0 it lands one byte past the
	 * record — verify this cannot overrun the final record.
	 */
	strcpy(p, ap->a_name);
	p += strlen(ap->a_name);
	bzero(p, eapad1);
	p += eapad1;
	/* Copy the attribute content in from the caller. */
	error = uiomove(p, ealen, ap->a_uio);
	if (error) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return(error);
	}
	p += ealen;
	bzero(p, eapad2);

	/* Install the edited copy and free the old area. */
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}
/* Cache object: c8453cbdd2519c2762cd92c5f815f8c0 */