1 /*-
2 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * from: @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
60 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
61 * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95
62 */
63
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD: releng/5.4/sys/ufs/ffs/ffs_vnops.c 145335 2005-04-20 19:11:07Z cvs2svn $");
66
67 #include <sys/param.h>
68 #include <sys/bio.h>
69 #include <sys/systm.h>
70 #include <sys/buf.h>
71 #include <sys/conf.h>
72 #include <sys/extattr.h>
73 #include <sys/kernel.h>
74 #include <sys/limits.h>
75 #include <sys/malloc.h>
76 #include <sys/mount.h>
77 #include <sys/proc.h>
78 #include <sys/resourcevar.h>
79 #include <sys/signalvar.h>
80 #include <sys/stat.h>
81 #include <sys/vmmeter.h>
82 #include <sys/vnode.h>
83
84 #include <vm/vm.h>
85 #include <vm/vm_extern.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_page.h>
88 #include <vm/vm_pager.h>
89 #include <vm/vnode_pager.h>
90
91 #include <ufs/ufs/extattr.h>
92 #include <ufs/ufs/quota.h>
93 #include <ufs/ufs/inode.h>
94 #include <ufs/ufs/ufs_extern.h>
95 #include <ufs/ufs/ufsmount.h>
96
97 #include <ufs/ffs/fs.h>
98 #include <ufs/ffs/ffs_extern.h>
99 #include "opt_directio.h"
100
101 #ifdef DIRECTIO
102 extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
103 #endif
104 static int ffs_fsync(struct vop_fsync_args *);
105 static int ffs_getpages(struct vop_getpages_args *);
106 static int ffs_read(struct vop_read_args *);
107 static int ffs_write(struct vop_write_args *);
108 static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
109 static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
110 struct ucred *cred);
111 static int ffsext_strategy(struct vop_strategy_args *);
112 static int ffs_closeextattr(struct vop_closeextattr_args *);
113 static int ffs_deleteextattr(struct vop_deleteextattr_args *);
114 static int ffs_getextattr(struct vop_getextattr_args *);
115 static int ffs_listextattr(struct vop_listextattr_args *);
116 static int ffs_openextattr(struct vop_openextattr_args *);
117 static int ffs_setextattr(struct vop_setextattr_args *);
118
119
/* Global vfs data structures for ufs. */
vop_t **ffs_vnodeop_p;
/*
 * Operations vector for regular FFS vnodes: generic UFS operations are
 * the default, with FFS-specific overrides for fsync, getpages, read,
 * write, block reallocation, and the extended-attribute entry points.
 */
static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) ufs_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
	{ &vop_getpages_desc,		(vop_t *) ffs_getpages },
	{ &vop_read_desc,		(vop_t *) ffs_read },
	{ &vop_reallocblks_desc,	(vop_t *) ffs_reallocblks },
	{ &vop_write_desc,		(vop_t *) ffs_write },
	{ &vop_closeextattr_desc,	(vop_t *) ffs_closeextattr },
	{ &vop_deleteextattr_desc,	(vop_t *) ffs_deleteextattr },
	{ &vop_getextattr_desc,		(vop_t *) ffs_getextattr },
	{ &vop_listextattr_desc,	(vop_t *) ffs_listextattr },
	{ &vop_openextattr_desc,	(vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc,		(vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_vnodeop_opv_desc =
	{ &ffs_vnodeop_p, ffs_vnodeop_entries };

vop_t **ffs_specop_p;
/*
 * Operations vector for FFS special-device vnodes.  Note that strategy
 * is routed through ffsext_strategy, which handles the negative logical
 * block numbers used for the extended-attribute area.
 */
static struct vnodeopv_entry_desc ffs_specop_entries[] = {
	{ &vop_default_desc,		(vop_t *) ufs_vnoperatespec },
	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
	{ &vop_reallocblks_desc,	(vop_t *) ffs_reallocblks },
	{ &vop_strategy_desc,		(vop_t *) ffsext_strategy },
	{ &vop_closeextattr_desc,	(vop_t *) ffs_closeextattr },
	{ &vop_deleteextattr_desc,	(vop_t *) ffs_deleteextattr },
	{ &vop_getextattr_desc,		(vop_t *) ffs_getextattr },
	{ &vop_listextattr_desc,	(vop_t *) ffs_listextattr },
	{ &vop_openextattr_desc,	(vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc,		(vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_specop_opv_desc =
	{ &ffs_specop_p, ffs_specop_entries };

vop_t **ffs_fifoop_p;
/*
 * Operations vector for FFS FIFO vnodes; same FFS overrides as the
 * special-device vector, defaulting to the UFS FIFO operations.
 */
static struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
	{ &vop_default_desc,		(vop_t *) ufs_vnoperatefifo },
	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
	{ &vop_reallocblks_desc,	(vop_t *) ffs_reallocblks },
	{ &vop_strategy_desc,		(vop_t *) ffsext_strategy },
	{ &vop_closeextattr_desc,	(vop_t *) ffs_closeextattr },
	{ &vop_deleteextattr_desc,	(vop_t *) ffs_deleteextattr },
	{ &vop_getextattr_desc,		(vop_t *) ffs_getextattr },
	{ &vop_listextattr_desc,	(vop_t *) ffs_listextattr },
	{ &vop_openextattr_desc,	(vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc,		(vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_fifoop_opv_desc =
	{ &ffs_fifoop_p, ffs_fifoop_entries };

/* Register the three vectors with the VFS at boot/module load. */
VNODEOP_SET(ffs_vnodeop_opv_desc);
VNODEOP_SET(ffs_specop_opv_desc);
VNODEOP_SET(ffs_fifoop_opv_desc);
177
178 /*
179 * Synch an open file.
180 */
181 /* ARGSUSED */
182 static int
183 ffs_fsync(ap)
184 struct vop_fsync_args /* {
185 struct vnode *a_vp;
186 struct ucred *a_cred;
187 int a_waitfor;
188 struct thread *a_td;
189 } */ *ap;
190 {
191 struct vnode *vp = ap->a_vp;
192 struct inode *ip = VTOI(vp);
193 struct buf *bp;
194 struct buf *nbp;
195 int s, error, wait, passes, skipmeta;
196 ufs_lbn_t lbn;
197
198 wait = (ap->a_waitfor == MNT_WAIT);
199 if (vn_isdisk(vp, NULL)) {
200 lbn = INT_MAX;
201 if (vp->v_rdev->si_mountpoint != NULL &&
202 (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP))
203 softdep_fsync_mountdev(vp);
204 } else {
205 lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
206 }
207
208 /*
209 * Flush all dirty buffers associated with a vnode.
210 */
211 passes = NIADDR + 1;
212 skipmeta = 0;
213 if (wait)
214 skipmeta = 1;
215 s = splbio();
216 VI_LOCK(vp);
217 loop:
218 TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
219 bp->b_vflags &= ~BV_SCANNED;
220 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
221 nbp = TAILQ_NEXT(bp, b_vnbufs);
222 /*
223 * Reasons to skip this buffer: it has already been considered
224 * on this pass, this pass is the first time through on a
225 * synchronous flush request and the buffer being considered
226 * is metadata, the buffer has dependencies that will cause
227 * it to be redirtied and it has not already been deferred,
228 * or it is already being written.
229 */
230 if ((bp->b_vflags & BV_SCANNED) != 0)
231 continue;
232 bp->b_vflags |= BV_SCANNED;
233 if ((skipmeta == 1 && bp->b_lblkno < 0))
234 continue;
235 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
236 continue;
237 if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
238 (bp->b_flags & B_DEFERRED) == 0 &&
239 buf_countdeps(bp, 0)) {
240 bp->b_flags |= B_DEFERRED;
241 BUF_UNLOCK(bp);
242 continue;
243 }
244 VI_UNLOCK(vp);
245 if ((bp->b_flags & B_DELWRI) == 0)
246 panic("ffs_fsync: not dirty");
247 if (vp != bp->b_vp)
248 panic("ffs_fsync: vp != vp->b_vp");
249 /*
250 * If this is a synchronous flush request, or it is not a
251 * file or device, start the write on this buffer immediatly.
252 */
253 if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {
254
255 /*
256 * On our final pass through, do all I/O synchronously
257 * so that we can find out if our flush is failing
258 * because of write errors.
259 */
260 if (passes > 0 || !wait) {
261 if ((bp->b_flags & B_CLUSTEROK) && !wait) {
262 (void) vfs_bio_awrite(bp);
263 } else {
264 bremfree(bp);
265 splx(s);
266 (void) bawrite(bp);
267 s = splbio();
268 }
269 } else {
270 bremfree(bp);
271 splx(s);
272 if ((error = bwrite(bp)) != 0)
273 return (error);
274 s = splbio();
275 }
276 } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
277 /*
278 * If the buffer is for data that has been truncated
279 * off the file, then throw it away.
280 */
281 bremfree(bp);
282 bp->b_flags |= B_INVAL | B_NOCACHE;
283 splx(s);
284 brelse(bp);
285 s = splbio();
286 } else
287 vfs_bio_awrite(bp);
288
289 /*
290 * Since we may have slept during the I/O, we need
291 * to start from a known point.
292 */
293 VI_LOCK(vp);
294 nbp = TAILQ_FIRST(&vp->v_dirtyblkhd);
295 }
296 /*
297 * If we were asked to do this synchronously, then go back for
298 * another pass, this time doing the metadata.
299 */
300 if (skipmeta) {
301 skipmeta = 0;
302 goto loop;
303 }
304
305 if (wait) {
306 while (vp->v_numoutput) {
307 vp->v_iflag |= VI_BWAIT;
308 msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
309 PRIBIO + 4, "ffsfsn", 0);
310 }
311 VI_UNLOCK(vp);
312
313 /*
314 * Ensure that any filesystem metatdata associated
315 * with the vnode has been written.
316 */
317 splx(s);
318 if ((error = softdep_sync_metadata(ap)) != 0)
319 return (error);
320 s = splbio();
321
322 VI_LOCK(vp);
323 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
324 /*
325 * Block devices associated with filesystems may
326 * have new I/O requests posted for them even if
327 * the vnode is locked, so no amount of trying will
328 * get them clean. Thus we give block devices a
329 * good effort, then just give up. For all other file
330 * types, go around and try again until it is clean.
331 */
332 if (passes > 0) {
333 passes -= 1;
334 goto loop;
335 }
336 #ifdef DIAGNOSTIC
337 if (!vn_isdisk(vp, NULL))
338 vprint("ffs_fsync: dirty", vp);
339 #endif
340 }
341 }
342 VI_UNLOCK(vp);
343 splx(s);
344 return (UFS_UPDATE(vp, wait));
345 }
346
347
348 /*
349 * Vnode op for reading.
350 */
351 /* ARGSUSED */
352 static int
353 ffs_read(ap)
354 struct vop_read_args /* {
355 struct vnode *a_vp;
356 struct uio *a_uio;
357 int a_ioflag;
358 struct ucred *a_cred;
359 } */ *ap;
360 {
361 struct vnode *vp;
362 struct inode *ip;
363 struct uio *uio;
364 struct fs *fs;
365 struct buf *bp;
366 ufs_lbn_t lbn, nextlbn;
367 off_t bytesinfile;
368 long size, xfersize, blkoffset;
369 int error, orig_resid;
370 int seqcount;
371 int ioflag;
372
373 vp = ap->a_vp;
374 uio = ap->a_uio;
375 ioflag = ap->a_ioflag;
376 if (ap->a_ioflag & IO_EXT)
377 #ifdef notyet
378 return (ffs_extread(vp, uio, ioflag));
379 #else
380 panic("ffs_read+IO_EXT");
381 #endif
382 #ifdef DIRECTIO
383 if ((ioflag & IO_DIRECT) != 0) {
384 int workdone;
385
386 error = ffs_rawread(vp, uio, &workdone);
387 if (error != 0 || workdone != 0)
388 return error;
389 }
390 #endif
391
392 GIANT_REQUIRED;
393
394 seqcount = ap->a_ioflag >> IO_SEQSHIFT;
395 ip = VTOI(vp);
396
397 #ifdef DIAGNOSTIC
398 if (uio->uio_rw != UIO_READ)
399 panic("ffs_read: mode");
400
401 if (vp->v_type == VLNK) {
402 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
403 panic("ffs_read: short symlink");
404 } else if (vp->v_type != VREG && vp->v_type != VDIR)
405 panic("ffs_read: type %d", vp->v_type);
406 #endif
407 orig_resid = uio->uio_resid;
408 KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
409 if (orig_resid == 0)
410 return (0);
411 KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
412 fs = ip->i_fs;
413 if (uio->uio_offset < ip->i_size &&
414 uio->uio_offset >= fs->fs_maxfilesize)
415 return (EOVERFLOW);
416
417 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
418 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
419 break;
420 lbn = lblkno(fs, uio->uio_offset);
421 nextlbn = lbn + 1;
422
423 /*
424 * size of buffer. The buffer representing the
425 * end of the file is rounded up to the size of
426 * the block type ( fragment or full block,
427 * depending ).
428 */
429 size = blksize(fs, ip, lbn);
430 blkoffset = blkoff(fs, uio->uio_offset);
431
432 /*
433 * The amount we want to transfer in this iteration is
434 * one FS block less the amount of the data before
435 * our startpoint (duh!)
436 */
437 xfersize = fs->fs_bsize - blkoffset;
438
439 /*
440 * But if we actually want less than the block,
441 * or the file doesn't have a whole block more of data,
442 * then use the lesser number.
443 */
444 if (uio->uio_resid < xfersize)
445 xfersize = uio->uio_resid;
446 if (bytesinfile < xfersize)
447 xfersize = bytesinfile;
448
449 if (lblktosize(fs, nextlbn) >= ip->i_size) {
450 /*
451 * Don't do readahead if this is the end of the file.
452 */
453 error = bread(vp, lbn, size, NOCRED, &bp);
454 } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
455 /*
456 * Otherwise if we are allowed to cluster,
457 * grab as much as we can.
458 *
459 * XXX This may not be a win if we are not
460 * doing sequential access.
461 */
462 error = cluster_read(vp, ip->i_size, lbn,
463 size, NOCRED, uio->uio_resid, seqcount, &bp);
464 } else if (seqcount > 1) {
465 /*
466 * If we are NOT allowed to cluster, then
467 * if we appear to be acting sequentially,
468 * fire off a request for a readahead
469 * as well as a read. Note that the 4th and 5th
470 * arguments point to arrays of the size specified in
471 * the 6th argument.
472 */
473 int nextsize = blksize(fs, ip, nextlbn);
474 error = breadn(vp, lbn,
475 size, &nextlbn, &nextsize, 1, NOCRED, &bp);
476 } else {
477 /*
478 * Failing all of the above, just read what the
479 * user asked for. Interestingly, the same as
480 * the first option above.
481 */
482 error = bread(vp, lbn, size, NOCRED, &bp);
483 }
484 if (error) {
485 brelse(bp);
486 bp = NULL;
487 break;
488 }
489
490 /*
491 * If IO_DIRECT then set B_DIRECT for the buffer. This
492 * will cause us to attempt to release the buffer later on
493 * and will cause the buffer cache to attempt to free the
494 * underlying pages.
495 */
496 if (ioflag & IO_DIRECT)
497 bp->b_flags |= B_DIRECT;
498
499 /*
500 * We should only get non-zero b_resid when an I/O error
501 * has occurred, which should cause us to break above.
502 * However, if the short read did not cause an error,
503 * then we want to ensure that we do not uiomove bad
504 * or uninitialized data.
505 */
506 size -= bp->b_resid;
507 if (size < xfersize) {
508 if (size == 0)
509 break;
510 xfersize = size;
511 }
512
513 error = uiomove((char *)bp->b_data + blkoffset,
514 (int)xfersize, uio);
515 if (error)
516 break;
517
518 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
519 (LIST_FIRST(&bp->b_dep) == NULL)) {
520 /*
521 * If there are no dependencies, and it's VMIO,
522 * then we don't need the buf, mark it available
523 * for freeing. The VM has the data.
524 */
525 bp->b_flags |= B_RELBUF;
526 brelse(bp);
527 } else {
528 /*
529 * Otherwise let whoever
530 * made the request take care of
531 * freeing it. We just queue
532 * it onto another list.
533 */
534 bqrelse(bp);
535 }
536 }
537
538 /*
539 * This can only happen in the case of an error
540 * because the loop above resets bp to NULL on each iteration
541 * and on normal completion has not set a new value into it.
542 * so it must have come from a 'break' statement
543 */
544 if (bp != NULL) {
545 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
546 (LIST_FIRST(&bp->b_dep) == NULL)) {
547 bp->b_flags |= B_RELBUF;
548 brelse(bp);
549 } else {
550 bqrelse(bp);
551 }
552 }
553
554 if ((error == 0 || uio->uio_resid != orig_resid) &&
555 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
556 ip->i_flag |= IN_ACCESS;
557 return (error);
558 }
559
560 /*
561 * Vnode op for writing.
562 */
563 static int
564 ffs_write(ap)
565 struct vop_write_args /* {
566 struct vnode *a_vp;
567 struct uio *a_uio;
568 int a_ioflag;
569 struct ucred *a_cred;
570 } */ *ap;
571 {
572 struct vnode *vp;
573 struct uio *uio;
574 struct inode *ip;
575 struct fs *fs;
576 struct buf *bp;
577 struct thread *td;
578 ufs_lbn_t lbn;
579 off_t osize;
580 int seqcount;
581 int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
582
583 vp = ap->a_vp;
584 uio = ap->a_uio;
585 ioflag = ap->a_ioflag;
586 if (ap->a_ioflag & IO_EXT)
587 #ifdef notyet
588 return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
589 #else
590 panic("ffs_write+IO_EXT");
591 #endif
592
593 GIANT_REQUIRED;
594
595 extended = 0;
596 seqcount = ap->a_ioflag >> IO_SEQSHIFT;
597 ip = VTOI(vp);
598
599 #ifdef DIAGNOSTIC
600 if (uio->uio_rw != UIO_WRITE)
601 panic("ffs_write: mode");
602 #endif
603
604 switch (vp->v_type) {
605 case VREG:
606 if (ioflag & IO_APPEND)
607 uio->uio_offset = ip->i_size;
608 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
609 return (EPERM);
610 /* FALLTHROUGH */
611 case VLNK:
612 break;
613 case VDIR:
614 panic("ffs_write: dir write");
615 break;
616 default:
617 panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
618 (int)uio->uio_offset,
619 (int)uio->uio_resid
620 );
621 }
622
623 KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
624 KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
625 fs = ip->i_fs;
626 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
627 return (EFBIG);
628 /*
629 * Maybe this should be above the vnode op call, but so long as
630 * file servers have no limits, I don't think it matters.
631 */
632 td = uio->uio_td;
633 if (vp->v_type == VREG && td != NULL) {
634 PROC_LOCK(td->td_proc);
635 if (uio->uio_offset + uio->uio_resid >
636 lim_cur(td->td_proc, RLIMIT_FSIZE)) {
637 psignal(td->td_proc, SIGXFSZ);
638 PROC_UNLOCK(td->td_proc);
639 return (EFBIG);
640 }
641 PROC_UNLOCK(td->td_proc);
642 }
643
644 resid = uio->uio_resid;
645 osize = ip->i_size;
646 if (seqcount > BA_SEQMAX)
647 flags = BA_SEQMAX << BA_SEQSHIFT;
648 else
649 flags = seqcount << BA_SEQSHIFT;
650 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
651 flags |= IO_SYNC;
652
653 for (error = 0; uio->uio_resid > 0;) {
654 lbn = lblkno(fs, uio->uio_offset);
655 blkoffset = blkoff(fs, uio->uio_offset);
656 xfersize = fs->fs_bsize - blkoffset;
657 if (uio->uio_resid < xfersize)
658 xfersize = uio->uio_resid;
659 if (uio->uio_offset + xfersize > ip->i_size)
660 vnode_pager_setsize(vp, uio->uio_offset + xfersize);
661
662 /*
663 * We must perform a read-before-write if the transfer size
664 * does not cover the entire buffer.
665 */
666 if (fs->fs_bsize > xfersize)
667 flags |= BA_CLRBUF;
668 else
669 flags &= ~BA_CLRBUF;
670 /* XXX is uio->uio_offset the right thing here? */
671 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
672 ap->a_cred, flags, &bp);
673 if (error != 0)
674 break;
675 /*
676 * If the buffer is not valid we have to clear out any
677 * garbage data from the pages instantiated for the buffer.
678 * If we do not, a failed uiomove() during a write can leave
679 * the prior contents of the pages exposed to a userland
680 * mmap(). XXX deal with uiomove() errors a better way.
681 */
682 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
683 vfs_bio_clrbuf(bp);
684 if (ioflag & IO_DIRECT)
685 bp->b_flags |= B_DIRECT;
686 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
687 bp->b_flags |= B_NOCACHE;
688
689 if (uio->uio_offset + xfersize > ip->i_size) {
690 ip->i_size = uio->uio_offset + xfersize;
691 DIP_SET(ip, i_size, ip->i_size);
692 extended = 1;
693 }
694
695 size = blksize(fs, ip, lbn) - bp->b_resid;
696 if (size < xfersize)
697 xfersize = size;
698
699 error =
700 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
701 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
702 (LIST_FIRST(&bp->b_dep) == NULL)) {
703 bp->b_flags |= B_RELBUF;
704 }
705
706 /*
707 * If IO_SYNC each buffer is written synchronously. Otherwise
708 * if we have a severe page deficiency write the buffer
709 * asynchronously. Otherwise try to cluster, and if that
710 * doesn't do it then either do an async write (if O_DIRECT),
711 * or a delayed write (if not).
712 */
713 if (ioflag & IO_SYNC) {
714 (void)bwrite(bp);
715 } else if (vm_page_count_severe() ||
716 buf_dirty_count_severe() ||
717 (ioflag & IO_ASYNC)) {
718 bp->b_flags |= B_CLUSTEROK;
719 bawrite(bp);
720 } else if (xfersize + blkoffset == fs->fs_bsize) {
721 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
722 bp->b_flags |= B_CLUSTEROK;
723 cluster_write(bp, ip->i_size, seqcount);
724 } else {
725 bawrite(bp);
726 }
727 } else if (ioflag & IO_DIRECT) {
728 bp->b_flags |= B_CLUSTEROK;
729 bawrite(bp);
730 } else {
731 bp->b_flags |= B_CLUSTEROK;
732 bdwrite(bp);
733 }
734 if (error || xfersize == 0)
735 break;
736 ip->i_flag |= IN_CHANGE | IN_UPDATE;
737 }
738 /*
739 * If we successfully wrote any data, and we are not the superuser
740 * we clear the setuid and setgid bits as a precaution against
741 * tampering.
742 */
743 if (resid > uio->uio_resid && ap->a_cred &&
744 suser_cred(ap->a_cred, SUSER_ALLOWJAIL)) {
745 ip->i_mode &= ~(ISUID | ISGID);
746 DIP_SET(ip, i_mode, ip->i_mode);
747 }
748 if (resid > uio->uio_resid)
749 VN_KNOTE_UNLOCKED(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
750 if (error) {
751 if (ioflag & IO_UNIT) {
752 (void)UFS_TRUNCATE(vp, osize,
753 IO_NORMAL | (ioflag & IO_SYNC),
754 ap->a_cred, uio->uio_td);
755 uio->uio_offset -= resid - uio->uio_resid;
756 uio->uio_resid = resid;
757 }
758 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
759 error = UFS_UPDATE(vp, 1);
760 return (error);
761 }
762
763 /*
764 * get page routine
765 */
766 static int
767 ffs_getpages(ap)
768 struct vop_getpages_args *ap;
769 {
770 off_t foff, physoffset;
771 int i, size, bsize;
772 struct vnode *dp, *vp;
773 vm_object_t obj;
774 vm_pindex_t pindex;
775 vm_page_t mreq;
776 int bbackwards, bforwards;
777 int pbackwards, pforwards;
778 int firstpage;
779 ufs2_daddr_t reqblkno, reqlblkno;
780 int poff;
781 int pcount;
782 int rtval;
783 int pagesperblock;
784
785 GIANT_REQUIRED;
786
787 pcount = round_page(ap->a_count) / PAGE_SIZE;
788 mreq = ap->a_m[ap->a_reqpage];
789
790 /*
791 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
792 * then the entire page is valid. Since the page may be mapped,
793 * user programs might reference data beyond the actual end of file
794 * occuring within the page. We have to zero that data.
795 */
796 VM_OBJECT_LOCK(mreq->object);
797 if (mreq->valid) {
798 if (mreq->valid != VM_PAGE_BITS_ALL)
799 vm_page_zero_invalid(mreq, TRUE);
800 vm_page_lock_queues();
801 for (i = 0; i < pcount; i++) {
802 if (i != ap->a_reqpage) {
803 vm_page_free(ap->a_m[i]);
804 }
805 }
806 vm_page_unlock_queues();
807 VM_OBJECT_UNLOCK(mreq->object);
808 return VM_PAGER_OK;
809 }
810 VM_OBJECT_UNLOCK(mreq->object);
811 vp = ap->a_vp;
812 obj = vp->v_object;
813 bsize = vp->v_mount->mnt_stat.f_iosize;
814 pindex = mreq->pindex;
815 foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;
816
817 if (bsize < PAGE_SIZE)
818 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
819 ap->a_count,
820 ap->a_reqpage);
821
822 /*
823 * foff is the file offset of the required page
824 * reqlblkno is the logical block that contains the page
825 * poff is the index of the page into the logical block
826 */
827 reqlblkno = foff / bsize;
828 poff = (foff % bsize) / PAGE_SIZE;
829
830 dp = VTOI(vp)->i_devvp;
831 if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards)
832 || (reqblkno == -1)) {
833 VM_OBJECT_LOCK(obj);
834 vm_page_lock_queues();
835 for(i = 0; i < pcount; i++) {
836 if (i != ap->a_reqpage)
837 vm_page_free(ap->a_m[i]);
838 }
839 vm_page_unlock_queues();
840 if (reqblkno == -1) {
841 if ((mreq->flags & PG_ZERO) == 0)
842 pmap_zero_page(mreq);
843 vm_page_undirty(mreq);
844 mreq->valid = VM_PAGE_BITS_ALL;
845 VM_OBJECT_UNLOCK(obj);
846 return VM_PAGER_OK;
847 } else {
848 VM_OBJECT_UNLOCK(obj);
849 return VM_PAGER_ERROR;
850 }
851 }
852
853 physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
854 pagesperblock = bsize / PAGE_SIZE;
855 /*
856 * find the first page that is contiguous...
857 * note that pbackwards is the number of pages that are contiguous
858 * backwards.
859 */
860 firstpage = 0;
861 if (ap->a_count) {
862 pbackwards = poff + bbackwards * pagesperblock;
863 if (ap->a_reqpage > pbackwards) {
864 firstpage = ap->a_reqpage - pbackwards;
865 VM_OBJECT_LOCK(obj);
866 vm_page_lock_queues();
867 for(i=0;i<firstpage;i++)
868 vm_page_free(ap->a_m[i]);
869 vm_page_unlock_queues();
870 VM_OBJECT_UNLOCK(obj);
871 }
872
873 /*
874 * pforwards is the number of pages that are contiguous
875 * after the current page.
876 */
877 pforwards = (pagesperblock - (poff + 1)) +
878 bforwards * pagesperblock;
879 if (pforwards < (pcount - (ap->a_reqpage + 1))) {
880 VM_OBJECT_LOCK(obj);
881 vm_page_lock_queues();
882 for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
883 vm_page_free(ap->a_m[i]);
884 vm_page_unlock_queues();
885 VM_OBJECT_UNLOCK(obj);
886 pcount = ap->a_reqpage + pforwards + 1;
887 }
888
889 /*
890 * number of pages for I/O corrected for the non-contig pages at
891 * the beginning of the array.
892 */
893 pcount -= firstpage;
894 }
895
896 /*
897 * calculate the size of the transfer
898 */
899
900 size = pcount * PAGE_SIZE;
901
902 if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
903 obj->un_pager.vnp.vnp_size)
904 size = obj->un_pager.vnp.vnp_size -
905 IDX_TO_OFF(ap->a_m[firstpage]->pindex);
906
907 physoffset -= foff;
908 rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
909 (ap->a_reqpage - firstpage), physoffset);
910
911 return (rtval);
912 }
913
914 /*
915 * Extended attribute area reading.
916 */
917 static int
918 ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
919 {
920 struct inode *ip;
921 struct ufs2_dinode *dp;
922 struct fs *fs;
923 struct buf *bp;
924 ufs_lbn_t lbn, nextlbn;
925 off_t bytesinfile;
926 long size, xfersize, blkoffset;
927 int error, orig_resid;
928
929 GIANT_REQUIRED;
930
931 ip = VTOI(vp);
932 fs = ip->i_fs;
933 dp = ip->i_din2;
934
935 #ifdef DIAGNOSTIC
936 if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
937 panic("ffs_extread: mode");
938
939 #endif
940 orig_resid = uio->uio_resid;
941 KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
942 if (orig_resid == 0)
943 return (0);
944 KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));
945
946 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
947 if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
948 break;
949 lbn = lblkno(fs, uio->uio_offset);
950 nextlbn = lbn + 1;
951
952 /*
953 * size of buffer. The buffer representing the
954 * end of the file is rounded up to the size of
955 * the block type ( fragment or full block,
956 * depending ).
957 */
958 size = sblksize(fs, dp->di_extsize, lbn);
959 blkoffset = blkoff(fs, uio->uio_offset);
960
961 /*
962 * The amount we want to transfer in this iteration is
963 * one FS block less the amount of the data before
964 * our startpoint (duh!)
965 */
966 xfersize = fs->fs_bsize - blkoffset;
967
968 /*
969 * But if we actually want less than the block,
970 * or the file doesn't have a whole block more of data,
971 * then use the lesser number.
972 */
973 if (uio->uio_resid < xfersize)
974 xfersize = uio->uio_resid;
975 if (bytesinfile < xfersize)
976 xfersize = bytesinfile;
977
978 if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
979 /*
980 * Don't do readahead if this is the end of the info.
981 */
982 error = bread(vp, -1 - lbn, size, NOCRED, &bp);
983 } else {
984 /*
985 * If we have a second block, then
986 * fire off a request for a readahead
987 * as well as a read. Note that the 4th and 5th
988 * arguments point to arrays of the size specified in
989 * the 6th argument.
990 */
991 int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
992
993 nextlbn = -1 - nextlbn;
994 error = breadn(vp, -1 - lbn,
995 size, &nextlbn, &nextsize, 1, NOCRED, &bp);
996 }
997 if (error) {
998 brelse(bp);
999 bp = NULL;
1000 break;
1001 }
1002
1003 /*
1004 * If IO_DIRECT then set B_DIRECT for the buffer. This
1005 * will cause us to attempt to release the buffer later on
1006 * and will cause the buffer cache to attempt to free the
1007 * underlying pages.
1008 */
1009 if (ioflag & IO_DIRECT)
1010 bp->b_flags |= B_DIRECT;
1011
1012 /*
1013 * We should only get non-zero b_resid when an I/O error
1014 * has occurred, which should cause us to break above.
1015 * However, if the short read did not cause an error,
1016 * then we want to ensure that we do not uiomove bad
1017 * or uninitialized data.
1018 */
1019 size -= bp->b_resid;
1020 if (size < xfersize) {
1021 if (size == 0)
1022 break;
1023 xfersize = size;
1024 }
1025
1026 error = uiomove((char *)bp->b_data + blkoffset,
1027 (int)xfersize, uio);
1028 if (error)
1029 break;
1030
1031 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1032 (LIST_FIRST(&bp->b_dep) == NULL)) {
1033 /*
1034 * If there are no dependencies, and it's VMIO,
1035 * then we don't need the buf, mark it available
1036 * for freeing. The VM has the data.
1037 */
1038 bp->b_flags |= B_RELBUF;
1039 brelse(bp);
1040 } else {
1041 /*
1042 * Otherwise let whoever
1043 * made the request take care of
1044 * freeing it. We just queue
1045 * it onto another list.
1046 */
1047 bqrelse(bp);
1048 }
1049 }
1050
1051 /*
1052 * This can only happen in the case of an error
1053 * because the loop above resets bp to NULL on each iteration
1054 * and on normal completion has not set a new value into it.
1055 * so it must have come from a 'break' statement
1056 */
1057 if (bp != NULL) {
1058 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1059 (LIST_FIRST(&bp->b_dep) == NULL)) {
1060 bp->b_flags |= B_RELBUF;
1061 brelse(bp);
1062 } else {
1063 bqrelse(bp);
1064 }
1065 }
1066
1067 if ((error == 0 || uio->uio_resid != orig_resid) &&
1068 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1069 ip->i_flag |= IN_ACCESS;
1070 return (error);
1071 }
1072
1073 /*
1074 * Extended attribute area writing.
1075 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;		/* extsize on entry; rollback target for IO_UNIT */
	int blkoffset, error, flags, resid, size, xfersize;

	GIANT_REQUIRED;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	/* O_APPEND semantics: start at the current end of the EA area. */
	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
	/* The extended attribute area is limited to NXADDR direct blocks. */
	if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
		return (EFBIG);

	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	/* Transfer at most one filesystem block per iteration. */
	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap(). XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/* Extend the recorded EA size if this write grows the area. */
		if (uio->uio_offset + xfersize > dp->di_extsize)
			dp->di_extsize = uio->uio_offset + xfersize;

		/* Clamp the copy to the valid portion of the buffer. */
		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		/*
		 * For VMIO/direct I/O with no dependencies, release the
		 * buffer after the write below; the VM keeps the data.
		 */
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously. Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously. Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		/* uiomove() failure or a zero-length transfer ends the loop. */
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ucred &&
	    suser_cred(ucred, SUSER_ALLOWJAIL)) {
		ip->i_mode &= ~(ISUID | ISGID);
		dp->di_mode = ip->i_mode;
	}
	if (error) {
		/* IO_UNIT: undo the partial write so the op is all-or-nothing. */
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(vp, osize,
			    IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		/* A successful sync write also pushes the inode to disk. */
		error = UFS_UPDATE(vp, 1);
	return (error);
}
1197
1198
1199 /*
1200 * Vnode operating to retrieve a named extended attribute.
1201 *
1202 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
1203 * the length of the EA, and possibly the pointer to the entry and to the data.
1204 */
/*
 * Each record in the EA area is laid out as:
 *	uint32_t ul;		total record length (multiple of 8)
 *	u_char	 nspace;	attribute namespace
 *	u_char	 eapad2;	bytes of padding after the content
 *	u_char	 nlen;		name length
 *	char	 name[nlen];	attribute name (not NUL terminated)
 *	eapad1 pad bytes rounding the header to an 8 byte boundary,
 *	then the content followed by eapad2 pad bytes.
 *
 * Returns the content length of the matching attribute, or -1 when the
 * attribute is absent or the area is exhausted/malformed.
 */
static int
ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac)
{
	u_char *p, *pe, *pn, *p0;
	int eapad1, eapad2, ealength, ealen, nlen;
	uint32_t ul;

	pe = ptr + length;
	nlen = strlen(name);

	for (p = ptr; p < pe; p = pn) {
		p0 = p;
		bcopy(p, &ul, sizeof(ul));
		/*
		 * A record smaller than its fixed 7-byte header cannot be
		 * valid.  In particular ul == 0 would otherwise leave pn
		 * equal to p and spin forever on a corrupted EA area.
		 */
		if (ul < sizeof(uint32_t) + 3)
			break;
		pn = p + ul;
		/* make sure this entry is complete */
		if (pn > pe)
			break;
		p += sizeof(uint32_t);
		if (*p != nspace)
			continue;
		p++;
		eapad2 = *p++;
		if (*p != nlen)
			continue;
		p++;
		if (bcmp(p, name, nlen))
			continue;
		/* Header length, rounded up to an 8 byte boundary. */
		ealength = sizeof(uint32_t) + 3 + nlen;
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		ealength += eapad1;
		/* Content length excludes the header and trailing pad. */
		ealen = ul - ealength - eapad2;
		p += nlen + eapad1;
		if (eap != NULL)
			*eap = p0;
		if (eac != NULL)
			*eac = p;
		return (ealen);
	}
	return (-1);
}
1247
1248 static int
1249 ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
1250 {
1251 struct inode *ip;
1252 struct ufs2_dinode *dp;
1253 struct uio luio;
1254 struct iovec liovec;
1255 int easize, error;
1256 u_char *eae;
1257
1258 ip = VTOI(vp);
1259 dp = ip->i_din2;
1260 easize = dp->di_extsize;
1261
1262 eae = malloc(easize + extra, M_TEMP, M_WAITOK);
1263
1264 liovec.iov_base = eae;
1265 liovec.iov_len = easize;
1266 luio.uio_iov = &liovec;
1267 luio.uio_iovcnt = 1;
1268 luio.uio_offset = 0;
1269 luio.uio_resid = easize;
1270 luio.uio_segflg = UIO_SYSSPACE;
1271 luio.uio_rw = UIO_READ;
1272 luio.uio_td = td;
1273
1274 error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
1275 if (error) {
1276 free(eae, M_TEMP);
1277 return(error);
1278 }
1279 *p = eae;
1280 return (0);
1281 }
1282
1283 static int
1284 ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
1285 {
1286 struct inode *ip;
1287 struct ufs2_dinode *dp;
1288 int error;
1289
1290 ip = VTOI(vp);
1291
1292 if (ip->i_ea_area != NULL)
1293 return (EBUSY);
1294 dp = ip->i_din2;
1295 error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
1296 if (error)
1297 return (error);
1298 ip->i_ea_len = dp->di_extsize;
1299 ip->i_ea_error = 0;
1300 return (0);
1301 }
1302
1303 /*
1304 * Vnode extattr transaction commit/abort
1305 */
1306 static int
1307 ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
1308 {
1309 struct inode *ip;
1310 struct uio luio;
1311 struct iovec liovec;
1312 int error;
1313 struct ufs2_dinode *dp;
1314
1315 ip = VTOI(vp);
1316 if (ip->i_ea_area == NULL)
1317 return (EINVAL);
1318 dp = ip->i_din2;
1319 error = ip->i_ea_error;
1320 if (commit && error == 0) {
1321 if (cred == NOCRED)
1322 cred = vp->v_mount->mnt_cred;
1323 liovec.iov_base = ip->i_ea_area;
1324 liovec.iov_len = ip->i_ea_len;
1325 luio.uio_iov = &liovec;
1326 luio.uio_iovcnt = 1;
1327 luio.uio_offset = 0;
1328 luio.uio_resid = ip->i_ea_len;
1329 luio.uio_segflg = UIO_SYSSPACE;
1330 luio.uio_rw = UIO_WRITE;
1331 luio.uio_td = td;
1332 /* XXX: I'm not happy about truncating to zero size */
1333 if (ip->i_ea_len < dp->di_extsize)
1334 error = ffs_truncate(vp, 0, IO_EXT, cred, td);
1335 error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
1336 }
1337 free(ip->i_ea_area, M_TEMP);
1338 ip->i_ea_area = NULL;
1339 ip->i_ea_len = 0;
1340 ip->i_ea_error = 0;
1341 return (error);
1342 }
1343
1344 /*
1345 * Vnode extattr strategy routine for special devices and fifos.
1346 *
1347 * We need to check for a read or write of the external attributes.
1348 * Otherwise we just fall through and do the usual thing.
1349 */
1350 static int
1351 ffsext_strategy(struct vop_strategy_args *ap)
1352 /*
1353 struct vop_strategy_args {
1354 struct vnodeop_desc *a_desc;
1355 struct vnode *a_vp;
1356 struct buf *a_bp;
1357 };
1358 */
1359 {
1360 struct vnode *vp;
1361 daddr_t lbn;
1362
1363 KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)",
1364 __func__, ap->a_vp, ap->a_bp->b_vp));
1365 vp = ap->a_vp;
1366 lbn = ap->a_bp->b_lblkno;
1367 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
1368 lbn < 0 && lbn >= -NXADDR)
1369 return (ufs_vnoperate((struct vop_generic_args *)ap));
1370 if (vp->v_type == VFIFO)
1371 return (ufs_vnoperatefifo((struct vop_generic_args *)ap));
1372 return (ufs_vnoperatespec((struct vop_generic_args *)ap));
1373 }
1374
1375 /*
1376 * Vnode extattr transaction commit/abort
1377 */
1378 static int
1379 ffs_openextattr(struct vop_openextattr_args *ap)
1380 /*
1381 struct vop_openextattr_args {
1382 struct vnodeop_desc *a_desc;
1383 struct vnode *a_vp;
1384 IN struct ucred *a_cred;
1385 IN struct thread *a_td;
1386 };
1387 */
1388 {
1389 struct inode *ip;
1390 struct fs *fs;
1391
1392 ip = VTOI(ap->a_vp);
1393 fs = ip->i_fs;
1394 if (fs->fs_magic == FS_UFS1_MAGIC)
1395 return (ufs_vnoperate((struct vop_generic_args *)ap));
1396
1397 if (ap->a_vp->v_type == VCHR)
1398 return (EOPNOTSUPP);
1399
1400 return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
1401 }
1402
1403
1404 /*
1405 * Vnode extattr transaction commit/abort
1406 */
1407 static int
1408 ffs_closeextattr(struct vop_closeextattr_args *ap)
1409 /*
1410 struct vop_closeextattr_args {
1411 struct vnodeop_desc *a_desc;
1412 struct vnode *a_vp;
1413 int a_commit;
1414 IN struct ucred *a_cred;
1415 IN struct thread *a_td;
1416 };
1417 */
1418 {
1419 struct inode *ip;
1420 struct fs *fs;
1421
1422 ip = VTOI(ap->a_vp);
1423 fs = ip->i_fs;
1424 if (fs->fs_magic == FS_UFS1_MAGIC)
1425 return (ufs_vnoperate((struct vop_generic_args *)ap));
1426
1427 if (ap->a_vp->v_type == VCHR)
1428 return (EOPNOTSUPP);
1429
1430 return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
1431 }
1432
1433 /*
1434 * Vnode operation to remove a named attribute.
1435 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;	/* no EA transaction was open on entry */

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	/* UFS1 has no extended attribute area; use the generic path. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnoperate((struct vop_generic_args *)ap));

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		/* Poison an open transaction so its commit fails too. */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	/*
	 * With no transaction already open, wrap this single delete in a
	 * private open/commit cycle.
	 */
	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	/* ealength == 0: the record is removed rather than replaced. */
	ealength = eapad1 = ealen = eapad2 = 0;

	/* Edit a copy so failure leaves the open transaction intact. */
	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &p, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return(ENOATTR);
	}
	/* ul is the victim record's total length; i its end offset in eae. */
	bcopy(p, &ul, sizeof ul);
	i = p - eae + ul;
	/* Slide the tail of the area down over the deleted record. */
	if (ul != ealength) {
		bcopy(p + ul, p + ealength, easize - i);
		easize += (ealength - ul);
	}
	/* A delete cannot grow the area, but keep the guard for symmetry. */
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	/* Install the edited copy as the transaction's working area. */
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}
1521
1522 /*
1523 * Vnode operation to retrieve a named extended attribute.
1524 */
1525 static int
1526 ffs_getextattr(struct vop_getextattr_args *ap)
1527 /*
1528 vop_getextattr {
1529 IN struct vnode *a_vp;
1530 IN int a_attrnamespace;
1531 IN const char *a_name;
1532 INOUT struct uio *a_uio;
1533 OUT size_t *a_size;
1534 IN struct ucred *a_cred;
1535 IN struct thread *a_td;
1536 };
1537 */
1538 {
1539 struct inode *ip;
1540 struct fs *fs;
1541 u_char *eae, *p;
1542 unsigned easize;
1543 int error, ealen, stand_alone;
1544
1545 ip = VTOI(ap->a_vp);
1546 fs = ip->i_fs;
1547
1548 if (fs->fs_magic == FS_UFS1_MAGIC)
1549 return (ufs_vnoperate((struct vop_generic_args *)ap));
1550
1551 if (ap->a_vp->v_type == VCHR)
1552 return (EOPNOTSUPP);
1553
1554 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1555 ap->a_cred, ap->a_td, IREAD);
1556 if (error)
1557 return (error);
1558
1559 if (ip->i_ea_area == NULL) {
1560 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1561 if (error)
1562 return (error);
1563 stand_alone = 1;
1564 } else {
1565 stand_alone = 0;
1566 }
1567 eae = ip->i_ea_area;
1568 easize = ip->i_ea_len;
1569
1570 ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1571 NULL, &p);
1572 if (ealen >= 0) {
1573 error = 0;
1574 if (ap->a_size != NULL)
1575 *ap->a_size = ealen;
1576 else if (ap->a_uio != NULL)
1577 error = uiomove(p, ealen, ap->a_uio);
1578 } else
1579 error = ENOATTR;
1580 if (stand_alone)
1581 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1582 return(error);
1583 }
1584
1585 /*
1586 * Vnode operation to retrieve extended attributes on a vnode.
1587 */
static int
ffs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p, *pe, *pn;
	unsigned easize;
	uint32_t ul;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	/* UFS1 has no extended attribute area; use the generic path. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnoperate((struct vop_generic_args *)ap));

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IREAD);
	if (error)
		return (error);

	/* Open a private transaction if the caller has none in progress. */
	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	error = 0;
	if (ap->a_size != NULL)
		*ap->a_size = 0;
	pe = eae + easize;
	/*
	 * Walk every record, emitting "length byte + name" for each entry
	 * in the requested namespace (or just totalling the sizes).
	 */
	for(p = eae; error == 0 && p < pe; p = pn) {
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		/* stop on a record that runs past the end of the area */
		if (pn > pe)
			break;
		p += sizeof(ul);
		if (*p++ != ap->a_attrnamespace)
			continue;
		p++;	/* pad2 */
		/* p now points at the name-length byte; the name follows. */
		ealen = *p;
		if (ap->a_size != NULL) {
			*ap->a_size += ealen + 1;
		} else if (ap->a_uio != NULL) {
			/* copies the length prefix and the name together */
			error = uiomove(p, ealen + 1, ap->a_uio);
		}
	}
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return(error);
}
1657
1658 /*
1659 * Vnode operation to set a named attribute.
1660 */
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;	/* no EA transaction was open on entry */

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	/* UFS1 has no extended attribute area; use the generic path. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnoperate((struct vop_generic_args *)ap));

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	/* XXX Now unsupported API to delete EAs using NULL uio. */
	if (ap->a_uio == NULL)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		/* Poison an open transaction so its commit fails too. */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	/*
	 * With no transaction already open, wrap this single set in a
	 * private open/commit cycle.
	 */
	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	/*
	 * Compute the new record's total length: the 7-byte fixed header
	 * plus the name, each padded out to an 8 byte boundary, plus the
	 * content and its trailing pad.
	 */
	ealen = ap->a_uio->uio_resid;
	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
	eapad1 = 8 - (ealength % 8);
	if (eapad1 == 8)
		eapad1 = 0;
	eapad2 = 8 - (ealen % 8);
	if (eapad2 == 8)
		eapad2 = 0;
	ealength += eapad1 + ealen + eapad2;

	/* Edit a copy, oversized for the new record, so failure is safe. */
	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize,
	    ap->a_attrnamespace, ap->a_name, &p, NULL);
        if (olen == -1) {
		/* new, append at end */
		p = eae + easize;
		easize += ealength;
	} else {
		/* replace in place: resize the slot if the length changed */
		bcopy(p, &ul, sizeof ul);
		i = p - eae + ul;
		if (ul != ealength) {
			bcopy(p + ul, p + ealength, easize - i);
			easize += (ealength - ul);
		}
	}
	/* The extended attribute area is limited to NXADDR direct blocks. */
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	/* Serialize the record: header, name, pad, then the content. */
	bcopy(&ealength, p, sizeof(ealength));
	p += sizeof(ealength);
	*p++ = ap->a_attrnamespace;
	*p++ = eapad2;
	*p++ = strlen(ap->a_name);
	/* strcpy's NUL lands in the pad/content region written just below */
	strcpy(p, ap->a_name);
	p += strlen(ap->a_name);
	bzero(p, eapad1);
	p += eapad1;
	error = uiomove(p, ealen, ap->a_uio);
	if (error) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return(error);
	}
	p += ealen;
	bzero(p, eapad2);

	/* Install the edited copy as the transaction's working area. */
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}
Cache object: 0dd607efc26db8fc6cbb5611b2e0f952
|