1 /*
2 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Copyright (c) 1982, 1986, 1989, 1993
12 * The Regents of the University of California. All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by the University of
25 * California, Berkeley and its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 *
42 * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95
43 */
44
45 #include <sys/cdefs.h>
46 __FBSDID("$FreeBSD: releng/5.2/sys/ufs/ffs/ffs_vnops.c 129737 2004-05-25 23:07:55Z des $");
47
48 #include <sys/param.h>
49 #include <sys/bio.h>
50 #include <sys/systm.h>
51 #include <sys/buf.h>
52 #include <sys/conf.h>
53 #include <sys/extattr.h>
54 #include <sys/kernel.h>
55 #include <sys/limits.h>
56 #include <sys/malloc.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/resourcevar.h>
60 #include <sys/signalvar.h>
61 #include <sys/stat.h>
62 #include <sys/vmmeter.h>
63 #include <sys/vnode.h>
64
65 #include <vm/vm.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_pager.h>
70 #include <vm/vnode_pager.h>
71
72 #include <ufs/ufs/extattr.h>
73 #include <ufs/ufs/quota.h>
74 #include <ufs/ufs/inode.h>
75 #include <ufs/ufs/ufs_extern.h>
76 #include <ufs/ufs/ufsmount.h>
77
78 #include <ufs/ffs/fs.h>
79 #include <ufs/ffs/ffs_extern.h>
80 #include "opt_directio.h"
81
82 #ifdef DIRECTIO
83 extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
84 #endif
85 static int ffs_fsync(struct vop_fsync_args *);
86 static int ffs_getpages(struct vop_getpages_args *);
87 static int ffs_read(struct vop_read_args *);
88 static int ffs_write(struct vop_write_args *);
89 static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
90 static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
91 struct ucred *cred);
92 static int ffsext_strategy(struct vop_strategy_args *);
93 static int ffs_closeextattr(struct vop_closeextattr_args *);
94 static int ffs_deleteextattr(struct vop_deleteextattr_args *);
95 static int ffs_getextattr(struct vop_getextattr_args *);
96 static int ffs_listextattr(struct vop_listextattr_args *);
97 static int ffs_openextattr(struct vop_openextattr_args *);
98 static int ffs_setextattr(struct vop_setextattr_args *);
99
100
/* Global vfs data structures for ufs. */

/*
 * Regular-file/directory vnode operations.  Any operation not listed
 * here falls through to the generic UFS handler via vop_default_desc.
 * FFS overrides fsync, getpages, read, write, block reallocation, and
 * the extended-attribute entry points.
 */
vop_t **ffs_vnodeop_p;
static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) ufs_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
	{ &vop_getpages_desc,		(vop_t *) ffs_getpages },
	{ &vop_read_desc,		(vop_t *) ffs_read },
	{ &vop_reallocblks_desc,	(vop_t *) ffs_reallocblks },
	{ &vop_write_desc,		(vop_t *) ffs_write },
	{ &vop_closeextattr_desc,	(vop_t *) ffs_closeextattr },
	{ &vop_deleteextattr_desc,	(vop_t *) ffs_deleteextattr },
	{ &vop_getextattr_desc,		(vop_t *) ffs_getextattr },
	{ &vop_listextattr_desc,	(vop_t *) ffs_listextattr },
	{ &vop_openextattr_desc,	(vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc,		(vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_vnodeop_opv_desc =
	{ &ffs_vnodeop_p, ffs_vnodeop_entries };

/*
 * Special-device vnode operations; defaults go to ufs_vnoperatespec.
 * Note that strategy is routed through ffsext_strategy so extattr
 * I/O (negative logical blocks) is handled correctly.
 */
vop_t **ffs_specop_p;
static struct vnodeopv_entry_desc ffs_specop_entries[] = {
	{ &vop_default_desc,		(vop_t *) ufs_vnoperatespec },
	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
	{ &vop_reallocblks_desc,	(vop_t *) ffs_reallocblks },
	{ &vop_strategy_desc,		(vop_t *) ffsext_strategy },
	{ &vop_closeextattr_desc,	(vop_t *) ffs_closeextattr },
	{ &vop_deleteextattr_desc,	(vop_t *) ffs_deleteextattr },
	{ &vop_getextattr_desc,		(vop_t *) ffs_getextattr },
	{ &vop_listextattr_desc,	(vop_t *) ffs_listextattr },
	{ &vop_openextattr_desc,	(vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc,		(vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_specop_opv_desc =
	{ &ffs_specop_p, ffs_specop_entries };

/*
 * FIFO vnode operations; defaults go to ufs_vnoperatefifo.  The same
 * fsync/reallocblks/strategy/extattr overrides as for special devices.
 */
vop_t **ffs_fifoop_p;
static struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
	{ &vop_default_desc,		(vop_t *) ufs_vnoperatefifo },
	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
	{ &vop_reallocblks_desc,	(vop_t *) ffs_reallocblks },
	{ &vop_strategy_desc,		(vop_t *) ffsext_strategy },
	{ &vop_closeextattr_desc,	(vop_t *) ffs_closeextattr },
	{ &vop_deleteextattr_desc,	(vop_t *) ffs_deleteextattr },
	{ &vop_getextattr_desc,		(vop_t *) ffs_getextattr },
	{ &vop_listextattr_desc,	(vop_t *) ffs_listextattr },
	{ &vop_openextattr_desc,	(vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc,		(vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_fifoop_opv_desc =
	{ &ffs_fifoop_p, ffs_fifoop_entries };

/* Register the three operation vectors with the VFS at boot. */
VNODEOP_SET(ffs_vnodeop_opv_desc);
VNODEOP_SET(ffs_specop_opv_desc);
VNODEOP_SET(ffs_fifoop_opv_desc);
158
159 /*
160 * Synch an open file.
161 */
162 /* ARGSUSED */
static int
ffs_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (ap->a_waitfor == MNT_WAIT);
	if (vn_isdisk(vp, NULL)) {
		/*
		 * A device vnode has no meaningful EOF, so no buffer can
		 * be "beyond" the file; use INT_MAX so the truncation
		 * check below never fires.
		 */
		lbn = INT_MAX;
		if (vp->v_rdev->si_mountpoint != NULL &&
		    (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP))
			softdep_fsync_mountdev(vp);
	} else {
		/* Last logical block covered by the current file size. */
		lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 *
	 * For a synchronous (MNT_WAIT) request we make two kinds of
	 * passes: the first with skipmeta set skips metadata buffers
	 * (negative lbns) so file data goes out first, then up to
	 * NIADDR+1 further passes pick up metadata and any buffers
	 * redirtied by dependency processing.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;
	s = splbio();
	VI_LOCK(vp);
loop:
	/* Clear the scan marks so this pass visits every dirty buffer. */
	TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
		bp->b_vflags &= ~BV_SCANNED;
	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = TAILQ_NEXT(bp, b_vnbufs);
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if ((skipmeta == 1 && bp->b_lblkno < 0))
			continue;
		/* Already locked means someone else is writing it; skip. */
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			/*
			 * Writing now would just be redirtied by softdep;
			 * defer it to a later pass.
			 */
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			continue;
		}
		VI_UNLOCK(vp);
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		if (vp != bp->b_vp)
			panic("ffs_fsync: vp != vp->b_vp");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediatly.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {

			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				/* Final pass: synchronous write so errors
				 * propagate back to the caller. */
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else
			vfs_bio_awrite(bp);

		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		VI_LOCK(vp);
		nbp = TAILQ_FIRST(&vp->v_dirtyblkhd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		/* Wait for all writes started above to drain. */
		while (vp->v_numoutput) {
			vp->v_iflag |= VI_BWAIT;
			msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
			    PRIBIO + 4, "ffsfsn", 0);
		}
		VI_UNLOCK(vp);

		/*
		 * Ensure that any filesystem metatdata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(ap)) != 0)
			return (error);
		s = splbio();

		VI_LOCK(vp);
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	VI_UNLOCK(vp);
	splx(s);
	/* Finally push the inode itself (synchronously if MNT_WAIT). */
	return (UFS_UPDATE(vp, wait));
}
327
328
329 /*
330 * Vnode op for reading.
331 */
332 /* ARGSUSED */
static int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	int seqcount;
	int ioflag;
	vm_object_t object;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	/* Extended-attribute reads are not wired up through this path yet. */
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		/*
		 * Try the raw (uncached) read path first; if it did the
		 * work or failed, we are done.  workdone == 0 means the
		 * request could not be handled raw and we fall through
		 * to the buffered path below.
		 */
		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return error;
	}
#endif

	GIANT_REQUIRED;

	/* Sequential-access heuristic is packed into the high ioflag bits. */
	seqcount = ap->a_ioflag >> 16;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		/* Short symlinks live in the inode, not in data blocks. */
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	fs = ip->i_fs;
	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);

	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	object = vp->v_object;

	bytesinfile = ip->i_size - uio->uio_offset;
	if (bytesinfile <= 0) {
		/* Read at or past EOF: nothing to do, but mark atime. */
		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
			ip->i_flag |= IN_ACCESS;
		return 0;
	}

	/* Hold a reference on the backing VM object across the I/O. */
	if (object) {
		vm_object_reference(object);
	}

	/*
	 * Ok so we couldn't do it all in one vm trick...
	 * so cycle around trying smaller bites..
	 */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;

		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
				size, NOCRED, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		{
			/*
			 * otherwise use the general form
			 */
			error =
				uiomove((char *)bp->b_data + blkoffset,
					(int)xfersize, uio);
		}

		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing. The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	/* Drop the VM object reference taken above. */
	if (object) {
		VM_OBJECT_LOCK(object);
		vm_object_vndeallocate(object);
	}
	/* Mark the inode accessed if we moved any data (or had no error). */
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
568
569 /*
570 * Vnode op for writing.
571 */
572 static int
573 ffs_write(ap)
574 struct vop_write_args /* {
575 struct vnode *a_vp;
576 struct uio *a_uio;
577 int a_ioflag;
578 struct ucred *a_cred;
579 } */ *ap;
580 {
581 struct vnode *vp;
582 struct uio *uio;
583 struct inode *ip;
584 struct fs *fs;
585 struct buf *bp;
586 struct thread *td;
587 ufs_lbn_t lbn;
588 off_t osize;
589 int seqcount;
590 int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
591 vm_object_t object;
592
593 vp = ap->a_vp;
594 uio = ap->a_uio;
595 ioflag = ap->a_ioflag;
596 if (ap->a_ioflag & IO_EXT)
597 #ifdef notyet
598 return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
599 #else
600 panic("ffs_read+IO_EXT");
601 #endif
602
603 GIANT_REQUIRED;
604
605 extended = 0;
606 seqcount = ap->a_ioflag >> 16;
607 ip = VTOI(vp);
608
609 object = vp->v_object;
610 if (object) {
611 vm_object_reference(object);
612 }
613
614 #ifdef DIAGNOSTIC
615 if (uio->uio_rw != UIO_WRITE)
616 panic("ffswrite: mode");
617 #endif
618
619 switch (vp->v_type) {
620 case VREG:
621 if (ioflag & IO_APPEND)
622 uio->uio_offset = ip->i_size;
623 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
624 if (object) {
625 VM_OBJECT_LOCK(object);
626 vm_object_vndeallocate(object);
627 }
628 return (EPERM);
629 }
630 /* FALLTHROUGH */
631 case VLNK:
632 break;
633 case VDIR:
634 panic("ffswrite: dir write");
635 break;
636 default:
637 panic("ffswrite: type %p %d (%d,%d)", vp, (int)vp->v_type,
638 (int)uio->uio_offset,
639 (int)uio->uio_resid
640 );
641 }
642
643 fs = ip->i_fs;
644 if (uio->uio_offset < 0 ||
645 (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
646 if (object) {
647 VM_OBJECT_LOCK(object);
648 vm_object_vndeallocate(object);
649 }
650 return (EFBIG);
651 }
652 /*
653 * Maybe this should be above the vnode op call, but so long as
654 * file servers have no limits, I don't think it matters.
655 */
656 td = uio->uio_td;
657 if (vp->v_type == VREG && td &&
658 uio->uio_offset + uio->uio_resid >
659 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
660 PROC_LOCK(td->td_proc);
661 psignal(td->td_proc, SIGXFSZ);
662 PROC_UNLOCK(td->td_proc);
663 if (object) {
664 VM_OBJECT_LOCK(object);
665 vm_object_vndeallocate(object);
666 }
667 return (EFBIG);
668 }
669
670 resid = uio->uio_resid;
671 osize = ip->i_size;
672 if (seqcount > BA_SEQMAX)
673 flags = BA_SEQMAX << BA_SEQSHIFT;
674 else
675 flags = seqcount << BA_SEQSHIFT;
676 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
677 flags |= IO_SYNC;
678
679 for (error = 0; uio->uio_resid > 0;) {
680 lbn = lblkno(fs, uio->uio_offset);
681 blkoffset = blkoff(fs, uio->uio_offset);
682 xfersize = fs->fs_bsize - blkoffset;
683 if (uio->uio_resid < xfersize)
684 xfersize = uio->uio_resid;
685
686 if (uio->uio_offset + xfersize > ip->i_size)
687 vnode_pager_setsize(vp, uio->uio_offset + xfersize);
688
689 /*
690 * We must perform a read-before-write if the transfer size
691 * does not cover the entire buffer.
692 */
693 if (fs->fs_bsize > xfersize)
694 flags |= BA_CLRBUF;
695 else
696 flags &= ~BA_CLRBUF;
697 /* XXX is uio->uio_offset the right thing here? */
698 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
699 ap->a_cred, flags, &bp);
700 if (error != 0)
701 break;
702 /*
703 * If the buffer is not valid we have to clear out any
704 * garbage data from the pages instantiated for the buffer.
705 * If we do not, a failed uiomove() during a write can leave
706 * the prior contents of the pages exposed to a userland
707 * mmap(). XXX deal with uiomove() errors a better way.
708 */
709 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
710 vfs_bio_clrbuf(bp);
711 if (ioflag & IO_DIRECT)
712 bp->b_flags |= B_DIRECT;
713 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
714 bp->b_flags |= B_NOCACHE;
715
716 if (uio->uio_offset + xfersize > ip->i_size) {
717 ip->i_size = uio->uio_offset + xfersize;
718 DIP(ip, i_size) = ip->i_size;
719 extended = 1;
720 }
721
722 size = blksize(fs, ip, lbn) - bp->b_resid;
723 if (size < xfersize)
724 xfersize = size;
725
726 error =
727 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
728 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
729 (LIST_FIRST(&bp->b_dep) == NULL)) {
730 bp->b_flags |= B_RELBUF;
731 }
732
733 /*
734 * If IO_SYNC each buffer is written synchronously. Otherwise
735 * if we have a severe page deficiency write the buffer
736 * asynchronously. Otherwise try to cluster, and if that
737 * doesn't do it then either do an async write (if O_DIRECT),
738 * or a delayed write (if not).
739 */
740 if (ioflag & IO_SYNC) {
741 (void)bwrite(bp);
742 } else if (vm_page_count_severe() ||
743 buf_dirty_count_severe() ||
744 (ioflag & IO_ASYNC)) {
745 bp->b_flags |= B_CLUSTEROK;
746 bawrite(bp);
747 } else if (xfersize + blkoffset == fs->fs_bsize) {
748 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
749 bp->b_flags |= B_CLUSTEROK;
750 cluster_write(bp, ip->i_size, seqcount);
751 } else {
752 bawrite(bp);
753 }
754 } else if (ioflag & IO_DIRECT) {
755 bp->b_flags |= B_CLUSTEROK;
756 bawrite(bp);
757 } else {
758 bp->b_flags |= B_CLUSTEROK;
759 bdwrite(bp);
760 }
761 if (error || xfersize == 0)
762 break;
763 ip->i_flag |= IN_CHANGE | IN_UPDATE;
764 }
765 /*
766 * If we successfully wrote any data, and we are not the superuser
767 * we clear the setuid and setgid bits as a precaution against
768 * tampering.
769 */
770 if (resid > uio->uio_resid && ap->a_cred &&
771 suser_cred(ap->a_cred, PRISON_ROOT)) {
772 ip->i_mode &= ~(ISUID | ISGID);
773 DIP(ip, i_mode) = ip->i_mode;
774 }
775 if (resid > uio->uio_resid)
776 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
777 if (error) {
778 if (ioflag & IO_UNIT) {
779 (void)UFS_TRUNCATE(vp, osize,
780 IO_NORMAL | (ioflag & IO_SYNC),
781 ap->a_cred, uio->uio_td);
782 uio->uio_offset -= resid - uio->uio_resid;
783 uio->uio_resid = resid;
784 }
785 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
786 error = UFS_UPDATE(vp, 1);
787
788 if (object) {
789 VM_OBJECT_LOCK(object);
790 vm_object_vndeallocate(object);
791 }
792
793 return (error);
794 }
795
796 /*
797 * get page routine
798 */
static int
ffs_getpages(ap)
	struct vop_getpages_args *ap;
{
	off_t foff, physoffset;
	int i, size, bsize;
	struct vnode *dp, *vp;
	vm_object_t obj;
	vm_pindex_t pindex;
	vm_page_t mreq;
	int bbackwards, bforwards;
	int pbackwards, pforwards;
	int firstpage;
	ufs2_daddr_t reqblkno, reqlblkno;
	int poff;
	int pcount;
	int rtval;
	int pagesperblock;

	GIANT_REQUIRED;

	pcount = round_page(ap->a_count) / PAGE_SIZE;
	mreq = ap->a_m[ap->a_reqpage];

	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid. Since the page may be mapped,
	 * user programs might reference data beyond the actual end of file
	 * occuring within the page. We have to zero that data.
	 */
	VM_OBJECT_LOCK(mreq->object);
	if (mreq->valid) {
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		/* Requested page already valid; free the extra pages. */
		vm_page_lock_queues();
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage) {
				vm_page_free(ap->a_m[i]);
			}
		}
		vm_page_unlock_queues();
		VM_OBJECT_UNLOCK(mreq->object);
		return VM_PAGER_OK;
	}
	VM_OBJECT_UNLOCK(mreq->object);
	vp = ap->a_vp;
	obj = vp->v_object;
	bsize = vp->v_mount->mnt_stat.f_iosize;
	pindex = mreq->pindex;
	foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;

	/*
	 * For small (fragment-sized) blocks the contiguity math below
	 * does not apply; punt to the generic pager.
	 */
	if (bsize < PAGE_SIZE)
		return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
						    ap->a_count,
						    ap->a_reqpage);

	/*
	 * foff is the file offset of the required page
	 * reqlblkno is the logical block that contains the page
	 * poff is the index of the page into the logical block
	 */
	reqlblkno = foff / bsize;
	poff = (foff % bsize) / PAGE_SIZE;

	dp = VTOI(vp)->i_devvp;
	/* Map the logical block to a device block; -1 means a hole. */
	if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards)
	    || (reqblkno == -1)) {
		VM_OBJECT_LOCK(obj);
		vm_page_lock_queues();
		for(i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage)
				vm_page_free(ap->a_m[i]);
		}
		vm_page_unlock_queues();
		if (reqblkno == -1) {
			/* Hole in the file: hand back a zero-filled page. */
			if ((mreq->flags & PG_ZERO) == 0)
				pmap_zero_page(mreq);
			vm_page_undirty(mreq);
			mreq->valid = VM_PAGE_BITS_ALL;
			VM_OBJECT_UNLOCK(obj);
			return VM_PAGER_OK;
		} else {
			VM_OBJECT_UNLOCK(obj);
			return VM_PAGER_ERROR;
		}
	}

	physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
	pagesperblock = bsize / PAGE_SIZE;
	/*
	 * find the first page that is contiguous...
	 * note that pbackwards is the number of pages that are contiguous
	 * backwards.
	 */
	firstpage = 0;
	if (ap->a_count) {
		pbackwards = poff + bbackwards * pagesperblock;
		if (ap->a_reqpage > pbackwards) {
			/* Pages before the contiguous run can't be read
			 * in this I/O; free them. */
			firstpage = ap->a_reqpage - pbackwards;
			VM_OBJECT_LOCK(obj);
			vm_page_lock_queues();
			for(i=0;i<firstpage;i++)
				vm_page_free(ap->a_m[i]);
			vm_page_unlock_queues();
			VM_OBJECT_UNLOCK(obj);
		}

	/*
	 * pforwards is the number of pages that are contiguous
	 * after the current page.
	 */
		pforwards = (pagesperblock - (poff + 1)) +
			bforwards * pagesperblock;
		if (pforwards < (pcount - (ap->a_reqpage + 1))) {
			/* Trailing pages past the contiguous run likewise
			 * can't be covered; free and trim the count. */
			VM_OBJECT_LOCK(obj);
			vm_page_lock_queues();
			for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
				vm_page_free(ap->a_m[i]);
			vm_page_unlock_queues();
			VM_OBJECT_UNLOCK(obj);
			pcount = ap->a_reqpage + pforwards + 1;
		}

	/*
	 * number of pages for I/O corrected for the non-contig pages at
	 * the beginning of the array.
	 */
		pcount -= firstpage;
	}

	/*
	 * calculate the size of the transfer
	 */

	size = pcount * PAGE_SIZE;

	/* Clamp the transfer to the pager's notion of the file size. */
	if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
		obj->un_pager.vnp.vnp_size)
		size = obj->un_pager.vnp.vnp_size -
			IDX_TO_OFF(ap->a_m[firstpage]->pindex);

	physoffset -= foff;
	/* Re-issue the read against the underlying device vnode. */
	rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
		(ap->a_reqpage - firstpage), physoffset);

	return (rtval);
}
946
947 /*
948 * Extended attribute area reading.
949 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;

	GIANT_REQUIRED;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef DIAGNOSTIC
	/* The extattr area only exists on UFS2 inodes. */
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");

#endif
	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	bytesinfile = dp->di_extsize - uio->uio_offset;
	if (bytesinfile <= 0) {
		/* Read at or past the end of the extattr area. */
		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
			ip->i_flag |= IN_ACCESS;
		return 0;
	}

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;

		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		/*
		 * Extattr blocks are addressed with negative logical
		 * block numbers (-1 - lbn) to keep them distinct from
		 * data blocks in the buffer cache.
		 */
		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
					(int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing. The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	/* Mark the inode accessed if we moved any data (or had no error). */
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
1111
1112 /*
1113 * Extended attribute area writing.
1114 */
1115 static int
1116 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
1117 {
1118 struct inode *ip;
1119 struct ufs2_dinode *dp;
1120 struct fs *fs;
1121 struct buf *bp;
1122 ufs_lbn_t lbn;
1123 off_t osize;
1124 int blkoffset, error, flags, resid, size, xfersize;
1125
1126 GIANT_REQUIRED;
1127
1128 ip = VTOI(vp);
1129 fs = ip->i_fs;
1130 dp = ip->i_din2;
1131
1132 #ifdef DIAGNOSTIC
1133 if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
1134 panic("ext_write: mode");
1135 #endif
1136
1137 if (ioflag & IO_APPEND)
1138 uio->uio_offset = dp->di_extsize;
1139
1140 if (uio->uio_offset < 0 ||
1141 (u_int64_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
1142 return (EFBIG);
1143
1144 resid = uio->uio_resid;
1145 osize = dp->di_extsize;
1146 flags = IO_EXT;
1147 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
1148 flags |= IO_SYNC;
1149
1150 for (error = 0; uio->uio_resid > 0;) {
1151 lbn = lblkno(fs, uio->uio_offset);
1152 blkoffset = blkoff(fs, uio->uio_offset);
1153 xfersize = fs->fs_bsize - blkoffset;
1154 if (uio->uio_resid < xfersize)
1155 xfersize = uio->uio_resid;
1156
1157 /*
1158 * We must perform a read-before-write if the transfer size
1159 * does not cover the entire buffer.
1160 */
1161 if (fs->fs_bsize > xfersize)
1162 flags |= BA_CLRBUF;
1163 else
1164 flags &= ~BA_CLRBUF;
1165 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
1166 ucred, flags, &bp);
1167 if (error != 0)
1168 break;
1169 /*
1170 * If the buffer is not valid we have to clear out any
1171 * garbage data from the pages instantiated for the buffer.
1172 * If we do not, a failed uiomove() during a write can leave
1173 * the prior contents of the pages exposed to a userland
1174 * mmap(). XXX deal with uiomove() errors a better way.
1175 */
1176 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
1177 vfs_bio_clrbuf(bp);
1178 if (ioflag & IO_DIRECT)
1179 bp->b_flags |= B_DIRECT;
1180
1181 if (uio->uio_offset + xfersize > dp->di_extsize)
1182 dp->di_extsize = uio->uio_offset + xfersize;
1183
1184 size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
1185 if (size < xfersize)
1186 xfersize = size;
1187
1188 error =
1189 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
1190 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1191 (LIST_FIRST(&bp->b_dep) == NULL)) {
1192 bp->b_flags |= B_RELBUF;
1193 }
1194
1195 /*
1196 * If IO_SYNC each buffer is written synchronously. Otherwise
1197 * if we have a severe page deficiency write the buffer
1198 * asynchronously. Otherwise try to cluster, and if that
1199 * doesn't do it then either do an async write (if O_DIRECT),
1200 * or a delayed write (if not).
1201 */
1202 if (ioflag & IO_SYNC) {
1203 (void)bwrite(bp);
1204 } else if (vm_page_count_severe() ||
1205 buf_dirty_count_severe() ||
1206 xfersize + blkoffset == fs->fs_bsize ||
1207 (ioflag & (IO_ASYNC | IO_DIRECT)))
1208 bawrite(bp);
1209 else
1210 bdwrite(bp);
1211 if (error || xfersize == 0)
1212 break;
1213 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1214 }
1215 /*
1216 * If we successfully wrote any data, and we are not the superuser
1217 * we clear the setuid and setgid bits as a precaution against
1218 * tampering.
1219 */
1220 if (resid > uio->uio_resid && ucred &&
1221 suser_cred(ucred, PRISON_ROOT)) {
1222 ip->i_mode &= ~(ISUID | ISGID);
1223 dp->di_mode = ip->i_mode;
1224 }
1225 if (error) {
1226 if (ioflag & IO_UNIT) {
1227 (void)UFS_TRUNCATE(vp, osize,
1228 IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
1229 uio->uio_offset -= resid - uio->uio_resid;
1230 uio->uio_resid = resid;
1231 }
1232 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
1233 error = UFS_UPDATE(vp, 1);
1234 return (error);
1235 }
1236
1237
1238 /*
1239 * Vnode operating to retrieve a named extended attribute.
1240 *
1241 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
1242 * the length of the EA, and possibly the pointer to the entry and to the data.
1243 */
1244 static int
1245 ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac)
1246 {
1247 u_char *p, *pe, *pn, *p0;
1248 int eapad1, eapad2, ealength, ealen, nlen;
1249 uint32_t ul;
1250
1251 pe = ptr + length;
1252 nlen = strlen(name);
1253
1254 for (p = ptr; p < pe; p = pn) {
1255 p0 = p;
1256 bcopy(p, &ul, sizeof(ul));
1257 pn = p + ul;
1258 /* make sure this entry is complete */
1259 if (pn > pe)
1260 break;
1261 p += sizeof(uint32_t);
1262 if (*p != nspace)
1263 continue;
1264 p++;
1265 eapad2 = *p++;
1266 if (*p != nlen)
1267 continue;
1268 p++;
1269 if (bcmp(p, name, nlen))
1270 continue;
1271 ealength = sizeof(uint32_t) + 3 + nlen;
1272 eapad1 = 8 - (ealength % 8);
1273 if (eapad1 == 8)
1274 eapad1 = 0;
1275 ealength += eapad1;
1276 ealen = ul - ealength - eapad2;
1277 p += nlen + eapad1;
1278 if (eap != NULL)
1279 *eap = p0;
1280 if (eac != NULL)
1281 *eac = p;
1282 return (ealen);
1283 }
1284 return(-1);
1285 }
1286
1287 static int
1288 ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
1289 {
1290 struct inode *ip;
1291 struct ufs2_dinode *dp;
1292 struct uio luio;
1293 struct iovec liovec;
1294 int easize, error;
1295 u_char *eae;
1296
1297 ip = VTOI(vp);
1298 dp = ip->i_din2;
1299 easize = dp->di_extsize;
1300
1301 eae = malloc(easize + extra, M_TEMP, M_WAITOK);
1302
1303 liovec.iov_base = eae;
1304 liovec.iov_len = easize;
1305 luio.uio_iov = &liovec;
1306 luio.uio_iovcnt = 1;
1307 luio.uio_offset = 0;
1308 luio.uio_resid = easize;
1309 luio.uio_segflg = UIO_SYSSPACE;
1310 luio.uio_rw = UIO_READ;
1311 luio.uio_td = td;
1312
1313 error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
1314 if (error) {
1315 free(eae, M_TEMP);
1316 return(error);
1317 }
1318 *p = eae;
1319 return (0);
1320 }
1321
1322 static int
1323 ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
1324 {
1325 struct inode *ip;
1326 struct ufs2_dinode *dp;
1327 int error;
1328
1329 ip = VTOI(vp);
1330
1331 if (ip->i_ea_area != NULL)
1332 return (EBUSY);
1333 dp = ip->i_din2;
1334 error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
1335 if (error)
1336 return (error);
1337 ip->i_ea_len = dp->di_extsize;
1338 ip->i_ea_error = 0;
1339 return (0);
1340 }
1341
1342 /*
1343 * Vnode extattr transaction commit/abort
1344 */
1345 static int
1346 ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
1347 {
1348 struct inode *ip;
1349 struct uio luio;
1350 struct iovec liovec;
1351 int error;
1352 struct ufs2_dinode *dp;
1353
1354 ip = VTOI(vp);
1355 if (ip->i_ea_area == NULL)
1356 return (EINVAL);
1357 dp = ip->i_din2;
1358 error = ip->i_ea_error;
1359 if (commit && error == 0) {
1360 if (cred == NOCRED)
1361 cred = vp->v_mount->mnt_cred;
1362 liovec.iov_base = ip->i_ea_area;
1363 liovec.iov_len = ip->i_ea_len;
1364 luio.uio_iov = &liovec;
1365 luio.uio_iovcnt = 1;
1366 luio.uio_offset = 0;
1367 luio.uio_resid = ip->i_ea_len;
1368 luio.uio_segflg = UIO_SYSSPACE;
1369 luio.uio_rw = UIO_WRITE;
1370 luio.uio_td = td;
1371 /* XXX: I'm not happy about truncating to zero size */
1372 if (ip->i_ea_len < dp->di_extsize)
1373 error = ffs_truncate(vp, 0, IO_EXT, cred, td);
1374 error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
1375 }
1376 free(ip->i_ea_area, M_TEMP);
1377 ip->i_ea_area = NULL;
1378 ip->i_ea_len = 0;
1379 ip->i_ea_error = 0;
1380 return (error);
1381 }
1382
1383 /*
1384 * Vnode extattr strategy routine for special devices and fifos.
1385 *
1386 * We need to check for a read or write of the external attributes.
1387 * Otherwise we just fall through and do the usual thing.
1388 */
1389 static int
1390 ffsext_strategy(struct vop_strategy_args *ap)
1391 /*
1392 struct vop_strategy_args {
1393 struct vnodeop_desc *a_desc;
1394 struct vnode *a_vp;
1395 struct buf *a_bp;
1396 };
1397 */
1398 {
1399 struct vnode *vp;
1400 daddr_t lbn;
1401
1402 KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)",
1403 __func__, ap->a_vp, ap->a_bp->b_vp));
1404 vp = ap->a_vp;
1405 lbn = ap->a_bp->b_lblkno;
1406 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
1407 lbn < 0 && lbn >= -NXADDR)
1408 return (ufs_vnoperate((struct vop_generic_args *)ap));
1409 if (vp->v_type == VFIFO)
1410 return (ufs_vnoperatefifo((struct vop_generic_args *)ap));
1411 return (ufs_vnoperatespec((struct vop_generic_args *)ap));
1412 }
1413
1414 /*
1415 * Vnode extattr transaction commit/abort
1416 */
1417 static int
1418 ffs_openextattr(struct vop_openextattr_args *ap)
1419 /*
1420 struct vop_openextattr_args {
1421 struct vnodeop_desc *a_desc;
1422 struct vnode *a_vp;
1423 IN struct ucred *a_cred;
1424 IN struct thread *a_td;
1425 };
1426 */
1427 {
1428 struct inode *ip;
1429 struct fs *fs;
1430
1431 ip = VTOI(ap->a_vp);
1432 fs = ip->i_fs;
1433 if (fs->fs_magic == FS_UFS1_MAGIC)
1434 return (ufs_vnoperate((struct vop_generic_args *)ap));
1435
1436 if (ap->a_vp->v_type == VCHR)
1437 return (EOPNOTSUPP);
1438
1439 return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
1440 }
1441
1442
1443 /*
1444 * Vnode extattr transaction commit/abort
1445 */
1446 static int
1447 ffs_closeextattr(struct vop_closeextattr_args *ap)
1448 /*
1449 struct vop_closeextattr_args {
1450 struct vnodeop_desc *a_desc;
1451 struct vnode *a_vp;
1452 int a_commit;
1453 IN struct ucred *a_cred;
1454 IN struct thread *a_td;
1455 };
1456 */
1457 {
1458 struct inode *ip;
1459 struct fs *fs;
1460
1461 ip = VTOI(ap->a_vp);
1462 fs = ip->i_fs;
1463 if (fs->fs_magic == FS_UFS1_MAGIC)
1464 return (ufs_vnoperate((struct vop_generic_args *)ap));
1465
1466 if (ap->a_vp->v_type == VCHR)
1467 return (EOPNOTSUPP);
1468
1469 return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
1470 }
1471
1472 /*
1473 * Vnode operation to remove a named attribute.
1474 */
1475 static int
1476 ffs_deleteextattr(struct vop_deleteextattr_args *ap)
1477 /*
1478 vop_deleteextattr {
1479 IN struct vnode *a_vp;
1480 IN int a_attrnamespace;
1481 IN const char *a_name;
1482 IN struct ucred *a_cred;
1483 IN struct thread *a_td;
1484 };
1485 */
1486 {
1487 struct inode *ip;
1488 struct fs *fs;
1489 uint32_t ealength, ul;
1490 int ealen, olen, eapad1, eapad2, error, i, easize;
1491 u_char *eae, *p;
1492 int stand_alone;
1493
1494 ip = VTOI(ap->a_vp);
1495 fs = ip->i_fs;
1496
1497 if (fs->fs_magic == FS_UFS1_MAGIC)
1498 return (ufs_vnoperate((struct vop_generic_args *)ap));
1499
1500 if (ap->a_vp->v_type == VCHR)
1501 return (EOPNOTSUPP);
1502
1503 if (strlen(ap->a_name) == 0)
1504 return (EINVAL);
1505
1506 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1507 ap->a_cred, ap->a_td, IWRITE);
1508 if (error) {
1509 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
1510 ip->i_ea_error = error;
1511 return (error);
1512 }
1513
1514 if (ip->i_ea_area == NULL) {
1515 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1516 if (error)
1517 return (error);
1518 stand_alone = 1;
1519 } else {
1520 stand_alone = 0;
1521 }
1522
1523 ealength = eapad1 = ealen = eapad2 = 0;
1524
1525 eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
1526 bcopy(ip->i_ea_area, eae, ip->i_ea_len);
1527 easize = ip->i_ea_len;
1528
1529 olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1530 &p, NULL);
1531 if (olen == -1) {
1532 /* delete but nonexistent */
1533 free(eae, M_TEMP);
1534 if (stand_alone)
1535 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1536 return(ENOATTR);
1537 }
1538 bcopy(p, &ul, sizeof ul);
1539 i = p - eae + ul;
1540 if (ul != ealength) {
1541 bcopy(p + ul, p + ealength, easize - i);
1542 easize += (ealength - ul);
1543 }
1544 if (easize > NXADDR * fs->fs_bsize) {
1545 free(eae, M_TEMP);
1546 if (stand_alone)
1547 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1548 else if (ip->i_ea_error == 0)
1549 ip->i_ea_error = ENOSPC;
1550 return(ENOSPC);
1551 }
1552 p = ip->i_ea_area;
1553 ip->i_ea_area = eae;
1554 ip->i_ea_len = easize;
1555 free(p, M_TEMP);
1556 if (stand_alone)
1557 error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
1558 return(error);
1559 }
1560
1561 /*
1562 * Vnode operation to retrieve a named extended attribute.
1563 */
1564 static int
1565 ffs_getextattr(struct vop_getextattr_args *ap)
1566 /*
1567 vop_getextattr {
1568 IN struct vnode *a_vp;
1569 IN int a_attrnamespace;
1570 IN const char *a_name;
1571 INOUT struct uio *a_uio;
1572 OUT size_t *a_size;
1573 IN struct ucred *a_cred;
1574 IN struct thread *a_td;
1575 };
1576 */
1577 {
1578 struct inode *ip;
1579 struct fs *fs;
1580 u_char *eae, *p;
1581 unsigned easize;
1582 int error, ealen, stand_alone;
1583
1584 ip = VTOI(ap->a_vp);
1585 fs = ip->i_fs;
1586
1587 if (fs->fs_magic == FS_UFS1_MAGIC)
1588 return (ufs_vnoperate((struct vop_generic_args *)ap));
1589
1590 if (ap->a_vp->v_type == VCHR)
1591 return (EOPNOTSUPP);
1592
1593 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1594 ap->a_cred, ap->a_td, IREAD);
1595 if (error)
1596 return (error);
1597
1598 if (ip->i_ea_area == NULL) {
1599 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1600 if (error)
1601 return (error);
1602 stand_alone = 1;
1603 } else {
1604 stand_alone = 0;
1605 }
1606 eae = ip->i_ea_area;
1607 easize = ip->i_ea_len;
1608
1609 ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1610 NULL, &p);
1611 if (ealen >= 0) {
1612 error = 0;
1613 if (ap->a_size != NULL)
1614 *ap->a_size = ealen;
1615 else if (ap->a_uio != NULL)
1616 error = uiomove(p, ealen, ap->a_uio);
1617 } else
1618 error = ENOATTR;
1619 if (stand_alone)
1620 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1621 return(error);
1622 }
1623
1624 /*
1625 * Vnode operation to retrieve extended attributes on a vnode.
1626 */
1627 static int
1628 ffs_listextattr(struct vop_listextattr_args *ap)
1629 /*
1630 vop_listextattr {
1631 IN struct vnode *a_vp;
1632 IN int a_attrnamespace;
1633 INOUT struct uio *a_uio;
1634 OUT size_t *a_size;
1635 IN struct ucred *a_cred;
1636 IN struct thread *a_td;
1637 };
1638 */
1639 {
1640 struct inode *ip;
1641 struct fs *fs;
1642 u_char *eae, *p, *pe, *pn;
1643 unsigned easize;
1644 uint32_t ul;
1645 int error, ealen, stand_alone;
1646
1647 ip = VTOI(ap->a_vp);
1648 fs = ip->i_fs;
1649
1650 if (fs->fs_magic == FS_UFS1_MAGIC)
1651 return (ufs_vnoperate((struct vop_generic_args *)ap));
1652
1653 if (ap->a_vp->v_type == VCHR)
1654 return (EOPNOTSUPP);
1655
1656 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1657 ap->a_cred, ap->a_td, IREAD);
1658 if (error)
1659 return (error);
1660
1661 if (ip->i_ea_area == NULL) {
1662 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1663 if (error)
1664 return (error);
1665 stand_alone = 1;
1666 } else {
1667 stand_alone = 0;
1668 }
1669 eae = ip->i_ea_area;
1670 easize = ip->i_ea_len;
1671
1672 error = 0;
1673 if (ap->a_size != NULL)
1674 *ap->a_size = 0;
1675 pe = eae + easize;
1676 for(p = eae; error == 0 && p < pe; p = pn) {
1677 bcopy(p, &ul, sizeof(ul));
1678 pn = p + ul;
1679 if (pn > pe)
1680 break;
1681 p += sizeof(ul);
1682 if (*p++ != ap->a_attrnamespace)
1683 continue;
1684 p++; /* pad2 */
1685 ealen = *p;
1686 if (ap->a_size != NULL) {
1687 *ap->a_size += ealen + 1;
1688 } else if (ap->a_uio != NULL) {
1689 error = uiomove(p, ealen + 1, ap->a_uio);
1690 }
1691 }
1692 if (stand_alone)
1693 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1694 return(error);
1695 }
1696
1697 /*
1698 * Vnode operation to set a named attribute.
1699 */
1700 static int
1701 ffs_setextattr(struct vop_setextattr_args *ap)
1702 /*
1703 vop_setextattr {
1704 IN struct vnode *a_vp;
1705 IN int a_attrnamespace;
1706 IN const char *a_name;
1707 INOUT struct uio *a_uio;
1708 IN struct ucred *a_cred;
1709 IN struct thread *a_td;
1710 };
1711 */
1712 {
1713 struct inode *ip;
1714 struct fs *fs;
1715 uint32_t ealength, ul;
1716 int ealen, olen, eapad1, eapad2, error, i, easize;
1717 u_char *eae, *p;
1718 int stand_alone;
1719
1720 ip = VTOI(ap->a_vp);
1721 fs = ip->i_fs;
1722
1723 if (fs->fs_magic == FS_UFS1_MAGIC)
1724 return (ufs_vnoperate((struct vop_generic_args *)ap));
1725
1726 if (ap->a_vp->v_type == VCHR)
1727 return (EOPNOTSUPP);
1728
1729 if (strlen(ap->a_name) == 0)
1730 return (EINVAL);
1731
1732 /* XXX Now unsupported API to delete EAs using NULL uio. */
1733 if (ap->a_uio == NULL)
1734 return (EOPNOTSUPP);
1735
1736 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1737 ap->a_cred, ap->a_td, IWRITE);
1738 if (error) {
1739 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
1740 ip->i_ea_error = error;
1741 return (error);
1742 }
1743
1744 if (ip->i_ea_area == NULL) {
1745 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1746 if (error)
1747 return (error);
1748 stand_alone = 1;
1749 } else {
1750 stand_alone = 0;
1751 }
1752
1753 ealen = ap->a_uio->uio_resid;
1754 ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
1755 eapad1 = 8 - (ealength % 8);
1756 if (eapad1 == 8)
1757 eapad1 = 0;
1758 eapad2 = 8 - (ealen % 8);
1759 if (eapad2 == 8)
1760 eapad2 = 0;
1761 ealength += eapad1 + ealen + eapad2;
1762
1763 eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
1764 bcopy(ip->i_ea_area, eae, ip->i_ea_len);
1765 easize = ip->i_ea_len;
1766
1767 olen = ffs_findextattr(eae, easize,
1768 ap->a_attrnamespace, ap->a_name, &p, NULL);
1769 if (olen == -1) {
1770 /* new, append at end */
1771 p = eae + easize;
1772 easize += ealength;
1773 } else {
1774 bcopy(p, &ul, sizeof ul);
1775 i = p - eae + ul;
1776 if (ul != ealength) {
1777 bcopy(p + ul, p + ealength, easize - i);
1778 easize += (ealength - ul);
1779 }
1780 }
1781 if (easize > NXADDR * fs->fs_bsize) {
1782 free(eae, M_TEMP);
1783 if (stand_alone)
1784 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1785 else if (ip->i_ea_error == 0)
1786 ip->i_ea_error = ENOSPC;
1787 return(ENOSPC);
1788 }
1789 bcopy(&ealength, p, sizeof(ealength));
1790 p += sizeof(ealength);
1791 *p++ = ap->a_attrnamespace;
1792 *p++ = eapad2;
1793 *p++ = strlen(ap->a_name);
1794 strcpy(p, ap->a_name);
1795 p += strlen(ap->a_name);
1796 bzero(p, eapad1);
1797 p += eapad1;
1798 error = uiomove(p, ealen, ap->a_uio);
1799 if (error) {
1800 free(eae, M_TEMP);
1801 if (stand_alone)
1802 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1803 else if (ip->i_ea_error == 0)
1804 ip->i_ea_error = error;
1805 return(error);
1806 }
1807 p += ealen;
1808 bzero(p, eapad2);
1809
1810 p = ip->i_ea_area;
1811 ip->i_ea_area = eae;
1812 ip->i_ea_len = easize;
1813 free(p, M_TEMP);
1814 if (stand_alone)
1815 error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
1816 return(error);
1817 }
Cache object: 24b77d6828f7faa202520f8f43437eb8
|