ffs_vnops.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
    3  * All rights reserved.
    4  *
    5  * This software was developed for the FreeBSD Project by Marshall
    6  * Kirk McKusick and Network Associates Laboratories, the Security
    7  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
    8  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
    9  * research program
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  * Copyright (c) 1982, 1986, 1989, 1993
   33  *      The Regents of the University of California.  All rights reserved.
   34  *
   35  * Redistribution and use in source and binary forms, with or without
   36  * modification, are permitted provided that the following conditions
   37  * are met:
   38  * 1. Redistributions of source code must retain the above copyright
   39  *    notice, this list of conditions and the following disclaimer.
   40  * 2. Redistributions in binary form must reproduce the above copyright
   41  *    notice, this list of conditions and the following disclaimer in the
   42  *    documentation and/or other materials provided with the distribution.
   43  * 4. Neither the name of the University nor the names of its contributors
   44  *    may be used to endorse or promote products derived from this software
   45  *    without specific prior written permission.
   46  *
   47  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   57  * SUCH DAMAGE.
   58  *
   59  *      from: @(#)ufs_readwrite.c       8.11 (Berkeley) 5/8/95
   60  * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
   61  *      @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95
   62  */
   63 
   64 #include <sys/cdefs.h>
   65 __FBSDID("$FreeBSD: releng/11.1/sys/ufs/ffs/ffs_vnops.c 308554 2016-11-11 20:18:08Z kib $");
   66 
   67 #include <sys/param.h>
   68 #include <sys/bio.h>
   69 #include <sys/systm.h>
   70 #include <sys/buf.h>
   71 #include <sys/conf.h>
   72 #include <sys/extattr.h>
   73 #include <sys/kernel.h>
   74 #include <sys/limits.h>
   75 #include <sys/malloc.h>
   76 #include <sys/mount.h>
   77 #include <sys/priv.h>
   78 #include <sys/rwlock.h>
   79 #include <sys/stat.h>
   80 #include <sys/sysctl.h>
   81 #include <sys/vmmeter.h>
   82 #include <sys/vnode.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_param.h>
   86 #include <vm/vm_extern.h>
   87 #include <vm/vm_object.h>
   88 #include <vm/vm_page.h>
   89 #include <vm/vm_pager.h>
   90 #include <vm/vnode_pager.h>
   91 
   92 #include <ufs/ufs/extattr.h>
   93 #include <ufs/ufs/quota.h>
   94 #include <ufs/ufs/inode.h>
   95 #include <ufs/ufs/ufs_extern.h>
   96 #include <ufs/ufs/ufsmount.h>
   97 
   98 #include <ufs/ffs/fs.h>
   99 #include <ufs/ffs/ffs_extern.h>
  100 #include "opt_directio.h"
  101 #include "opt_ffs.h"
  102 /*
       * Forward declarations.
       *
       * ffs_rawread() is declared extern, and only when the kernel is built
       * with the DIRECTIO option; its sole caller in the code visible here
       * is ffs_read().  The static vop_*_t names are the handlers installed
       * in the vop_vector tables below.  ffs_extread() and ffs_extwrite() are
       * referenced from ffs_read()/ffs_write() only under "#ifdef notyet".
       */
  103 #ifdef DIRECTIO
  104 extern int      ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
  105 #endif
  106 static vop_fdatasync_t  ffs_fdatasync;
  107 static vop_fsync_t      ffs_fsync;
  108 static vop_getpages_t   ffs_getpages;
  109 static vop_lock1_t      ffs_lock;
  110 static vop_read_t       ffs_read;
  111 static vop_write_t      ffs_write;
  112 static int      ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
  113 static int      ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
  114                     struct ucred *cred);
  115 static vop_strategy_t   ffsext_strategy;
  116 static vop_closeextattr_t       ffs_closeextattr;
  117 static vop_deleteextattr_t      ffs_deleteextattr;
  118 static vop_getextattr_t ffs_getextattr;
  119 static vop_listextattr_t        ffs_listextattr;
  120 static vop_openextattr_t        ffs_openextattr;
  121 static vop_setextattr_t ffs_setextattr;
  122 static vop_vptofh_t     ffs_vptofh;
  123 
  124 /*
       * Global vfs data structures for ufs.
       *
       * ffs_vnodeops1: vnode operations vector whose vop_default is
       * ufs_vnodeops.  It installs the FFS fsync, fdatasync, getpages,
       * lock1, read, reallocblks, write and vptofh handlers, and none of
       * the extended-attribute handlers that ffs_vnodeops2 installs.
       * NOTE(review): the "1" suffix presumably denotes the UFS1 on-disk
       * format -- confirm against the code that selects the vector.
       */
  125 struct vop_vector ffs_vnodeops1 = {
  126         .vop_default =          &ufs_vnodeops,
  127         .vop_fsync =            ffs_fsync,
  128         .vop_fdatasync =        ffs_fdatasync,
  129         .vop_getpages =         ffs_getpages,
  130         .vop_getpages_async =   vnode_pager_local_getpages_async,
  131         .vop_lock1 =            ffs_lock,
  132         .vop_read =             ffs_read,
  133         .vop_reallocblks =      ffs_reallocblks,
  134         .vop_write =            ffs_write,
  135         .vop_vptofh =           ffs_vptofh,
  136 };
  137 /*
       * ffs_fifoops1: operations vector whose vop_default is ufs_fifoops.
       * Only fsync, fdatasync, reallocblks and vptofh are overridden; unlike
       * ffs_fifoops2 it installs neither ffs_lock, ffsext_strategy nor any
       * extended-attribute handler.
       */
  138 struct vop_vector ffs_fifoops1 = {
  139         .vop_default =          &ufs_fifoops,
  140         .vop_fsync =            ffs_fsync,
  141         .vop_fdatasync =        ffs_fdatasync,
  142         .vop_reallocblks =      ffs_reallocblks, /* XXX: really ??? */
  143         .vop_vptofh =           ffs_vptofh,
  144 };
  145 
  146 /*
       * Global vfs data structures for ufs.
       *
       * ffs_vnodeops2: same handlers as ffs_vnodeops1 (vop_default is
       * ufs_vnodeops) plus the six extended-attribute operations: close,
       * delete, get, list, open and set.
       */
  147 struct vop_vector ffs_vnodeops2 = {
  148         .vop_default =          &ufs_vnodeops,
  149         .vop_fsync =            ffs_fsync,
  150         .vop_fdatasync =        ffs_fdatasync,
  151         .vop_getpages =         ffs_getpages,
  152         .vop_getpages_async =   vnode_pager_local_getpages_async,
  153         .vop_lock1 =            ffs_lock,
  154         .vop_read =             ffs_read,
  155         .vop_reallocblks =      ffs_reallocblks,
  156         .vop_write =            ffs_write,
  157         .vop_closeextattr =     ffs_closeextattr,
  158         .vop_deleteextattr =    ffs_deleteextattr,
  159         .vop_getextattr =       ffs_getextattr,
  160         .vop_listextattr =      ffs_listextattr,
  161         .vop_openextattr =      ffs_openextattr,
  162         .vop_setextattr =       ffs_setextattr,
  163         .vop_vptofh =           ffs_vptofh,
  164 };
  165 /*
       * ffs_fifoops2: operations vector whose vop_default is ufs_fifoops.
       * Besides fsync, fdatasync, reallocblks and vptofh it installs
       * ffs_lock, the extended-attribute handlers, and ffsext_strategy as
       * the strategy routine (the only table visible here that sets
       * vop_strategy).
       */
  166 struct vop_vector ffs_fifoops2 = {
  167         .vop_default =          &ufs_fifoops,
  168         .vop_fsync =            ffs_fsync,
  169         .vop_fdatasync =        ffs_fdatasync,
  170         .vop_lock1 =            ffs_lock,
  171         .vop_reallocblks =      ffs_reallocblks,
  172         .vop_strategy =         ffsext_strategy,
  173         .vop_closeextattr =     ffs_closeextattr,
  174         .vop_deleteextattr =    ffs_deleteextattr,
  175         .vop_getextattr =       ffs_getextattr,
  176         .vop_listextattr =      ffs_listextattr,
  177         .vop_openextattr =      ffs_openextattr,
  178         .vop_setextattr =       ffs_setextattr,
  179         .vop_vptofh =           ffs_vptofh,
  180 };
  181 
  182 /*
  183  * Synch an open file.
       *
       * ap->a_vp is synced with ffs_syncvnode() using ap->a_waitfor and no
       * flags.  When the caller asked for MNT_WAIT and DOINGSOFTDEP(vp) is
       * true, softdep_fsync() is called as well; afterwards, for VREG and
       * VDIR vnodes, the buffer object is rechecked and the whole sequence
       * is restarted if writes are still in progress or buffers are dirty.
       *
       * Returns 0 on success, otherwise the first non-zero value returned
       * by ffs_syncvnode() or softdep_fsync().
  184  */
  185 /* ARGSUSED */
  186 static int
  187 ffs_fsync(struct vop_fsync_args *ap)
  188 {
  189         struct vnode *vp;
  190         struct bufobj *bo;
  191         int error;
  192 
  193         vp = ap->a_vp;
  194         bo = &vp->v_bufobj;
  195 retry:
  196         error = ffs_syncvnode(vp, ap->a_waitfor, 0);
  197         if (error)
  198                 return (error);
  199         if (ap->a_waitfor == MNT_WAIT && DOINGSOFTDEP(vp)) {
  200                 error = softdep_fsync(vp);
  201                 if (error)
  202                         return (error);
  203 
  204                 /*
  205                  * The softdep_fsync() function may drop vp lock,
  206                  * allowing for dirty buffers to reappear on the
  207                  * bo_dirty list. Recheck and resync as needed.
  208                  */
                      /* The counters are sampled with the bufobj lock held. */
  209                 BO_LOCK(bo);
  210                 if ((vp->v_type == VREG || vp->v_type == VDIR) &&
  211                     (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
  212                         BO_UNLOCK(bo);
  213                         goto retry;
  214                 }
  215                 BO_UNLOCK(bo);
  216         }
  217         return (0);
  218 }
  219 /*
       * Write out the dirty buffers attached to vp and, unless told not to,
       * update its inode.
       *
       * waitfor: with MNT_WAIT the dirty list is walked in alternating
       *     asynchronous and synchronous passes and outstanding writes are
       *     drained between rounds; with any other value a single
       *     asynchronous pass is made and the function returns right after.
       * flags:   DATA_ONLY skips indirect-block buffers when soft updates
       *     are not active, ignores them when deciding whether another
       *     pass is needed, and omits the final ffs_update() and
       *     softdep_journal_fsync() calls.  NO_INO_UPDT suppresses every
       *     ffs_update() call made here.
       *
       * Returns 0, or an error from softdep_sync_metadata(),
       * softdep_sync_buf() (other than EBUSY), bwrite() or the final
       * ffs_update().  The return value of the ffs_update() call made between
       * passes is not checked.
       */
  220 int
  221 ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
  222 {
  223         struct inode *ip;
  224         struct bufobj *bo;
  225         struct buf *bp, *nbp;
  226         ufs_lbn_t lbn;
  227         int error, passes;
  228         bool still_dirty, wait;
  229 
  230         ip = VTOI(vp);
  231         ip->i_flag &= ~IN_NEEDSYNC;
  232         bo = &vp->v_bufobj;
  233 
  234         /*
  235          * When doing MNT_WAIT we must first flush all dependencies
  236          * on the inode.
  237          */
  238         if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
  239             (error = softdep_sync_metadata(vp)) != 0)
  240                 return (error);
  241 
  242         /*
  243          * Flush all dirty buffers associated with a vnode.
  244          */
  245         error = 0;
  246         passes = 0;
  247         wait = false;   /* Always do an async pass first. */
              /*
               * Upper bound, derived from the current file size, on the
               * logical block number a dirty buffer may carry; a buffer
               * above it trips the "syncing truncated data" panic below.
               */
  248         lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1));
  249         BO_LOCK(bo);
  250 loop:
              /* Start of a pass: clear the per-pass "already looked at" mark. */
  251         TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
  252                 bp->b_vflags &= ~BV_SCANNED;
  253         TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
  254                 /*
  255                  * Reasons to skip this buffer: it has already been considered
  256                  * on this pass, the buffer has dependencies that will cause
  257                  * it to be redirtied and it has not already been deferred,
  258                  * or it is already being written.
  259                  */
  260                 if ((bp->b_vflags & BV_SCANNED) != 0)
  261                         continue;
  262                 bp->b_vflags |= BV_SCANNED;
  263                 /*
  264                  * Flush indirects in order, if requested.
  265                  *
  266                  * Note that if only datasync is requested, we can
  267                  * skip indirect blocks when softupdates are not
  268                  * active.  Otherwise we must flush them with data,
  269                  * since dependencies prevent data block writes.
  270                  */
  271                 if (waitfor == MNT_WAIT && bp->b_lblkno <= -NDADDR &&
  272                     (lbn_level(bp->b_lblkno) >= passes ||
  273                     ((flags & DATA_ONLY) != 0 && !DOINGSOFTDEP(vp))))
  274                         continue;
  275                 if (bp->b_lblkno > lbn)
  276                         panic("ffs_syncvnode: syncing truncated data.");
                      /*
                       * Try the buffer lock without sleeping.  If that fails,
                       * a synchronous pass sleeps for it (handing over the
                       * bufobj lock as interlock) and rescans on failure; an
                       * asynchronous pass simply moves on to the next buffer.
                       */
  277                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
  278                         BO_UNLOCK(bo);
  279                 } else if (wait) {
  280                         if (BUF_LOCK(bp,
  281                             LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
  282                             BO_LOCKPTR(bo)) != 0) {
  283                                 bp->b_vflags &= ~BV_SCANNED;
  284                                 goto next;
  285                         }
  286                 } else
  287                         continue;
  288                 if ((bp->b_flags & B_DELWRI) == 0)
  289                         panic("ffs_fsync: not dirty");
  290                 /*
  291                  * Check for dependencies and potentially complete them.
  292                  */
  293                 if (!LIST_EMPTY(&bp->b_dep) &&
  294                     (error = softdep_sync_buf(vp, bp,
  295                     wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
  296                         /* I/O error. */
  297                         if (error != EBUSY) {
  298                                 BUF_UNLOCK(bp);
  299                                 return (error);
  300                         }
  301                         /* If we deferred once, don't defer again. */
  302                         if ((bp->b_flags & B_DEFERRED) == 0) {
  303                                 bp->b_flags |= B_DEFERRED;
  304                                 BUF_UNLOCK(bp);
  305                                 goto next;
  306                         }
  307                 }
                      /*
                       * Issue the write: bwrite() on a synchronous pass (its
                       * error ends the sync), otherwise vfs_bio_awrite() for
                       * B_CLUSTEROK buffers or bawrite(), results ignored.
                       */
  308                 if (wait) {
  309                         bremfree(bp);
  310                         if ((error = bwrite(bp)) != 0)
  311                                 return (error);
  312                 } else if ((bp->b_flags & B_CLUSTEROK)) {
  313                         (void) vfs_bio_awrite(bp);
  314                 } else {
  315                         bremfree(bp);
  316                         (void) bawrite(bp);
  317                 }
  318 next:
  319                 /*
  320                  * Since we may have slept during the I/O, we need
  321                  * to start from a known point.
  322                  */
  323                 BO_LOCK(bo);
  324                 nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
  325         }
              /* Without MNT_WAIT the single asynchronous pass above is all. */
  326         if (waitfor != MNT_WAIT) {
  327                 BO_UNLOCK(bo);
  328                 if ((flags & NO_INO_UPDT) != 0)
  329                         return (0);
  330                 else
  331                         return (ffs_update(vp, 0));
  332         }
  333         /* Drain IO to see if we're done. */
  334         bufobj_wwait(bo, 0, 0);
  335         /*
  336          * Block devices associated with filesystems may have new I/O
  337          * requests posted for them even if the vnode is locked, so no
  338          * amount of trying will get them clean.  We make several passes
  339          * as a best effort.
  340          *
  341          * Regular files may need multiple passes to flush all dependency
  342          * work as it is possible that we must write once per indirect
  343          * level, once for the leaf, and once for the inode and each of
  344          * these will be done with one sync and one async pass.
  345          */
  346         if (bo->bo_dirty.bv_cnt > 0) {
  347                 if ((flags & DATA_ONLY) == 0) {
  348                         still_dirty = true;
  349                 } else {
  350                         /*
  351                          * For data-only sync, dirty indirect buffers
  352                          * are ignored.
  353                          */
  354                         still_dirty = false;
  355                         TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
  356                                 if (bp->b_lblkno > -NDADDR) {
  357                                         still_dirty = true;
  358                                         break;
  359                                 }
  360                         }
  361                 }
  362 
  363                 if (still_dirty) {
  364                         /* Write the inode after sync passes to flush deps. */
  365                         if (wait && DOINGSOFTDEP(vp) &&
  366                             (flags & NO_INO_UPDT) == 0) {
  367                                 BO_UNLOCK(bo);
  368                                 ffs_update(vp, 1);
  369                                 BO_LOCK(bo);
  370                         }
  371                         /* switch between sync/async. */
  372                         wait = !wait;
                              /*
                               * An async pass is always followed by a sync
                               * pass.  After a sync pass another round starts
                               * only while fewer than NIADDR + 2 rounds have
                               * been completed.
                               */
  373                         if (wait || ++passes < NIADDR + 2)
  374                                 goto loop;
  375 #ifdef INVARIANTS
  376                         if (!vn_isdisk(vp, NULL))
  377                                 vn_printf(vp, "ffs_fsync: dirty ");
  378 #endif
  379                 }
  380         }
  381         BO_UNLOCK(bo);
  382         error = 0;
  383         if ((flags & DATA_ONLY) == 0) {
  384                 if ((flags & NO_INO_UPDT) == 0)
  385                         error = ffs_update(vp, 1);
  386                 if (DOINGSUJ(vp))
  387                         softdep_journal_fsync(VTOI(vp));
  388         }
  389         return (error);
  390 }
  391 /*
       * fdatasync vnode operation: a waiting, data-only sync.  Passes
       * ap->a_vp to ffs_syncvnode() with MNT_WAIT and DATA_ONLY and returns
       * its result unchanged.
       */
  392 static int
  393 ffs_fdatasync(struct vop_fdatasync_args *ap)
  394 {
  395 
  396         return (ffs_syncvnode(ap->a_vp, MNT_WAIT, DATA_ONLY));
  397 }
  398 /*
       * lock1 vnode operation.
       *
       * When NO_FFS_SNAPSHOT is defined every request is handed to
       * VOP_LOCK1_APV(&ufs_vnodeops, ap).  Otherwise LK_SHARED, LK_UPGRADE
       * and LK_EXCLUSIVE requests are served here: the lock that
       * vp->v_vnlock points at is acquired with _lockmgr_args(), and the
       * pointer is compared again afterwards.  If it changed while the
       * request was pending, the lock just obtained is released and the
       * request is retried against the new one.  All other request types
       * go to VOP_LOCK1_APV(&ufs_vnodeops, ap).
       *
       * Returns the lock manager's result, or EBUSY when a retry would be
       * needed but the caller passed both LK_INTERLOCK and LK_NOWAIT.
       */
  399 static int
  400 ffs_lock(ap)
  401         struct vop_lock1_args /* {
  402                 struct vnode *a_vp;
  403                 int a_flags;
  404                 struct thread *a_td;
  405                 char *a_file;
  406                 int a_line;
  407         } */ *ap;
  408 {
  409 #ifndef NO_FFS_SNAPSHOT
  410         struct vnode *vp;
  411         int flags;
  412         struct lock *lkp;
  413         int result;
  414 
  415         switch (ap->a_flags & LK_TYPE_MASK) {
  416         case LK_SHARED:
  417         case LK_UPGRADE:
  418         case LK_EXCLUSIVE:
  419                 vp = ap->a_vp;
  420                 flags = ap->a_flags;
  421                 for (;;) {
  422 #ifdef DEBUG_VFS_LOCKS
  423                         KASSERT(vp->v_holdcnt != 0,
  424                             ("ffs_lock %p: zero hold count", vp));
  425 #endif
                              /* Remember which lock is requested this time. */
  426                         lkp = vp->v_vnlock;
  427                         result = _lockmgr_args(lkp, flags, VI_MTX(vp),
  428                             LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
  429                             ap->a_file, ap->a_line);
                              /* Done on failure, or if v_vnlock is unchanged. */
  430                         if (lkp == vp->v_vnlock || result != 0)
  431                                 break;
  432                         /*
  433                          * Apparent success, except that the vnode
  434                          * mutated between snapshot file vnode and
  435                          * regular file vnode while this process
  436                          * slept.  The lock currently held is not the
  437                          * right lock.  Release it, and try to get the
  438                          * new lock.
  439                          */
  440                         (void) _lockmgr_args(lkp, LK_RELEASE, NULL,
  441                             LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
  442                             ap->a_file, ap->a_line);
  443                         if ((flags & (LK_INTERLOCK | LK_NOWAIT)) ==
  444                             (LK_INTERLOCK | LK_NOWAIT))
  445                                 return (EBUSY);
                              /*
                               * Retry an upgrade as a plain exclusive request and
                               * without LK_INTERLOCK.  NOTE(review): presumably
                               * because the new lock is not held by this thread
                               * and the interlock was already consumed by the
                               * first attempt -- confirm against lockmgr(9).
                               */
  446                         if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
  447                                 flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
  448                         flags &= ~LK_INTERLOCK;
  449                 }
  450                 break;
  451         default:
  452                 result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
  453         }
  454         return (result);
  455 #else
  456         return (VOP_LOCK1_APV(&ufs_vnodeops, ap));
  457 #endif
  458 }
  459 
  460 /*
  461  * Vnode op for reading.
       *
       * Copies up to uio->uio_resid bytes, starting at uio->uio_offset and
       * stopping at the inode's i_size, from the file into uio.  Each loop
       * iteration handles at most one filesystem block, obtained with
       * bread_gb(), cluster_read() or breadn_flags() depending on whether
       * the block is the last one of the file, on the MNT_NOCLUSTERR mount
       * flag and on the sequential-access count kept in the upper bits of
       * a_ioflag (a_ioflag >> IO_SEQSHIFT).
       *
       * IO_EXT requests panic unless the file is built with "notyet".
       * With DIRECTIO, IO_DIRECT requests are first given to ffs_rawread();
       * the buffered path runs only if that call returns no error and
       * reports no work done.
       *
       * Returns 0 (also when there is nothing to read), EOVERFLOW when the
       * offset lies inside the file but at or beyond fs_maxfilesize, or
       * the error from the block read or from the copy to uio.  If the
       * read succeeded or moved any data, IN_ACCESS is set on the inode
       * unless the mount has MNT_NOATIME or MNT_RDONLY set.
  462  */
  463 static int
  464 ffs_read(ap)
  465         struct vop_read_args /* {
  466                 struct vnode *a_vp;
  467                 struct uio *a_uio;
  468                 int a_ioflag;
  469                 struct ucred *a_cred;
  470         } */ *ap;
  471 {
  472         struct vnode *vp;
  473         struct inode *ip;
  474         struct uio *uio;
  475         struct fs *fs;
  476         struct buf *bp;
  477         ufs_lbn_t lbn, nextlbn;
  478         off_t bytesinfile;
  479         long size, xfersize, blkoffset;
  480         ssize_t orig_resid;
  481         int error;
  482         int seqcount;
  483         int ioflag;
  484 
  485         vp = ap->a_vp;
  486         uio = ap->a_uio;
  487         ioflag = ap->a_ioflag;
  488         if (ap->a_ioflag & IO_EXT)
  489 #ifdef notyet
  490                 return (ffs_extread(vp, uio, ioflag));
  491 #else
  492                 panic("ffs_read+IO_EXT");
  493 #endif
  494 #ifdef DIRECTIO
  495         if ((ioflag & IO_DIRECT) != 0) {
  496                 int workdone;
  497 
  498                 error = ffs_rawread(vp, uio, &workdone);
  499                 if (error != 0 || workdone != 0)
  500                         return error;
  501         }
  502 #endif
  503 
  504         seqcount = ap->a_ioflag >> IO_SEQSHIFT;
  505         ip = VTOI(vp);
  506 
  507 #ifdef INVARIANTS
  508         if (uio->uio_rw != UIO_READ)
  509                 panic("ffs_read: mode");
  510 
  511         if (vp->v_type == VLNK) {
  512                 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
  513                         panic("ffs_read: short symlink");
  514         } else if (vp->v_type != VREG && vp->v_type != VDIR)
  515                 panic("ffs_read: type %d",  vp->v_type);
  516 #endif
              /* Remembered to tell later whether any data was transferred. */
  517         orig_resid = uio->uio_resid;
  518         KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
  519         if (orig_resid == 0)
  520                 return (0);
  521         KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
  522         fs = ITOFS(ip);
  523         if (uio->uio_offset < ip->i_size &&
  524             uio->uio_offset >= fs->fs_maxfilesize)
  525                 return (EOVERFLOW);
  526 
  527         for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
                      /* Stop at (or past) end of file. */
  528                 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
  529                         break;
  530                 lbn = lblkno(fs, uio->uio_offset);
  531                 nextlbn = lbn + 1;
  532 
  533                 /*
  534                  * size of buffer.  The buffer representing the
  535                  * end of the file is rounded up to the size of
  536                  * the block type ( fragment or full block,
  537                  * depending ).
  538                  */
  539                 size = blksize(fs, ip, lbn);
  540                 blkoffset = blkoff(fs, uio->uio_offset);
  541 
  542                 /*
  543                  * The amount we want to transfer in this iteration is
  544                  * one FS block less the amount of the data before
  545                  * our startpoint (duh!)
  546                  */
  547                 xfersize = fs->fs_bsize - blkoffset;
  548 
  549                 /*
  550                  * But if we actually want less than the block,
  551                  * or the file doesn't have a whole block more of data,
  552                  * then use the lesser number.
  553                  */
  554                 if (uio->uio_resid < xfersize)
  555                         xfersize = uio->uio_resid;
  556                 if (bytesinfile < xfersize)
  557                         xfersize = bytesinfile;
  558 
  559                 if (lblktosize(fs, nextlbn) >= ip->i_size) {
  560                         /*
  561                          * Don't do readahead if this is the end of the file.
  562                          */
  563                         error = bread_gb(vp, lbn, size, NOCRED,
  564                             GB_UNMAPPED, &bp);
  565                 } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
  566                         /*
  567                          * Otherwise if we are allowed to cluster,
  568                          * grab as much as we can.
  569                          *
  570                          * XXX  This may not be a win if we are not
  571                          * doing sequential access.
  572                          */
  573                         error = cluster_read(vp, ip->i_size, lbn,
  574                             size, NOCRED, blkoffset + uio->uio_resid,
  575                             seqcount, GB_UNMAPPED, &bp);
  576                 } else if (seqcount > 1) {
  577                         /*
  578                          * If we are NOT allowed to cluster, then
  579                          * if we appear to be acting sequentially,
  580                          * fire off a request for a readahead
  581                          * as well as a read. Note that the 4th and 5th
  582                          * arguments point to arrays of the size specified in
  583                          * the 6th argument.
  584                          */
  585                         u_int nextsize = blksize(fs, ip, nextlbn);
  586                         error = breadn_flags(vp, lbn, size, &nextlbn,
  587                             &nextsize, 1, NOCRED, GB_UNMAPPED, &bp);
  588                 } else {
  589                         /*
  590                          * Failing all of the above, just read what the
  591                          * user asked for. Interestingly, the same as
  592                          * the first option above.
  593                          */
  594                         error = bread_gb(vp, lbn, size, NOCRED,
  595                             GB_UNMAPPED, &bp);
  596                 }
                      /*
                       * On a read error the buffer is released right here and
                       * bp cleared, so the cleanup after the loop skips it.
                       */
  597                 if (error) {
  598                         brelse(bp);
  599                         bp = NULL;
  600                         break;
  601                 }
  602 
  603                 /*
  604                  * If IO_DIRECT then set B_DIRECT for the buffer.  This
  605                  * will cause us to attempt to release the buffer later on
  606                  * and will cause the buffer cache to attempt to free the
  607                  * underlying pages.
  608                  */
  609                 if (ioflag & IO_DIRECT)
  610                         bp->b_flags |= B_DIRECT;
  611 
  612                 /*
  613                  * We should only get non-zero b_resid when an I/O error
  614                  * has occurred, which should cause us to break above.
  615                  * However, if the short read did not cause an error,
  616                  * then we want to ensure that we do not uiomove bad
  617                  * or uninitialized data.
  618                  */
  619                 size -= bp->b_resid;
  620                 if (size < xfersize) {
  621                         if (size == 0)
  622                                 break;
  623                         xfersize = size;
  624                 }
  625 
                      /*
                       * The reads above pass GB_UNMAPPED, so the buffer may
                       * come back without a kernel mapping; in that case the
                       * copy is done from its page array instead of b_data.
                       */
  626                 if (buf_mapped(bp)) {
  627                         error = vn_io_fault_uiomove((char *)bp->b_data +
  628                             blkoffset, (int)xfersize, uio);
  629                 } else {
  630                         error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
  631                             (int)xfersize, uio);
  632                 }
  633                 if (error)
  634                         break;
  635 
  636                 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
  637                    (LIST_EMPTY(&bp->b_dep))) {
  638                         /*
  639                          * If there are no dependencies, and it's VMIO,
  640                          * then we don't need the buf, mark it available
  641                          * for freeing.  For non-direct VMIO reads, the VM
  642                          * has the data.
  643                          */
  644                         bp->b_flags |= B_RELBUF;
  645                         brelse(bp);
  646                 } else {
  647                         /*
  648                          * Otherwise let whoever
  649                          * made the request take care of
  650                          * freeing it. We just queue
  651                          * it onto another list.
  652                          */
  653                         bqrelse(bp);
  654                 }
  655         }
  656 
  657         /*
  658          * bp is still set here only when the loop was left by a
  659          * 'break' taken after a successful block read: either the
  660          * copy to uio failed, or the buffer held no data to copy
  661          * (size == 0).  A failed block read releases the buffer and
               * clears bp inside the loop, and every completed iteration
               * resets bp to NULL in the for statement.
  662          */
  663         if (bp != NULL) {
  664                 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
  665                    (LIST_EMPTY(&bp->b_dep))) {
  666                         bp->b_flags |= B_RELBUF;
  667                         brelse(bp);
  668                 } else {
  669                         bqrelse(bp);
  670                 }
  671         }
  672 
              /*
               * Flag the inode as accessed when the read succeeded or moved
               * any data, unless the mount is MNT_NOATIME or MNT_RDONLY or
               * the flag is already set.  The flag is set under the vnode
               * interlock.
               */
  673         if ((error == 0 || uio->uio_resid != orig_resid) &&
  674             (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0 &&
  675             (ip->i_flag & IN_ACCESS) == 0) {
  676                 VI_LOCK(vp);
  677                 ip->i_flag |= IN_ACCESS;
  678                 VI_UNLOCK(vp);
  679         }
  680         return (error);
  681 }
  682 
  683 /*
  684  * Vnode op for writing.
  685  */
  686 static int
  687 ffs_write(ap)
  688         struct vop_write_args /* {
  689                 struct vnode *a_vp;
  690                 struct uio *a_uio;
  691                 int a_ioflag;
  692                 struct ucred *a_cred;
  693         } */ *ap;
  694 {
  695         struct vnode *vp;
  696         struct uio *uio;
  697         struct inode *ip;
  698         struct fs *fs;
  699         struct buf *bp;
  700         ufs_lbn_t lbn;
  701         off_t osize;
  702         ssize_t resid;
  703         int seqcount;
  704         int blkoffset, error, flags, ioflag, size, xfersize;
  705 
  706         vp = ap->a_vp;
  707         uio = ap->a_uio;
  708         ioflag = ap->a_ioflag;
  709         if (ap->a_ioflag & IO_EXT)
  710 #ifdef notyet
  711                 return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
  712 #else
  713                 panic("ffs_write+IO_EXT");
  714 #endif
  715 
  716         seqcount = ap->a_ioflag >> IO_SEQSHIFT;
  717         ip = VTOI(vp);
  718 
  719 #ifdef INVARIANTS
  720         if (uio->uio_rw != UIO_WRITE)
  721                 panic("ffs_write: mode");
  722 #endif
  723 
  724         switch (vp->v_type) {
  725         case VREG:
  726                 if (ioflag & IO_APPEND)
  727                         uio->uio_offset = ip->i_size;
  728                 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
  729                         return (EPERM);
  730                 /* FALLTHROUGH */
  731         case VLNK:
  732                 break;
  733         case VDIR:
  734                 panic("ffs_write: dir write");
  735                 break;
  736         default:
  737                 panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
  738                         (int)uio->uio_offset,
  739                         (int)uio->uio_resid
  740                 );
  741         }
  742 
  743         KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
  744         KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
  745         fs = ITOFS(ip);
  746         if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
  747                 return (EFBIG);
  748         /*
  749          * Maybe this should be above the vnode op call, but so long as
  750          * file servers have no limits, I don't think it matters.
  751          */
  752         if (vn_rlimit_fsize(vp, uio, uio->uio_td))
  753                 return (EFBIG);
  754 
  755         resid = uio->uio_resid;
  756         osize = ip->i_size;
  757         if (seqcount > BA_SEQMAX)
  758                 flags = BA_SEQMAX << BA_SEQSHIFT;
  759         else
  760                 flags = seqcount << BA_SEQSHIFT;
  761         if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
  762                 flags |= IO_SYNC;
  763         flags |= BA_UNMAPPED;
  764 
  765         for (error = 0; uio->uio_resid > 0;) {
  766                 lbn = lblkno(fs, uio->uio_offset);
  767                 blkoffset = blkoff(fs, uio->uio_offset);
  768                 xfersize = fs->fs_bsize - blkoffset;
  769                 if (uio->uio_resid < xfersize)
  770                         xfersize = uio->uio_resid;
  771                 if (uio->uio_offset + xfersize > ip->i_size)
  772                         vnode_pager_setsize(vp, uio->uio_offset + xfersize);
  773 
  774                 /*
  775                  * We must perform a read-before-write if the transfer size
  776                  * does not cover the entire buffer.
  777                  */
  778                 if (fs->fs_bsize > xfersize)
  779                         flags |= BA_CLRBUF;
  780                 else
  781                         flags &= ~BA_CLRBUF;
  782 /* XXX is uio->uio_offset the right thing here? */
  783                 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
  784                     ap->a_cred, flags, &bp);
  785                 if (error != 0) {
  786                         vnode_pager_setsize(vp, ip->i_size);
  787                         break;
  788                 }
  789                 if (ioflag & IO_DIRECT)
  790                         bp->b_flags |= B_DIRECT;
  791                 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
  792                         bp->b_flags |= B_NOCACHE;
  793 
  794                 if (uio->uio_offset + xfersize > ip->i_size) {
  795                         ip->i_size = uio->uio_offset + xfersize;
  796                         DIP_SET(ip, i_size, ip->i_size);
  797                 }
  798 
  799                 size = blksize(fs, ip, lbn) - bp->b_resid;
  800                 if (size < xfersize)
  801                         xfersize = size;
  802 
  803                 if (buf_mapped(bp)) {
  804                         error = vn_io_fault_uiomove((char *)bp->b_data +
  805                             blkoffset, (int)xfersize, uio);
  806                 } else {
  807                         error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
  808                             (int)xfersize, uio);
  809                 }
  810                 /*
  811                  * If the buffer is not already filled and we encounter an
  812                  * error while trying to fill it, we have to clear out any
  813                  * garbage data from the pages instantiated for the buffer.
  814                  * If we do not, a failed uiomove() during a write can leave
  815                  * the prior contents of the pages exposed to a userland mmap.
  816                  *
  817                  * Note that we need only clear buffers with a transfer size
  818                  * equal to the block size because buffers with a shorter
  819                  * transfer size were cleared above by the call to UFS_BALLOC()
  820                  * with the BA_CLRBUF flag set.
  821                  *
  822                  * If the source region for uiomove identically mmaps the
  823                  * buffer, uiomove() performed the NOP copy, and the buffer
  824                  * content remains valid because the page fault handler
  825                  * validated the pages.
  826                  */
  827                 if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
  828                     fs->fs_bsize == xfersize)
  829                         vfs_bio_clrbuf(bp);
  830                 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
  831                    (LIST_EMPTY(&bp->b_dep))) {
  832                         bp->b_flags |= B_RELBUF;
  833                 }
  834 
  835                 /*
  836                  * If IO_SYNC each buffer is written synchronously.  Otherwise
  837                  * if we have a severe page deficiency write the buffer
  838                  * asynchronously.  Otherwise try to cluster, and if that
  839                  * doesn't do it then either do an async write (if O_DIRECT),
  840                  * or a delayed write (if not).
  841                  */
  842                 if (ioflag & IO_SYNC) {
  843                         (void)bwrite(bp);
  844                 } else if (vm_page_count_severe() ||
  845                             buf_dirty_count_severe() ||
  846                             (ioflag & IO_ASYNC)) {
  847                         bp->b_flags |= B_CLUSTEROK;
  848                         bawrite(bp);
  849                 } else if (xfersize + blkoffset == fs->fs_bsize) {
  850                         if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
  851                                 bp->b_flags |= B_CLUSTEROK;
  852                                 cluster_write(vp, bp, ip->i_size, seqcount,
  853                                     GB_UNMAPPED);
  854                         } else {
  855                                 bawrite(bp);
  856                         }
  857                 } else if (ioflag & IO_DIRECT) {
  858                         bp->b_flags |= B_CLUSTEROK;
  859                         bawrite(bp);
  860                 } else {
  861                         bp->b_flags |= B_CLUSTEROK;
  862                         bdwrite(bp);
  863                 }
  864                 if (error || xfersize == 0)
  865                         break;
  866                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
  867         }
  868         /*
  869          * If we successfully wrote any data, and we are not the superuser
  870          * we clear the setuid and setgid bits as a precaution against
  871          * tampering.
  872          */
  873         if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
  874             ap->a_cred) {
  875                 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) {
  876                         ip->i_mode &= ~(ISUID | ISGID);
  877                         DIP_SET(ip, i_mode, ip->i_mode);
  878                 }
  879         }
  880         if (error) {
  881                 if (ioflag & IO_UNIT) {
  882                         (void)ffs_truncate(vp, osize,
  883                             IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred);
  884                         uio->uio_offset -= resid - uio->uio_resid;
  885                         uio->uio_resid = resid;
  886                 }
  887         } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
  888                 error = ffs_update(vp, 1);
  889         return (error);
  890 }
  891 
  892 /*
  893  * Extended attribute area reading.
  894  */
  895 static int
  896 ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
  897 {
  898         struct inode *ip;
  899         struct ufs2_dinode *dp;
  900         struct fs *fs;
  901         struct buf *bp;
  902         ufs_lbn_t lbn, nextlbn;
  903         off_t bytesinfile;
  904         long size, xfersize, blkoffset;
  905         ssize_t orig_resid;
  906         int error;
  907 
  908         ip = VTOI(vp);
  909         fs = ITOFS(ip);
  910         dp = ip->i_din2;
  911 
  912 #ifdef INVARIANTS
  913         if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
  914                 panic("ffs_extread: mode");
  915 
  916 #endif
  917         orig_resid = uio->uio_resid;
  918         KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
  919         if (orig_resid == 0)
  920                 return (0);
  921         KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));
  922 
  923         for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
  924                 if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
  925                         break;
  926                 lbn = lblkno(fs, uio->uio_offset);
  927                 nextlbn = lbn + 1;
  928 
  929                 /*
  930                  * size of buffer.  The buffer representing the
  931                  * end of the file is rounded up to the size of
  932                  * the block type ( fragment or full block,
  933                  * depending ).
  934                  */
  935                 size = sblksize(fs, dp->di_extsize, lbn);
  936                 blkoffset = blkoff(fs, uio->uio_offset);
  937 
  938                 /*
  939                  * The amount we want to transfer in this iteration is
  940                  * one FS block less the amount of the data before
  941                  * our startpoint (duh!)
  942                  */
  943                 xfersize = fs->fs_bsize - blkoffset;
  944 
  945                 /*
  946                  * But if we actually want less than the block,
  947                  * or the file doesn't have a whole block more of data,
  948                  * then use the lesser number.
  949                  */
  950                 if (uio->uio_resid < xfersize)
  951                         xfersize = uio->uio_resid;
  952                 if (bytesinfile < xfersize)
  953                         xfersize = bytesinfile;
  954 
  955                 if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
  956                         /*
  957                          * Don't do readahead if this is the end of the info.
  958                          */
  959                         error = bread(vp, -1 - lbn, size, NOCRED, &bp);
  960                 } else {
  961                         /*
  962                          * If we have a second block, then
  963                          * fire off a request for a readahead
  964                          * as well as a read. Note that the 4th and 5th
  965                          * arguments point to arrays of the size specified in
  966                          * the 6th argument.
  967                          */
  968                         u_int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
  969 
  970                         nextlbn = -1 - nextlbn;
  971                         error = breadn(vp, -1 - lbn,
  972                             size, &nextlbn, &nextsize, 1, NOCRED, &bp);
  973                 }
  974                 if (error) {
  975                         brelse(bp);
  976                         bp = NULL;
  977                         break;
  978                 }
  979 
  980                 /*
  981                  * If IO_DIRECT then set B_DIRECT for the buffer.  This
  982                  * will cause us to attempt to release the buffer later on
  983                  * and will cause the buffer cache to attempt to free the
  984                  * underlying pages.
  985                  */
  986                 if (ioflag & IO_DIRECT)
  987                         bp->b_flags |= B_DIRECT;
  988 
  989                 /*
  990                  * We should only get non-zero b_resid when an I/O error
  991                  * has occurred, which should cause us to break above.
  992                  * However, if the short read did not cause an error,
  993                  * then we want to ensure that we do not uiomove bad
  994                  * or uninitialized data.
  995                  */
  996                 size -= bp->b_resid;
  997                 if (size < xfersize) {
  998                         if (size == 0)
  999                                 break;
 1000                         xfersize = size;
 1001                 }
 1002 
 1003                 error = uiomove((char *)bp->b_data + blkoffset,
 1004                                         (int)xfersize, uio);
 1005                 if (error)
 1006                         break;
 1007 
 1008                 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
 1009                    (LIST_EMPTY(&bp->b_dep))) {
 1010                         /*
 1011                          * If there are no dependencies, and it's VMIO,
 1012                          * then we don't need the buf, mark it available
 1013                          * for freeing.  For non-direct VMIO reads, the VM
 1014                          * has the data.
 1015                          */
 1016                         bp->b_flags |= B_RELBUF;
 1017                         brelse(bp);
 1018                 } else {
 1019                         /*
 1020                          * Otherwise let whoever
 1021                          * made the request take care of
 1022                          * freeing it. We just queue
 1023                          * it onto another list.
 1024                          */
 1025                         bqrelse(bp);
 1026                 }
 1027         }
 1028 
 1029         /*
 1030          * This can only happen in the case of an error
 1031          * because the loop above resets bp to NULL on each iteration
 1032          * and on normal completion has not set a new value into it.
 1033          * so it must have come from a 'break' statement
 1034          */
 1035         if (bp != NULL) {
 1036                 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
 1037                    (LIST_EMPTY(&bp->b_dep))) {
 1038                         bp->b_flags |= B_RELBUF;
 1039                         brelse(bp);
 1040                 } else {
 1041                         bqrelse(bp);
 1042                 }
 1043         }
 1044         return (error);
 1045 }
 1046 
 1047 /*
 1048  * Extended attribute area writing.
 1049  */
 1050 static int
 1051 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
 1052 {
 1053         struct inode *ip;
 1054         struct ufs2_dinode *dp;
 1055         struct fs *fs;
 1056         struct buf *bp;
 1057         ufs_lbn_t lbn;
 1058         off_t osize;
 1059         ssize_t resid;
 1060         int blkoffset, error, flags, size, xfersize;
 1061 
 1062         ip = VTOI(vp);
 1063         fs = ITOFS(ip);
 1064         dp = ip->i_din2;
 1065 
 1066 #ifdef INVARIANTS
 1067         if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
 1068                 panic("ffs_extwrite: mode");
 1069 #endif
 1070 
 1071         if (ioflag & IO_APPEND)
 1072                 uio->uio_offset = dp->di_extsize;
 1073         KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
 1074         KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
 1075         if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
 1076                 return (EFBIG);
 1077 
 1078         resid = uio->uio_resid;
 1079         osize = dp->di_extsize;
 1080         flags = IO_EXT;
 1081         if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
 1082                 flags |= IO_SYNC;
 1083 
 1084         for (error = 0; uio->uio_resid > 0;) {
 1085                 lbn = lblkno(fs, uio->uio_offset);
 1086                 blkoffset = blkoff(fs, uio->uio_offset);
 1087                 xfersize = fs->fs_bsize - blkoffset;
 1088                 if (uio->uio_resid < xfersize)
 1089                         xfersize = uio->uio_resid;
 1090 
 1091                 /*
 1092                  * We must perform a read-before-write if the transfer size
 1093                  * does not cover the entire buffer.
 1094                  */
 1095                 if (fs->fs_bsize > xfersize)
 1096                         flags |= BA_CLRBUF;
 1097                 else
 1098                         flags &= ~BA_CLRBUF;
 1099                 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
 1100                     ucred, flags, &bp);
 1101                 if (error != 0)
 1102                         break;
 1103                 /*
 1104                  * If the buffer is not valid we have to clear out any
 1105                  * garbage data from the pages instantiated for the buffer.
 1106                  * If we do not, a failed uiomove() during a write can leave
 1107                  * the prior contents of the pages exposed to a userland
 1108                  * mmap().  XXX deal with uiomove() errors a better way.
 1109                  */
 1110                 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
 1111                         vfs_bio_clrbuf(bp);
 1112                 if (ioflag & IO_DIRECT)
 1113                         bp->b_flags |= B_DIRECT;
 1114 
 1115                 if (uio->uio_offset + xfersize > dp->di_extsize)
 1116                         dp->di_extsize = uio->uio_offset + xfersize;
 1117 
 1118                 size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
 1119                 if (size < xfersize)
 1120                         xfersize = size;
 1121 
 1122                 error =
 1123                     uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
 1124                 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
 1125                    (LIST_EMPTY(&bp->b_dep))) {
 1126                         bp->b_flags |= B_RELBUF;
 1127                 }
 1128 
 1129                 /*
 1130                  * If IO_SYNC each buffer is written synchronously.  Otherwise
 1131                  * if we have a severe page deficiency write the buffer
 1132                  * asynchronously.  Otherwise try to cluster, and if that
 1133                  * doesn't do it then either do an async write (if O_DIRECT),
 1134                  * or a delayed write (if not).
 1135                  */
 1136                 if (ioflag & IO_SYNC) {
 1137                         (void)bwrite(bp);
 1138                 } else if (vm_page_count_severe() ||
 1139                             buf_dirty_count_severe() ||
 1140                             xfersize + blkoffset == fs->fs_bsize ||
 1141                             (ioflag & (IO_ASYNC | IO_DIRECT)))
 1142                         bawrite(bp);
 1143                 else
 1144                         bdwrite(bp);
 1145                 if (error || xfersize == 0)
 1146                         break;
 1147                 ip->i_flag |= IN_CHANGE;
 1148         }
 1149         /*
 1150          * If we successfully wrote any data, and we are not the superuser
 1151          * we clear the setuid and setgid bits as a precaution against
 1152          * tampering.
 1153          */
 1154         if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
 1155                 if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID, 0)) {
 1156                         ip->i_mode &= ~(ISUID | ISGID);
 1157                         dp->di_mode = ip->i_mode;
 1158                 }
 1159         }
 1160         if (error) {
 1161                 if (ioflag & IO_UNIT) {
 1162                         (void)ffs_truncate(vp, osize,
 1163                             IO_EXT | (ioflag&IO_SYNC), ucred);
 1164                         uio->uio_offset -= resid - uio->uio_resid;
 1165                         uio->uio_resid = resid;
 1166                 }
 1167         } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
 1168                 error = ffs_update(vp, 1);
 1169         return (error);
 1170 }
 1171 
 1172 
 1173 /*
 1174  * Vnode operating to retrieve a named extended attribute.
 1175  *
 1176  * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 1177  * the length of the EA, and possibly the pointer to the entry and to the data.
 1178  */
 1179 static int
 1180 ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac)
 1181 {
 1182         u_char *p, *pe, *pn, *p0;
 1183         int eapad1, eapad2, ealength, ealen, nlen;
 1184         uint32_t ul;
 1185 
 1186         pe = ptr + length;
 1187         nlen = strlen(name);
 1188 
 1189         for (p = ptr; p < pe; p = pn) {
 1190                 p0 = p;
 1191                 bcopy(p, &ul, sizeof(ul));
 1192                 pn = p + ul;
 1193                 /* make sure this entry is complete */
 1194                 if (pn > pe)
 1195                         break;
 1196                 p += sizeof(uint32_t);
 1197                 if (*p != nspace)
 1198                         continue;
 1199                 p++;
 1200                 eapad2 = *p++;
 1201                 if (*p != nlen)
 1202                         continue;
 1203                 p++;
 1204                 if (bcmp(p, name, nlen))
 1205                         continue;
 1206                 ealength = sizeof(uint32_t) + 3 + nlen;
 1207                 eapad1 = 8 - (ealength % 8);
 1208                 if (eapad1 == 8)
 1209                         eapad1 = 0;
 1210                 ealength += eapad1;
 1211                 ealen = ul - ealength - eapad2;
 1212                 p += nlen + eapad1;
 1213                 if (eap != NULL)
 1214                         *eap = p0;
 1215                 if (eac != NULL)
 1216                         *eac = p;
 1217                 return (ealen);
 1218         }
 1219         return(-1);
 1220 }
 1221 
 1222 static int
 1223 ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
 1224 {
 1225         struct inode *ip;
 1226         struct ufs2_dinode *dp;
 1227         struct fs *fs;
 1228         struct uio luio;
 1229         struct iovec liovec;
 1230         u_int easize;
 1231         int error;
 1232         u_char *eae;
 1233 
 1234         ip = VTOI(vp);
 1235         fs = ITOFS(ip);
 1236         dp = ip->i_din2;
 1237         easize = dp->di_extsize;
 1238         if ((uoff_t)easize + extra > NXADDR * fs->fs_bsize)
 1239                 return (EFBIG);
 1240 
 1241         eae = malloc(easize + extra, M_TEMP, M_WAITOK);
 1242 
 1243         liovec.iov_base = eae;
 1244         liovec.iov_len = easize;
 1245         luio.uio_iov = &liovec;
 1246         luio.uio_iovcnt = 1;
 1247         luio.uio_offset = 0;
 1248         luio.uio_resid = easize;
 1249         luio.uio_segflg = UIO_SYSSPACE;
 1250         luio.uio_rw = UIO_READ;
 1251         luio.uio_td = td;
 1252 
 1253         error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
 1254         if (error) {
 1255                 free(eae, M_TEMP);
 1256                 return(error);
 1257         }
 1258         *p = eae;
 1259         return (0);
 1260 }
 1261 
 1262 static void
 1263 ffs_lock_ea(struct vnode *vp)
 1264 {
 1265         struct inode *ip;
 1266 
 1267         ip = VTOI(vp);
 1268         VI_LOCK(vp);
 1269         while (ip->i_flag & IN_EA_LOCKED) {
 1270                 ip->i_flag |= IN_EA_LOCKWAIT;
 1271                 msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea",
 1272                     0);
 1273         }
 1274         ip->i_flag |= IN_EA_LOCKED;
 1275         VI_UNLOCK(vp);
 1276 }
 1277 
 1278 static void
 1279 ffs_unlock_ea(struct vnode *vp)
 1280 {
 1281         struct inode *ip;
 1282 
 1283         ip = VTOI(vp);
 1284         VI_LOCK(vp);
 1285         if (ip->i_flag & IN_EA_LOCKWAIT)
 1286                 wakeup(&ip->i_ea_refs);
 1287         ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT);
 1288         VI_UNLOCK(vp);
 1289 }
 1290 
 1291 static int
 1292 ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
 1293 {
 1294         struct inode *ip;
 1295         struct ufs2_dinode *dp;
 1296         int error;
 1297 
 1298         ip = VTOI(vp);
 1299 
 1300         ffs_lock_ea(vp);
 1301         if (ip->i_ea_area != NULL) {
 1302                 ip->i_ea_refs++;
 1303                 ffs_unlock_ea(vp);
 1304                 return (0);
 1305         }
 1306         dp = ip->i_din2;
 1307         error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
 1308         if (error) {
 1309                 ffs_unlock_ea(vp);
 1310                 return (error);
 1311         }
 1312         ip->i_ea_len = dp->di_extsize;
 1313         ip->i_ea_error = 0;
 1314         ip->i_ea_refs++;
 1315         ffs_unlock_ea(vp);
 1316         return (0);
 1317 }
 1318 
 1319 /*
 1320  * Vnode extattr transaction commit/abort
 1321  */
 1322 static int
 1323 ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
 1324 {
 1325         struct inode *ip;
 1326         struct uio luio;
 1327         struct iovec liovec;
 1328         int error;
 1329         struct ufs2_dinode *dp;
 1330 
 1331         ip = VTOI(vp);
 1332 
 1333         ffs_lock_ea(vp);
 1334         if (ip->i_ea_area == NULL) {
 1335                 ffs_unlock_ea(vp);
 1336                 return (EINVAL);
 1337         }
 1338         dp = ip->i_din2;
 1339         error = ip->i_ea_error;
 1340         if (commit && error == 0) {
 1341                 ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit");
 1342                 if (cred == NOCRED)
 1343                         cred =  vp->v_mount->mnt_cred;
 1344                 liovec.iov_base = ip->i_ea_area;
 1345                 liovec.iov_len = ip->i_ea_len;
 1346                 luio.uio_iov = &liovec;
 1347                 luio.uio_iovcnt = 1;
 1348                 luio.uio_offset = 0;
 1349                 luio.uio_resid = ip->i_ea_len;
 1350                 luio.uio_segflg = UIO_SYSSPACE;
 1351                 luio.uio_rw = UIO_WRITE;
 1352                 luio.uio_td = td;
 1353                 /* XXX: I'm not happy about truncating to zero size */
 1354                 if (ip->i_ea_len < dp->di_extsize)
 1355                         error = ffs_truncate(vp, 0, IO_EXT, cred);
 1356                 error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
 1357         }
 1358         if (--ip->i_ea_refs == 0) {
 1359                 free(ip->i_ea_area, M_TEMP);
 1360                 ip->i_ea_area = NULL;
 1361                 ip->i_ea_len = 0;
 1362                 ip->i_ea_error = 0;
 1363         }
 1364         ffs_unlock_ea(vp);
 1365         return (error);
 1366 }
 1367 
 1368 /*
 1369  * Vnode extattr strategy routine for fifos.
 1370  *
 1371  * We need to check for a read or write of the external attributes.
 1372  * Otherwise we just fall through and do the usual thing.
 1373  */
 1374 static int
 1375 ffsext_strategy(struct vop_strategy_args *ap)
 1376 /*
 1377 struct vop_strategy_args {
 1378         struct vnodeop_desc *a_desc;
 1379         struct vnode *a_vp;
 1380         struct buf *a_bp;
 1381 };
 1382 */
 1383 {
 1384         struct vnode *vp;
 1385         daddr_t lbn;
 1386 
 1387         vp = ap->a_vp;
 1388         lbn = ap->a_bp->b_lblkno;
 1389         if (I_IS_UFS2(VTOI(vp)) && lbn < 0 && lbn >= -NXADDR)
 1390                 return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
 1391         if (vp->v_type == VFIFO)
 1392                 return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
 1393         panic("spec nodes went here");
 1394 }
 1395 
 1396 /*
 1397  * Vnode extattr transaction commit/abort
 1398  */
 1399 static int
 1400 ffs_openextattr(struct vop_openextattr_args *ap)
 1401 /*
 1402 struct vop_openextattr_args {
 1403         struct vnodeop_desc *a_desc;
 1404         struct vnode *a_vp;
 1405         IN struct ucred *a_cred;
 1406         IN struct thread *a_td;
 1407 };
 1408 */
 1409 {
 1410 
 1411         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1412                 return (EOPNOTSUPP);
 1413 
 1414         return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
 1415 }
 1416 
 1417 
 1418 /*
 1419  * Vnode extattr transaction commit/abort
 1420  */
 1421 static int
 1422 ffs_closeextattr(struct vop_closeextattr_args *ap)
 1423 /*
 1424 struct vop_closeextattr_args {
 1425         struct vnodeop_desc *a_desc;
 1426         struct vnode *a_vp;
 1427         int a_commit;
 1428         IN struct ucred *a_cred;
 1429         IN struct thread *a_td;
 1430 };
 1431 */
 1432 {
 1433 
 1434         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1435                 return (EOPNOTSUPP);
 1436 
 1437         if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY))
 1438                 return (EROFS);
 1439 
 1440         return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
 1441 }
 1442 
 1443 /*
 1444  * Vnode operation to remove a named attribute.
 1445  */
 1446 static int
 1447 ffs_deleteextattr(struct vop_deleteextattr_args *ap)
 1448 /*
 1449 vop_deleteextattr {
 1450         IN struct vnode *a_vp;
 1451         IN int a_attrnamespace;
 1452         IN const char *a_name;
 1453         IN struct ucred *a_cred;
 1454         IN struct thread *a_td;
 1455 };
 1456 */
 1457 {
 1458         struct inode *ip;
 1459         struct fs *fs;
 1460         uint32_t ealength, ul;
 1461         int ealen, olen, eapad1, eapad2, error, i, easize;
 1462         u_char *eae, *p;
 1463 
 1464         ip = VTOI(ap->a_vp);
 1465         fs = ITOFS(ip);
 1466 
 1467         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1468                 return (EOPNOTSUPP);
 1469 
 1470         if (strlen(ap->a_name) == 0)
 1471                 return (EINVAL);
 1472 
 1473         if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
 1474                 return (EROFS);
 1475 
 1476         error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 1477             ap->a_cred, ap->a_td, VWRITE);
 1478         if (error) {
 1479 
 1480                 /*
 1481                  * ffs_lock_ea is not needed there, because the vnode
 1482                  * must be exclusively locked.
 1483                  */
 1484                 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 1485                         ip->i_ea_error = error;
 1486                 return (error);
 1487         }
 1488 
 1489         error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
 1490         if (error)
 1491                 return (error);
 1492 
 1493         ealength = eapad1 = ealen = eapad2 = 0;
 1494 
 1495         eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
 1496         bcopy(ip->i_ea_area, eae, ip->i_ea_len);
 1497         easize = ip->i_ea_len;
 1498 
 1499         olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
 1500             &p, NULL);
 1501         if (olen == -1) {
 1502                 /* delete but nonexistent */
 1503                 free(eae, M_TEMP);
 1504                 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1505                 return(ENOATTR);
 1506         }
 1507         bcopy(p, &ul, sizeof ul);
 1508         i = p - eae + ul;
 1509         if (ul != ealength) {
 1510                 bcopy(p + ul, p + ealength, easize - i);
 1511                 easize += (ealength - ul);
 1512         }
 1513         if (easize > NXADDR * fs->fs_bsize) {
 1514                 free(eae, M_TEMP);
 1515                 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1516                 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 1517                         ip->i_ea_error = ENOSPC;
 1518                 return(ENOSPC);
 1519         }
 1520         p = ip->i_ea_area;
 1521         ip->i_ea_area = eae;
 1522         ip->i_ea_len = easize;
 1523         free(p, M_TEMP);
 1524         error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
 1525         return(error);
 1526 }
 1527 
 1528 /*
 1529  * Vnode operation to retrieve a named extended attribute.
 1530  */
 1531 static int
 1532 ffs_getextattr(struct vop_getextattr_args *ap)
 1533 /*
 1534 vop_getextattr {
 1535         IN struct vnode *a_vp;
 1536         IN int a_attrnamespace;
 1537         IN const char *a_name;
 1538         INOUT struct uio *a_uio;
 1539         OUT size_t *a_size;
 1540         IN struct ucred *a_cred;
 1541         IN struct thread *a_td;
 1542 };
 1543 */
 1544 {
 1545         struct inode *ip;
 1546         u_char *eae, *p;
 1547         unsigned easize;
 1548         int error, ealen;
 1549 
 1550         ip = VTOI(ap->a_vp);
 1551 
 1552         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1553                 return (EOPNOTSUPP);
 1554 
 1555         error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 1556             ap->a_cred, ap->a_td, VREAD);
 1557         if (error)
 1558                 return (error);
 1559 
 1560         error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
 1561         if (error)
 1562                 return (error);
 1563 
 1564         eae = ip->i_ea_area;
 1565         easize = ip->i_ea_len;
 1566 
 1567         ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
 1568             NULL, &p);
 1569         if (ealen >= 0) {
 1570                 error = 0;
 1571                 if (ap->a_size != NULL)
 1572                         *ap->a_size = ealen;
 1573                 else if (ap->a_uio != NULL)
 1574                         error = uiomove(p, ealen, ap->a_uio);
 1575         } else
 1576                 error = ENOATTR;
 1577 
 1578         ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1579         return(error);
 1580 }
 1581 
 1582 /*
 1583  * Vnode operation to retrieve extended attributes on a vnode.
 1584  */
 1585 static int
 1586 ffs_listextattr(struct vop_listextattr_args *ap)
 1587 /*
 1588 vop_listextattr {
 1589         IN struct vnode *a_vp;
 1590         IN int a_attrnamespace;
 1591         INOUT struct uio *a_uio;
 1592         OUT size_t *a_size;
 1593         IN struct ucred *a_cred;
 1594         IN struct thread *a_td;
 1595 };
 1596 */
 1597 {
 1598         struct inode *ip;
 1599         u_char *eae, *p, *pe, *pn;
 1600         unsigned easize;
 1601         uint32_t ul;
 1602         int error, ealen;
 1603 
 1604         ip = VTOI(ap->a_vp);
 1605 
 1606         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1607                 return (EOPNOTSUPP);
 1608 
 1609         error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 1610             ap->a_cred, ap->a_td, VREAD);
 1611         if (error)
 1612                 return (error);
 1613 
 1614         error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
 1615         if (error)
 1616                 return (error);
 1617         eae = ip->i_ea_area;
 1618         easize = ip->i_ea_len;
 1619 
 1620         error = 0;
 1621         if (ap->a_size != NULL)
 1622                 *ap->a_size = 0;
 1623         pe = eae + easize;
 1624         for(p = eae; error == 0 && p < pe; p = pn) {
 1625                 bcopy(p, &ul, sizeof(ul));
 1626                 pn = p + ul;
 1627                 if (pn > pe)
 1628                         break;
 1629                 p += sizeof(ul);
 1630                 if (*p++ != ap->a_attrnamespace)
 1631                         continue;
 1632                 p++;    /* pad2 */
 1633                 ealen = *p;
 1634                 if (ap->a_size != NULL) {
 1635                         *ap->a_size += ealen + 1;
 1636                 } else if (ap->a_uio != NULL) {
 1637                         error = uiomove(p, ealen + 1, ap->a_uio);
 1638                 }
 1639         }
 1640         ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1641         return(error);
 1642 }
 1643 
 1644 /*
 1645  * Vnode operation to set a named attribute.
 1646  */
 1647 static int
 1648 ffs_setextattr(struct vop_setextattr_args *ap)
 1649 /*
 1650 vop_setextattr {
 1651         IN struct vnode *a_vp;
 1652         IN int a_attrnamespace;
 1653         IN const char *a_name;
 1654         INOUT struct uio *a_uio;
 1655         IN struct ucred *a_cred;
 1656         IN struct thread *a_td;
 1657 };
 1658 */
 1659 {
 1660         struct inode *ip;
 1661         struct fs *fs;
 1662         uint32_t ealength, ul;
 1663         ssize_t ealen;
 1664         int olen, eapad1, eapad2, error, i, easize;
 1665         u_char *eae, *p;
 1666 
 1667         ip = VTOI(ap->a_vp);
 1668         fs = ITOFS(ip);
 1669 
 1670         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1671                 return (EOPNOTSUPP);
 1672 
 1673         if (strlen(ap->a_name) == 0)
 1674                 return (EINVAL);
 1675 
 1676         /* XXX Now unsupported API to delete EAs using NULL uio. */
 1677         if (ap->a_uio == NULL)
 1678                 return (EOPNOTSUPP);
 1679 
 1680         if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
 1681                 return (EROFS);
 1682 
 1683         ealen = ap->a_uio->uio_resid;
 1684         if (ealen < 0 || ealen > lblktosize(fs, NXADDR))
 1685                 return (EINVAL);
 1686 
 1687         error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 1688             ap->a_cred, ap->a_td, VWRITE);
 1689         if (error) {
 1690 
 1691                 /*
 1692                  * ffs_lock_ea is not needed there, because the vnode
 1693                  * must be exclusively locked.
 1694                  */
 1695                 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 1696                         ip->i_ea_error = error;
 1697                 return (error);
 1698         }
 1699 
 1700         error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
 1701         if (error)
 1702                 return (error);
 1703 
 1704         ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
 1705         eapad1 = 8 - (ealength % 8);
 1706         if (eapad1 == 8)
 1707                 eapad1 = 0;
 1708         eapad2 = 8 - (ealen % 8);
 1709         if (eapad2 == 8)
 1710                 eapad2 = 0;
 1711         ealength += eapad1 + ealen + eapad2;
 1712 
 1713         eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
 1714         bcopy(ip->i_ea_area, eae, ip->i_ea_len);
 1715         easize = ip->i_ea_len;
 1716 
 1717         olen = ffs_findextattr(eae, easize,
 1718             ap->a_attrnamespace, ap->a_name, &p, NULL);
 1719         if (olen == -1) {
 1720                 /* new, append at end */
 1721                 p = eae + easize;
 1722                 easize += ealength;
 1723         } else {
 1724                 bcopy(p, &ul, sizeof ul);
 1725                 i = p - eae + ul;
 1726                 if (ul != ealength) {
 1727                         bcopy(p + ul, p + ealength, easize - i);
 1728                         easize += (ealength - ul);
 1729                 }
 1730         }
 1731         if (easize > lblktosize(fs, NXADDR)) {
 1732                 free(eae, M_TEMP);
 1733                 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1734                 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 1735                         ip->i_ea_error = ENOSPC;
 1736                 return(ENOSPC);
 1737         }
 1738         bcopy(&ealength, p, sizeof(ealength));
 1739         p += sizeof(ealength);
 1740         *p++ = ap->a_attrnamespace;
 1741         *p++ = eapad2;
 1742         *p++ = strlen(ap->a_name);
 1743         strcpy(p, ap->a_name);
 1744         p += strlen(ap->a_name);
 1745         bzero(p, eapad1);
 1746         p += eapad1;
 1747         error = uiomove(p, ealen, ap->a_uio);
 1748         if (error) {
 1749                 free(eae, M_TEMP);
 1750                 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1751                 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 1752                         ip->i_ea_error = error;
 1753                 return(error);
 1754         }
 1755         p += ealen;
 1756         bzero(p, eapad2);
 1757 
 1758         p = ip->i_ea_area;
 1759         ip->i_ea_area = eae;
 1760         ip->i_ea_len = easize;
 1761         free(p, M_TEMP);
 1762         error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
 1763         return(error);
 1764 }
 1765 
 1766 /*
 1767  * Vnode pointer to File handle
 1768  */
 1769 static int
 1770 ffs_vptofh(struct vop_vptofh_args *ap)
 1771 /*
 1772 vop_vptofh {
 1773         IN struct vnode *a_vp;
 1774         IN struct fid *a_fhp;
 1775 };
 1776 */
 1777 {
 1778         struct inode *ip;
 1779         struct ufid *ufhp;
 1780 
 1781         ip = VTOI(ap->a_vp);
 1782         ufhp = (struct ufid *)ap->a_fhp;
 1783         ufhp->ufid_len = sizeof(struct ufid);
 1784         ufhp->ufid_ino = ip->i_number;
 1785         ufhp->ufid_gen = ip->i_gen;
 1786         return (0);
 1787 }
 1788 
 1789 SYSCTL_DECL(_vfs_ffs);
 1790 static int use_buf_pager = 0;
 1791 SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0,
 1792     "Always use buffer pager instead of bmap");
 1793 
 1794 static daddr_t
 1795 ffs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
 1796 {
 1797 
 1798         return (lblkno(VFSTOUFS(vp->v_mount)->um_fs, off));
 1799 }
 1800 
 1801 static int
 1802 ffs_gbp_getblksz(struct vnode *vp, daddr_t lbn)
 1803 {
 1804 
 1805         return (blksize(VFSTOUFS(vp->v_mount)->um_fs, VTOI(vp), lbn));
 1806 }
 1807 
 1808 static int
 1809 ffs_getpages(struct vop_getpages_args *ap)
 1810 {
 1811         struct vnode *vp;
 1812         struct ufsmount *um;
 1813 
 1814         vp = ap->a_vp;
 1815         um = VFSTOUFS(vp->v_mount);
 1816 
 1817         if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE)
 1818                 return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
 1819                     ap->a_rbehind, ap->a_rahead, NULL, NULL));
 1820         return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
 1821             ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz));
 1822 }
Cache object: 3d7bed4c68a2d0f8f68dbe2fae44d8aa
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/ufs/ffs/ffs_vnops.c

FreeBSD/Linux Kernel Cross Reference
sys/ufs/ffs/ffs_vnops.c