The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/ufs/ffs/ffs_vnops.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
    3  *
    4  * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
    5  * All rights reserved.
    6  *
    7  * This software was developed for the FreeBSD Project by Marshall
    8  * Kirk McKusick and Network Associates Laboratories, the Security
    9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
   10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
   11  * research program
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  * Copyright (c) 1982, 1986, 1989, 1993
   35  *      The Regents of the University of California.  All rights reserved.
   36  *
   37  * Redistribution and use in source and binary forms, with or without
   38  * modification, are permitted provided that the following conditions
   39  * are met:
   40  * 1. Redistributions of source code must retain the above copyright
   41  *    notice, this list of conditions and the following disclaimer.
   42  * 2. Redistributions in binary form must reproduce the above copyright
   43  *    notice, this list of conditions and the following disclaimer in the
   44  *    documentation and/or other materials provided with the distribution.
   45  * 3. Neither the name of the University nor the names of its contributors
   46  *    may be used to endorse or promote products derived from this software
   47  *    without specific prior written permission.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   59  * SUCH DAMAGE.
   60  *
   61  *      from: @(#)ufs_readwrite.c       8.11 (Berkeley) 5/8/95
   62  * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
   63  *      @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95
   64  */
   65 
   66 #include <sys/cdefs.h>
   67 __FBSDID("$FreeBSD$");
   68 
   69 #include "opt_directio.h"
   70 #include "opt_ffs.h"
   71 
   72 #include <sys/param.h>
   73 #include <sys/bio.h>
   74 #include <sys/systm.h>
   75 #include <sys/buf.h>
   76 #include <sys/conf.h>
   77 #include <sys/extattr.h>
   78 #include <sys/kernel.h>
   79 #include <sys/limits.h>
   80 #include <sys/malloc.h>
   81 #include <sys/mount.h>
   82 #include <sys/priv.h>
   83 #include <sys/rwlock.h>
   84 #include <sys/stat.h>
   85 #include <sys/sysctl.h>
   86 #include <sys/vmmeter.h>
   87 #include <sys/vnode.h>
   88 
   89 #include <vm/vm.h>
   90 #include <vm/vm_param.h>
   91 #include <vm/vm_extern.h>
   92 #include <vm/vm_object.h>
   93 #include <vm/vm_page.h>
   94 #include <vm/vm_pager.h>
   95 #include <vm/vnode_pager.h>
   96 
   97 #include <ufs/ufs/extattr.h>
   98 #include <ufs/ufs/quota.h>
   99 #include <ufs/ufs/inode.h>
  100 #include <ufs/ufs/ufs_extern.h>
  101 #include <ufs/ufs/ufsmount.h>
  102 
  103 #include <ufs/ffs/fs.h>
  104 #include <ufs/ffs/ffs_extern.h>
  105 
      /*
       * ALIGNED_TO(ptr, s): true when the address held in ptr, viewed as a
       * uintptr_t, has none of the bits of (_Alignof(s) - 1) set, i.e. the
       * address is a multiple of the alignment required by type s.
       */
  106 #define ALIGNED_TO(ptr, s)      \
  107         (((uintptr_t)(ptr) & (_Alignof(s) - 1)) == 0)
  108 
      /*
       * Forward declarations of the handlers and helpers used in this file.
       * ffs_rawread() has external linkage and is only declared when DIRECTIO
       * is defined; everything else below is file-local (static).
       */
  109 #ifdef DIRECTIO
  110 extern int      ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
  111 #endif
  112 static vop_fdatasync_t  ffs_fdatasync;
  113 static vop_fsync_t      ffs_fsync;
  114 static vop_getpages_t   ffs_getpages;
  115 static vop_getpages_async_t     ffs_getpages_async;
  116 static vop_lock1_t      ffs_lock;
  117 static vop_read_t       ffs_read;
  118 static vop_write_t      ffs_write;
  119 static int      ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
  120 static int      ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
  121                     struct ucred *cred);
  122 static vop_strategy_t   ffsext_strategy;
  123 static vop_closeextattr_t       ffs_closeextattr;
  124 static vop_deleteextattr_t      ffs_deleteextattr;
  125 static vop_getextattr_t ffs_getextattr;
  126 static vop_listextattr_t        ffs_listextattr;
  127 static vop_openextattr_t        ffs_openextattr;
  128 static vop_setextattr_t ffs_setextattr;
  129 static vop_vptofh_t     ffs_vptofh;
  130 
  131 /* Global vfs data structures for ufs. */
      /*
       * ffs_vnodeops1: vnode operation vector whose listed entries are the
       * FFS handlers for sync, paging, locking, read/write, block
       * reallocation and file-handle conversion.  vop_default points at
       * ufs_vnodeops (NOTE(review): presumably the fallback for operations
       * not listed here; the dispatch rule lives in the VFS layer).
       * Unlike ffs_vnodeops2 it installs no extended-attribute handlers.
       */
  132 struct vop_vector ffs_vnodeops1 = {
  133         .vop_default =          &ufs_vnodeops,
  134         .vop_fsync =            ffs_fsync,
  135         .vop_fdatasync =        ffs_fdatasync,
  136         .vop_getpages =         ffs_getpages,
  137         .vop_getpages_async =   ffs_getpages_async,
  138         .vop_lock1 =            ffs_lock,
  139         .vop_read =             ffs_read,
  140         .vop_reallocblks =      ffs_reallocblks,
  141         .vop_write =            ffs_write,
  142         .vop_vptofh =           ffs_vptofh,
  143 };
  144 
      /*
       * ffs_fifoops1: operation vector that defaults to ufs_fifoops and
       * overrides only the sync, block reallocation and file-handle entries.
       * It installs neither ffs_lock nor any extended-attribute handler.
       */
  145 struct vop_vector ffs_fifoops1 = {
  146         .vop_default =          &ufs_fifoops,
  147         .vop_fsync =            ffs_fsync,
  148         .vop_fdatasync =        ffs_fdatasync,
  149         .vop_reallocblks =      ffs_reallocblks, /* XXX: really ??? */
  150         .vop_vptofh =           ffs_vptofh,
  151 };
  152 
  153 /* Global vfs data structures for ufs. */
      /*
       * ffs_vnodeops2: same entries as ffs_vnodeops1 plus the extended
       * attribute operations (open, close, get, set, list, delete).
       * vop_default again points at ufs_vnodeops.
       */
  154 struct vop_vector ffs_vnodeops2 = {
  155         .vop_default =          &ufs_vnodeops,
  156         .vop_fsync =            ffs_fsync,
  157         .vop_fdatasync =        ffs_fdatasync,
  158         .vop_getpages =         ffs_getpages,
  159         .vop_getpages_async =   ffs_getpages_async,
  160         .vop_lock1 =            ffs_lock,
  161         .vop_read =             ffs_read,
  162         .vop_reallocblks =      ffs_reallocblks,
  163         .vop_write =            ffs_write,
  164         .vop_closeextattr =     ffs_closeextattr,
  165         .vop_deleteextattr =    ffs_deleteextattr,
  166         .vop_getextattr =       ffs_getextattr,
  167         .vop_listextattr =      ffs_listextattr,
  168         .vop_openextattr =      ffs_openextattr,
  169         .vop_setextattr =       ffs_setextattr,
  170         .vop_vptofh =           ffs_vptofh,
  171 };
  172 
      /*
       * ffs_fifoops2: operation vector that defaults to ufs_fifoops.  On top
       * of the entries in ffs_fifoops1 it installs ffs_lock, ffsext_strategy
       * and the extended-attribute operations.
       */
  173 struct vop_vector ffs_fifoops2 = {
  174         .vop_default =          &ufs_fifoops,
  175         .vop_fsync =            ffs_fsync,
  176         .vop_fdatasync =        ffs_fdatasync,
  177         .vop_lock1 =            ffs_lock,
  178         .vop_reallocblks =      ffs_reallocblks,
  179         .vop_strategy =         ffsext_strategy,
  180         .vop_closeextattr =     ffs_closeextattr,
  181         .vop_deleteextattr =    ffs_deleteextattr,
  182         .vop_getextattr =       ffs_getextattr,
  183         .vop_listextattr =      ffs_listextattr,
  184         .vop_openextattr =      ffs_openextattr,
  185         .vop_setextattr =       ffs_setextattr,
  186         .vop_vptofh =           ffs_vptofh,
  187 };
  188 
  189 /*
  190  * Synch an open file.
       *
       * Runs ffs_syncvnode() on ap->a_vp with ap->a_waitfor and no flags.
       * For a MNT_WAIT request on a vnode for which DOINGSOFTDEP() is true
       * it then calls softdep_fsync() and, for regular files and directories,
       * repeats the whole sequence while the buffer object still has writes
       * in progress or dirty buffers.
       *
       * Returns 0 on success, otherwise the first error reported by
       * ffs_syncvnode() or softdep_fsync().
  191  */
  192 /* ARGSUSED */
  193 static int
  194 ffs_fsync(struct vop_fsync_args *ap)
  195 {
  196         struct vnode *vp;
  197         struct bufobj *bo;
  198         int error;
  199 
  200         vp = ap->a_vp;
  201         bo = &vp->v_bufobj;
  202 retry:
  203         error = ffs_syncvnode(vp, ap->a_waitfor, 0);
  204         if (error)
  205                 return (error);
  206         if (ap->a_waitfor == MNT_WAIT && DOINGSOFTDEP(vp)) {
  207                 error = softdep_fsync(vp);
  208                 if (error)
  209                         return (error);
  210 
  211                 /*
  212                  * The softdep_fsync() function may drop vp lock,
  213                  * allowing for dirty buffers to reappear on the
  214                  * bo_dirty list. Recheck and resync as needed.
  215                  */
  216                 BO_LOCK(bo);
  217                 if ((vp->v_type == VREG || vp->v_type == VDIR) &&
  218                     (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
                              /* Drop the bufobj lock before starting over. */
  219                         BO_UNLOCK(bo);
  220                         goto retry;
  221                 }
  222                 BO_UNLOCK(bo);
  223         }
  224         return (0);
  225 }
  226 
      /*
       * Flush the dirty buffers of a vnode and, unless suppressed, its inode.
       *
       * vp      - vnode to sync.
       * waitfor - MNT_WAIT alternates asynchronous and synchronous passes over
       *           the dirty list until it is clean or the pass limit
       *           (UFS_NIADDR + 2) is reached; any other value performs one
       *           asynchronous pass and returns.
       * flags   - DATA_ONLY: dirty indirect buffers do not count as "still
       *           dirty", and the inode is written at the end only when
       *           IN_SIZEMOD or IN_IBLKDATA is set in ip->i_flag.
       *           NO_INO_UPDT: skip the ffs_update() calls on the non
       *           DATA_ONLY paths.
       *
       * Returns 0 on success, otherwise the error from
       * softdep_sync_metadata(), softdep_sync_buf() (other than EBUSY),
       * bwrite() or the final ffs_update().
       */
  227 int
  228 ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
  229 {
  230         struct inode *ip;
  231         struct bufobj *bo;
  232         struct buf *bp, *nbp;
  233         ufs_lbn_t lbn;
  234         int error, passes;
  235         bool still_dirty, wait;
  236 
  237         ip = VTOI(vp);
  238         ip->i_flag &= ~IN_NEEDSYNC;
  239         bo = &vp->v_bufobj;
  240 
  241         /*
  242          * When doing MNT_WAIT we must first flush all dependencies
  243          * on the inode.
  244          */
  245         if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
  246             (error = softdep_sync_metadata(vp)) != 0)
  247                 return (error);
  248 
  249         /*
  250          * Flush all dirty buffers associated with a vnode.
  251          */
  252         error = 0;
  253         passes = 0;
  254         wait = false;   /* Always do an async pass first. */
              /*
               * lbn is the logical block number of (i_size + fs_bsize - 1); a
               * dirty buffer with a larger block number triggers the
               * "syncing truncated data" panic below.
               */
  255         lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1));
  256         BO_LOCK(bo);
  257 loop:
              /* Start each pass with every dirty buffer marked unscanned. */
  258         TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
  259                 bp->b_vflags &= ~BV_SCANNED;
  260         TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
  261                 /*
  262                  * Reasons to skip this buffer: it has already been considered
  263                  * on this pass, the buffer has dependencies that will cause
  264                  * it to be redirtied and it has not already been deferred,
  265                  * or it is already being written.
  266                  */
  267                 if ((bp->b_vflags & BV_SCANNED) != 0)
  268                         continue;
  269                 bp->b_vflags |= BV_SCANNED;
  270                 /*
  271                  * Flush indirects in order, if requested.
  272                  *
  273                  * Note that if only datasync is requested, we can
  274                  * skip indirect blocks when softupdates are not
  275                  * active.  Otherwise we must flush them with data,
  276                  * since dependencies prevent data block writes.
  277                  */
  278                 if (waitfor == MNT_WAIT && bp->b_lblkno <= -UFS_NDADDR &&
  279                     (lbn_level(bp->b_lblkno) >= passes ||
  280                     ((flags & DATA_ONLY) != 0 && !DOINGSOFTDEP(vp))))
  281                         continue;
  282                 if (bp->b_lblkno > lbn)
  283                         panic("ffs_syncvnode: syncing truncated data.");
                      /*
                       * Try the buffer lock without sleeping.  On a sync pass
                       * fall back to a sleeping attempt that hands over the
                       * bufobj lock (LK_INTERLOCK); if that attempt fails the
                       * buffer is un-scanned and the list walk restarts.
                       */
  284                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
  285                         BO_UNLOCK(bo);
  286                 } else if (wait) {
  287                         if (BUF_LOCK(bp,
  288                             LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
  289                             BO_LOCKPTR(bo)) != 0) {
  290                                 BO_LOCK(bo);
  291                                 bp->b_vflags &= ~BV_SCANNED;
  292                                 goto next_locked;
  293                         }
  294                 } else
  295                         continue;
  296                 if ((bp->b_flags & B_DELWRI) == 0)
  297                         panic("ffs_fsync: not dirty");
  298                 /*
  299                  * Check for dependencies and potentially complete them.
  300                  */
  301                 if (!LIST_EMPTY(&bp->b_dep) &&
  302                     (error = softdep_sync_buf(vp, bp,
  303                     wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
  304                         /* I/O error. */
  305                         if (error != EBUSY) {
  306                                 BUF_UNLOCK(bp);
  307                                 return (error);
  308                         }
  309                         /* If we deferred once, don't defer again. */
  310                         if ((bp->b_flags & B_DEFERRED) == 0) {
  311                                 bp->b_flags |= B_DEFERRED;
  312                                 BUF_UNLOCK(bp);
  313                                 goto next;
  314                         }
  315                 }
                      /*
                       * Sync pass: write and wait.  Async pass: start the
                       * write, via vfs_bio_awrite() when B_CLUSTEROK is set.
                       */
  316                 if (wait) {
  317                         bremfree(bp);
  318                         if ((error = bwrite(bp)) != 0)
  319                                 return (error);
  320                 } else if ((bp->b_flags & B_CLUSTEROK)) {
  321                         (void) vfs_bio_awrite(bp);
  322                 } else {
  323                         bremfree(bp);
  324                         (void) bawrite(bp);
  325                 }
  326 next:
  327                 /*
  328                  * Since we may have slept during the I/O, we need
  329                  * to start from a known point.
  330                  */
  331                 BO_LOCK(bo);
  332 next_locked:
  333                 nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
  334         }
              /* Not MNT_WAIT: one async pass is all that is done. */
  335         if (waitfor != MNT_WAIT) {
  336                 BO_UNLOCK(bo);
  337                 if ((flags & NO_INO_UPDT) != 0)
  338                         return (0);
  339                 else
  340                         return (ffs_update(vp, 0));
  341         }
  342         /* Drain IO to see if we're done. */
  343         bufobj_wwait(bo, 0, 0);
  344         /*
  345          * Block devices associated with filesystems may have new I/O
  346          * requests posted for them even if the vnode is locked, so no
  347          * amount of trying will get them clean.  We make several passes
  348          * as a best effort.
  349          *
  350          * Regular files may need multiple passes to flush all dependency
  351          * work as it is possible that we must write once per indirect
  352          * level, once for the leaf, and once for the inode and each of
  353          * these will be done with one sync and one async pass.
  354          */
  355         if (bo->bo_dirty.bv_cnt > 0) {
  356                 if ((flags & DATA_ONLY) == 0) {
  357                         still_dirty = true;
  358                 } else {
  359                         /*
  360                          * For data-only sync, dirty indirect buffers
  361                          * are ignored.
  362                          */
  363                         still_dirty = false;
  364                         TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
  365                                 if (bp->b_lblkno > -UFS_NDADDR) {
  366                                         still_dirty = true;
  367                                         break;
  368                                 }
  369                         }
  370                 }
  371 
  372                 if (still_dirty) {
  373                         /* Write the inode after sync passes to flush deps. */
  374                         if (wait && DOINGSOFTDEP(vp) &&
  375                             (flags & NO_INO_UPDT) == 0) {
  376                                 BO_UNLOCK(bo);
  377                                 ffs_update(vp, 1);
  378                                 BO_LOCK(bo);
  379                         }
  380                         /* switch between sync/async. */
  381                         wait = !wait;
  382                         if (wait || ++passes < UFS_NIADDR + 2)
  383                                 goto loop;
  384                 }
  385         }
  386         BO_UNLOCK(bo);
  387         error = 0;
  388         if ((flags & DATA_ONLY) == 0) {
  389                 if ((flags & NO_INO_UPDT) == 0)
  390                         error = ffs_update(vp, 1);
  391                 if (DOINGSUJ(vp))
  392                         softdep_journal_fsync(VTOI(vp));
  393         } else if ((ip->i_flag & (IN_SIZEMOD | IN_IBLKDATA)) != 0) {
                      /*
                       * A data-only sync still writes the inode when
                       * IN_SIZEMOD or IN_IBLKDATA is set.  These are IN_*
                       * state bits, so they are tested in i_flag (as
                       * IN_NEEDSYNC is at the top of this function), not in
                       * the separate i_flags word.
                       */
  394                 error = ffs_update(vp, 1);
  395         }
  396         return (error);
  397 }
  398 
      /*
       * Vnode op for fdatasync: a waiting, data-only sync of ap->a_vp.
       * Simply forwards to ffs_syncvnode() with MNT_WAIT and DATA_ONLY and
       * returns its result.
       */
  399 static int
  400 ffs_fdatasync(struct vop_fdatasync_args *ap)
  401 {
  402 
  403         return (ffs_syncvnode(ap->a_vp, MNT_WAIT, DATA_ONLY));
  404 }
  405 
      /*
       * Vnode op for locking.
       *
       * When NO_FFS_SNAPSHOT is defined, or for any request type other than
       * LK_SHARED, LK_UPGRADE and LK_EXCLUSIVE, the request is passed to
       * VOP_LOCK1_APV() on ufs_vnodeops.
       *
       * Otherwise the lock that vp->v_vnlock points at is acquired with
       * _lockmgr_args() and v_vnlock is re-read afterwards: if it changed
       * while the thread slept, the lock just obtained is released and the
       * acquisition is retried against the new lock.
       *
       * Returns the lock manager result, or EBUSY when a retry would be
       * needed but the caller passed both LK_INTERLOCK and LK_NOWAIT.
       */
  406 static int
  407 ffs_lock(ap)
  408         struct vop_lock1_args /* {
  409                 struct vnode *a_vp;
  410                 int a_flags;
  411                 struct thread *a_td;
  412                 char *file;
  413                 int line;
  414         } */ *ap;
  415 {
  416 #ifndef NO_FFS_SNAPSHOT
  417         struct vnode *vp;
  418         int flags;
  419         struct lock *lkp;
  420         int result;
  421 
  422         switch (ap->a_flags & LK_TYPE_MASK) {
  423         case LK_SHARED:
  424         case LK_UPGRADE:
  425         case LK_EXCLUSIVE:
  426                 vp = ap->a_vp;
  427                 flags = ap->a_flags;
  428                 for (;;) {
  429 #ifdef DEBUG_VFS_LOCKS
  430                         KASSERT(vp->v_holdcnt != 0,
  431                             ("ffs_lock %p: zero hold count", vp));
  432 #endif
                              /* Remember which lock we are about to take. */
  433                         lkp = vp->v_vnlock;
  434                         result = _lockmgr_args(lkp, flags, VI_MTX(vp),
  435                             LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
  436                             ap->a_file, ap->a_line);
                              /* Done on failure, or if v_vnlock did not change. */
  437                         if (lkp == vp->v_vnlock || result != 0)
  438                                 break;
  439                         /*
  440                          * Apparent success, except that the vnode
  441                          * mutated between snapshot file vnode and
  442                          * regular file vnode while this process
  443                          * slept.  The lock currently held is not the
  444                          * right lock.  Release it, and try to get the
  445                          * new lock.
  446                          */
  447                         (void) _lockmgr_args(lkp, LK_RELEASE, NULL,
  448                             LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
  449                             ap->a_file, ap->a_line);
  450                         if ((flags & (LK_INTERLOCK | LK_NOWAIT)) ==
  451                             (LK_INTERLOCK | LK_NOWAIT))
  452                                 return (EBUSY);
                              /*
                               * The retry asks for a plain exclusive lock in
                               * place of an upgrade and no longer passes
                               * LK_INTERLOCK (NOTE(review): presumably because
                               * the first attempt already consumed the
                               * interlock and the shared hold - confirm).
                               */
  453                         if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
  454                                 flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
  455                         flags &= ~LK_INTERLOCK;
  456                 }
  457                 break;
  458         default:
  459                 result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
  460         }
  461         return (result);
  462 #else
  463         return (VOP_LOCK1_APV(&ufs_vnodeops, ap));
  464 #endif
  465 }
  466 
      /*
       * Satisfy a read from a hole by copying zeroes into uio.
       *
       * uio      - destination of the transfer.
       * xfersize - number of zero bytes to deliver.
       * size     - in/out; reduced by the number of bytes actually moved.
       *
       * The copy is done from zero_region in chunks of at most
       * ZERO_REGION_SIZE bytes.  Returns 0 once xfersize bytes were moved,
       * or the error from vn_io_fault_uiomove().
       */
  467 static int
  468 ffs_read_hole(struct uio *uio, long xfersize, long *size)
  469 {
  470         ssize_t saved_resid, tlen;
  471         int error;
  472 
  473         while (xfersize > 0) {
  474                 tlen = min(xfersize, ZERO_REGION_SIZE);
  475                 saved_resid = uio->uio_resid;
  476                 error = vn_io_fault_uiomove(__DECONST(void *, zero_region),
  477                     tlen, uio);
  478                 if (error != 0)
  479                         return (error);
                      /* Account for what was really moved, per the resid delta. */
  480                 tlen = saved_resid - uio->uio_resid;
  481                 xfersize -= tlen;
  482                 *size -= tlen;
  483         }
  484         return (0);
  485 }
  486 
  487 /*
  488  * Vnode op for reading.
       *
       * Copies file data for ap->a_vp into ap->a_uio, at most one file system
       * block per loop iteration, until the request is satisfied, the end of
       * the file is reached, or an error occurs.
       *
       * - IO_EXT requests panic unless built with "notyet", in which case
       *   they go to ffs_extread().
       * - With DIRECTIO and IO_DIRECT, ffs_rawread() is tried first and its
       *   result is returned if it failed or did any work.
       * - A zero-length request returns 0 immediately.
       * - EOVERFLOW is returned when the offset is inside the file but at or
       *   beyond fs_maxfilesize.
       * - An EJUSTRETURN result from the buffer read is treated as a hole
       *   and filled through ffs_read_hole().
       *
       * On the way out IN_ACCESS is set in the inode (under the vnode
       * interlock) when the read succeeded or moved some data, the mount is
       * neither MNT_NOATIME nor MNT_RDONLY, and the flag is not already set.
       *
       * Returns 0 or the first error encountered.
  489  */
  490 static int
  491 ffs_read(ap)
  492         struct vop_read_args /* {
  493                 struct vnode *a_vp;
  494                 struct uio *a_uio;
  495                 int a_ioflag;
  496                 struct ucred *a_cred;
  497         } */ *ap;
  498 {
  499         struct vnode *vp;
  500         struct inode *ip;
  501         struct uio *uio;
  502         struct fs *fs;
  503         struct buf *bp;
  504         ufs_lbn_t lbn, nextlbn;
  505         off_t bytesinfile;
  506         long size, xfersize, blkoffset;
  507         ssize_t orig_resid;
  508         int bflag, error, ioflag, seqcount;
  509 
  510         vp = ap->a_vp;
  511         uio = ap->a_uio;
  512         ioflag = ap->a_ioflag;
  513         if (ap->a_ioflag & IO_EXT)
  514 #ifdef notyet
  515                 return (ffs_extread(vp, uio, ioflag));
  516 #else
  517                 panic("ffs_read+IO_EXT");
  518 #endif
  519 #ifdef DIRECTIO
  520         if ((ioflag & IO_DIRECT) != 0) {
  521                 int workdone;
  522 
  523                 error = ffs_rawread(vp, uio, &workdone);
  524                 if (error != 0 || workdone != 0)
  525                         return error;
  526         }
  527 #endif
  528 
  529         seqcount = ap->a_ioflag >> IO_SEQSHIFT;
  530         ip = VTOI(vp);
  531 
  532 #ifdef INVARIANTS
  533         if (uio->uio_rw != UIO_READ)
  534                 panic("ffs_read: mode");
  535 
  536         if (vp->v_type == VLNK) {
  537                 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
  538                         panic("ffs_read: short symlink");
  539         } else if (vp->v_type != VREG && vp->v_type != VDIR)
  540                 panic("ffs_read: type %d",  vp->v_type);
  541 #endif
  542         orig_resid = uio->uio_resid;
  543         KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
  544         if (orig_resid == 0)
  545                 return (0);
  546         KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
  547         fs = ITOFS(ip);
  548         if (uio->uio_offset < ip->i_size &&
  549             uio->uio_offset >= fs->fs_maxfilesize)
  550                 return (EOVERFLOW);
  551 
              /* GB_NOSPARSE is requested for every segment type but UIO_NOCOPY. */
  552         bflag = GB_UNMAPPED | (uio->uio_segflg == UIO_NOCOPY ? 0 : GB_NOSPARSE);
  553         for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
                      /* Stop at (or past) end of file. */
  554                 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
  555                         break;
  556                 lbn = lblkno(fs, uio->uio_offset);
  557                 nextlbn = lbn + 1;
  558 
  559                 /*
  560                  * size of buffer.  The buffer representing the
  561                  * end of the file is rounded up to the size of
  562                  * the block type ( fragment or full block,
  563                  * depending ).
  564                  */
  565                 size = blksize(fs, ip, lbn);
  566                 blkoffset = blkoff(fs, uio->uio_offset);
  567 
  568                 /*
  569                  * The amount we want to transfer in this iteration is
  570                  * one FS block less the amount of the data before
  571                  * our startpoint (duh!)
  572                  */
  573                 xfersize = fs->fs_bsize - blkoffset;
  574 
  575                 /*
  576                  * But if we actually want less than the block,
  577                  * or the file doesn't have a whole block more of data,
  578                  * then use the lesser number.
  579                  */
  580                 if (uio->uio_resid < xfersize)
  581                         xfersize = uio->uio_resid;
  582                 if (bytesinfile < xfersize)
  583                         xfersize = bytesinfile;
  584 
  585                 if (lblktosize(fs, nextlbn) >= ip->i_size) {
  586                         /*
  587                          * Don't do readahead if this is the end of the file.
  588                          */
  589                         error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
  590                 } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
  591                         /*
  592                          * Otherwise if we are allowed to cluster,
  593                          * grab as much as we can.
  594                          *
  595                          * XXX  This may not be a win if we are not
  596                          * doing sequential access.
  597                          */
  598                         error = cluster_read(vp, ip->i_size, lbn,
  599                             size, NOCRED, blkoffset + uio->uio_resid,
  600                             seqcount, bflag, &bp);
  601                 } else if (seqcount > 1) {
  602                         /*
  603                          * If we are NOT allowed to cluster, then
  604                          * if we appear to be acting sequentially,
  605                          * fire off a request for a readahead
  606                          * as well as a read. Note that the 4th and 5th
  607                          * arguments point to arrays of the size specified in
  608                          * the 6th argument.
  609                          */
  610                         u_int nextsize = blksize(fs, ip, nextlbn);
  611                         error = breadn_flags(vp, lbn, size, &nextlbn,
  612                             &nextsize, 1, NOCRED, bflag, NULL, &bp);
  613                 } else {
  614                         /*
  615                          * Failing all of the above, just read what the
  616                          * user asked for. Interestingly, the same as
  617                          * the first option above.
  618                          */
  619                         error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
  620                 }
                      /*
                       * EJUSTRETURN is handled as a hole: deliver zeroes
                       * and move on to the next block when that succeeds.
                       */
  621                 if (error == EJUSTRETURN) {
  622                         error = ffs_read_hole(uio, xfersize, &size);
  623                         if (error == 0)
  624                                 continue;
  625                 }
  626                 if (error != 0) {
  627                         brelse(bp);
  628                         bp = NULL;
  629                         break;
  630                 }
  631 
  632                 /*
  633                  * We should only get non-zero b_resid when an I/O error
  634                  * has occurred, which should cause us to break above.
  635                  * However, if the short read did not cause an error,
  636                  * then we want to ensure that we do not uiomove bad
  637                  * or uninitialized data.
  638                  */
  639                 size -= bp->b_resid;
  640                 if (size < xfersize) {
  641                         if (size == 0)
  642                                 break;
  643                         xfersize = size;
  644                 }
  645 
                      /* Copy via the mapping if there is one, else page by page. */
  646                 if (buf_mapped(bp)) {
  647                         error = vn_io_fault_uiomove((char *)bp->b_data +
  648                             blkoffset, (int)xfersize, uio);
  649                 } else {
  650                         error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
  651                             (int)xfersize, uio);
  652                 }
  653                 if (error)
  654                         break;
  655 
  656                 vfs_bio_brelse(bp, ioflag);
  657         }
  658 
  659         /*
  660          * bp is still set here only when the loop above was left by a
  661          * 'break' after a buffer had been obtained (a short read that
  662          * left nothing to copy, or a failed copy); every completed
  663          * iteration releases its buffer and resets bp to NULL.
  664          */
  665         if (bp != NULL)
  666                 vfs_bio_brelse(bp, ioflag);
  667 
  668         if ((error == 0 || uio->uio_resid != orig_resid) &&
  669             (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0 &&
  670             (ip->i_flag & IN_ACCESS) == 0) {
  671                 VI_LOCK(vp);
  672                 ip->i_flag |= IN_ACCESS;
  673                 VI_UNLOCK(vp);
  674         }
  675         return (error);
  676 }
  677 
  678 /*
  679  * Vnode op for writing.
  680  */
  681 static int
  682 ffs_write(ap)
  683         struct vop_write_args /* {
  684                 struct vnode *a_vp;
  685                 struct uio *a_uio;
  686                 int a_ioflag;
  687                 struct ucred *a_cred;
  688         } */ *ap;
  689 {
  690         struct vnode *vp;
  691         struct uio *uio;
  692         struct inode *ip;
  693         struct fs *fs;
  694         struct buf *bp;
  695         ufs_lbn_t lbn;
  696         off_t osize;
  697         ssize_t resid;
  698         int seqcount;
  699         int blkoffset, error, flags, ioflag, size, xfersize;
  700 
  701         vp = ap->a_vp;
  702         uio = ap->a_uio;
  703         ioflag = ap->a_ioflag;
  704         if (ap->a_ioflag & IO_EXT)
  705 #ifdef notyet
  706                 return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
  707 #else
  708                 panic("ffs_write+IO_EXT");
  709 #endif
  710 
  711         seqcount = ap->a_ioflag >> IO_SEQSHIFT;
  712         ip = VTOI(vp);
  713 
  714 #ifdef INVARIANTS
  715         if (uio->uio_rw != UIO_WRITE)
  716                 panic("ffs_write: mode");
  717 #endif
  718 
  719         switch (vp->v_type) {
  720         case VREG:
  721                 if (ioflag & IO_APPEND)
  722                         uio->uio_offset = ip->i_size;
  723                 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
  724                         return (EPERM);
  725                 /* FALLTHROUGH */
  726         case VLNK:
  727                 break;
  728         case VDIR:
  729                 panic("ffs_write: dir write");
  730                 break;
  731         default:
  732                 panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
  733                         (int)uio->uio_offset,
  734                         (int)uio->uio_resid
  735                 );
  736         }
  737 
  738         KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
  739         KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
  740         fs = ITOFS(ip);
  741         if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
  742                 return (EFBIG);
  743         /*
  744          * Maybe this should be above the vnode op call, but so long as
  745          * file servers have no limits, I don't think it matters.
  746          */
  747         if (vn_rlimit_fsize(vp, uio, uio->uio_td))
  748                 return (EFBIG);
  749 
  750         resid = uio->uio_resid;
  751         osize = ip->i_size;
  752         if (seqcount > BA_SEQMAX)
  753                 flags = BA_SEQMAX << BA_SEQSHIFT;
  754         else
  755                 flags = seqcount << BA_SEQSHIFT;
  756         if (ioflag & IO_SYNC)
  757                 flags |= IO_SYNC;
  758         flags |= BA_UNMAPPED;
  759 
  760         for (error = 0; uio->uio_resid > 0;) {
  761                 lbn = lblkno(fs, uio->uio_offset);
  762                 blkoffset = blkoff(fs, uio->uio_offset);
  763                 xfersize = fs->fs_bsize - blkoffset;
  764                 if (uio->uio_resid < xfersize)
  765                         xfersize = uio->uio_resid;
  766                 if (uio->uio_offset + xfersize > ip->i_size)
  767                         vnode_pager_setsize(vp, uio->uio_offset + xfersize);
  768 
  769                 /*
  770                  * We must perform a read-before-write if the transfer size
  771                  * does not cover the entire buffer.
  772                  */
  773                 if (fs->fs_bsize > xfersize)
  774                         flags |= BA_CLRBUF;
  775                 else
  776                         flags &= ~BA_CLRBUF;
  777 /* XXX is uio->uio_offset the right thing here? */
  778                 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
  779                     ap->a_cred, flags, &bp);
  780                 if (error != 0) {
  781                         vnode_pager_setsize(vp, ip->i_size);
  782                         break;
  783                 }
  784                 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
  785                         bp->b_flags |= B_NOCACHE;
  786 
  787                 if (uio->uio_offset + xfersize > ip->i_size) {
  788                         ip->i_size = uio->uio_offset + xfersize;
  789                         DIP_SET(ip, i_size, ip->i_size);
  790                         ip->i_flag |= IN_SIZEMOD | IN_CHANGE;
  791                 }
  792 
  793                 size = blksize(fs, ip, lbn) - bp->b_resid;
  794                 if (size < xfersize)
  795                         xfersize = size;
  796 
  797                 if (buf_mapped(bp)) {
  798                         error = vn_io_fault_uiomove((char *)bp->b_data +
  799                             blkoffset, (int)xfersize, uio);
  800                 } else {
  801                         error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
  802                             (int)xfersize, uio);
  803                 }
  804                 /*
  805                  * If the buffer is not already filled and we encounter an
  806                  * error while trying to fill it, we have to clear out any
  807                  * garbage data from the pages instantiated for the buffer.
  808                  * If we do not, a failed uiomove() during a write can leave
  809                  * the prior contents of the pages exposed to a userland mmap.
  810                  *
  811                  * Note that we need only clear buffers with a transfer size
  812                  * equal to the block size because buffers with a shorter
  813                  * transfer size were cleared above by the call to UFS_BALLOC()
  814                  * with the BA_CLRBUF flag set.
  815                  *
  816                  * If the source region for uiomove identically mmaps the
  817                  * buffer, uiomove() performed the NOP copy, and the buffer
  818                  * content remains valid because the page fault handler
  819                  * validated the pages.
  820                  */
  821                 if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
  822                     fs->fs_bsize == xfersize)
  823                         vfs_bio_clrbuf(bp);
  824 
  825                 vfs_bio_set_flags(bp, ioflag);
  826 
  827                 /*
  828                  * If IO_SYNC each buffer is written synchronously.  Otherwise
  829                  * if we have a severe page deficiency write the buffer
  830                  * asynchronously.  Otherwise try to cluster, and if that
  831                  * doesn't do it then either do an async write (if O_DIRECT),
  832                  * or a delayed write (if not).
  833                  */
  834                 if (ioflag & IO_SYNC) {
  835                         (void)bwrite(bp);
  836                 } else if (vm_page_count_severe() ||
  837                             buf_dirty_count_severe() ||
  838                             (ioflag & IO_ASYNC)) {
  839                         bp->b_flags |= B_CLUSTEROK;
  840                         bawrite(bp);
  841                 } else if (xfersize + blkoffset == fs->fs_bsize) {
  842                         if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
  843                                 bp->b_flags |= B_CLUSTEROK;
  844                                 cluster_write(vp, bp, ip->i_size, seqcount,
  845                                     GB_UNMAPPED);
  846                         } else {
  847                                 bawrite(bp);
  848                         }
  849                 } else if (ioflag & IO_DIRECT) {
  850                         bp->b_flags |= B_CLUSTEROK;
  851                         bawrite(bp);
  852                 } else {
  853                         bp->b_flags |= B_CLUSTEROK;
  854                         bdwrite(bp);
  855                 }
  856                 if (error || xfersize == 0)
  857                         break;
  858                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
  859         }
  860         /*
  861          * If we successfully wrote any data, and we are not the superuser
  862          * we clear the setuid and setgid bits as a precaution against
  863          * tampering.
  864          */
  865         if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
  866             ap->a_cred) {
  867                 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) {
  868                         ip->i_mode &= ~(ISUID | ISGID);
  869                         DIP_SET(ip, i_mode, ip->i_mode);
  870                 }
  871         }
  872         if (error) {
  873                 if (ioflag & IO_UNIT) {
  874                         (void)ffs_truncate(vp, osize,
  875                             IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred);
  876                         uio->uio_offset -= resid - uio->uio_resid;
  877                         uio->uio_resid = resid;
  878                 }
  879         } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
  880                 error = ffs_update(vp, 1);
  881         return (error);
  882 }
  883 
  884 /*
  885  * Extended attribute area reading.
  886  */
  887 static int
  888 ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
  889 {
  890         struct inode *ip;
  891         struct ufs2_dinode *dp;
  892         struct fs *fs;
  893         struct buf *bp;
  894         ufs_lbn_t lbn, nextlbn;
  895         off_t bytesinfile;
  896         long size, xfersize, blkoffset;
  897         ssize_t orig_resid;
  898         int error;
  899 
  900         ip = VTOI(vp);
  901         fs = ITOFS(ip);
  902         dp = ip->i_din2;
  903 
  904 #ifdef INVARIANTS
  905         if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
  906                 panic("ffs_extread: mode");
  907 
  908 #endif
  909         orig_resid = uio->uio_resid;
  910         KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
  911         if (orig_resid == 0)
  912                 return (0);
  913         KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));
  914 
  915         for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
  916                 if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
  917                         break;
  918                 lbn = lblkno(fs, uio->uio_offset);
  919                 nextlbn = lbn + 1;
  920 
  921                 /*
  922                  * size of buffer.  The buffer representing the
  923                  * end of the file is rounded up to the size of
  924                  * the block type ( fragment or full block,
  925                  * depending ).
  926                  */
  927                 size = sblksize(fs, dp->di_extsize, lbn);
  928                 blkoffset = blkoff(fs, uio->uio_offset);
  929 
  930                 /*
  931                  * The amount we want to transfer in this iteration is
  932                  * one FS block less the amount of the data before
  933                  * our startpoint (duh!)
  934                  */
  935                 xfersize = fs->fs_bsize - blkoffset;
  936 
  937                 /*
  938                  * But if we actually want less than the block,
  939                  * or the file doesn't have a whole block more of data,
  940                  * then use the lesser number.
  941                  */
  942                 if (uio->uio_resid < xfersize)
  943                         xfersize = uio->uio_resid;
  944                 if (bytesinfile < xfersize)
  945                         xfersize = bytesinfile;
  946 
  947                 if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
  948                         /*
  949                          * Don't do readahead if this is the end of the info.
  950                          */
  951                         error = bread(vp, -1 - lbn, size, NOCRED, &bp);
  952                 } else {
  953                         /*
  954                          * If we have a second block, then
  955                          * fire off a request for a readahead
  956                          * as well as a read. Note that the 4th and 5th
  957                          * arguments point to arrays of the size specified in
  958                          * the 6th argument.
  959                          */
  960                         u_int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
  961 
  962                         nextlbn = -1 - nextlbn;
  963                         error = breadn(vp, -1 - lbn,
  964                             size, &nextlbn, &nextsize, 1, NOCRED, &bp);
  965                 }
  966                 if (error) {
  967                         brelse(bp);
  968                         bp = NULL;
  969                         break;
  970                 }
  971 
  972                 /*
  973                  * We should only get non-zero b_resid when an I/O error
  974                  * has occurred, which should cause us to break above.
  975                  * However, if the short read did not cause an error,
  976                  * then we want to ensure that we do not uiomove bad
  977                  * or uninitialized data.
  978                  */
  979                 size -= bp->b_resid;
  980                 if (size < xfersize) {
  981                         if (size == 0)
  982                                 break;
  983                         xfersize = size;
  984                 }
  985 
  986                 error = uiomove((char *)bp->b_data + blkoffset,
  987                                         (int)xfersize, uio);
  988                 if (error)
  989                         break;
  990                 vfs_bio_brelse(bp, ioflag);
  991         }
  992 
  993         /*
  994          * This can only happen in the case of an error
  995          * because the loop above resets bp to NULL on each iteration
  996          * and on normal completion has not set a new value into it.
  997          * so it must have come from a 'break' statement
  998          */
  999         if (bp != NULL)
 1000                 vfs_bio_brelse(bp, ioflag);
 1001         return (error);
 1002 }
 1003 
 1004 /*
 1005  * Extended attribute area writing.
 1006  */
 1007 static int
 1008 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
 1009 {
 1010         struct inode *ip;
 1011         struct ufs2_dinode *dp;
 1012         struct fs *fs;
 1013         struct buf *bp;
 1014         ufs_lbn_t lbn;
 1015         off_t osize;
 1016         ssize_t resid;
 1017         int blkoffset, error, flags, size, xfersize;
 1018 
 1019         ip = VTOI(vp);
 1020         fs = ITOFS(ip);
 1021         dp = ip->i_din2;
 1022 
 1023 #ifdef INVARIANTS
 1024         if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
 1025                 panic("ffs_extwrite: mode");
 1026 #endif
 1027 
 1028         if (ioflag & IO_APPEND)
 1029                 uio->uio_offset = dp->di_extsize;
 1030         KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
 1031         KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
 1032         if ((uoff_t)uio->uio_offset + uio->uio_resid >
 1033             UFS_NXADDR * fs->fs_bsize)
 1034                 return (EFBIG);
 1035 
 1036         resid = uio->uio_resid;
 1037         osize = dp->di_extsize;
 1038         flags = IO_EXT;
 1039         if (ioflag & IO_SYNC)
 1040                 flags |= IO_SYNC;
 1041 
 1042         for (error = 0; uio->uio_resid > 0;) {
 1043                 lbn = lblkno(fs, uio->uio_offset);
 1044                 blkoffset = blkoff(fs, uio->uio_offset);
 1045                 xfersize = fs->fs_bsize - blkoffset;
 1046                 if (uio->uio_resid < xfersize)
 1047                         xfersize = uio->uio_resid;
 1048 
 1049                 /*
 1050                  * We must perform a read-before-write if the transfer size
 1051                  * does not cover the entire buffer.
 1052                  */
 1053                 if (fs->fs_bsize > xfersize)
 1054                         flags |= BA_CLRBUF;
 1055                 else
 1056                         flags &= ~BA_CLRBUF;
 1057                 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
 1058                     ucred, flags, &bp);
 1059                 if (error != 0)
 1060                         break;
 1061                 /*
 1062                  * If the buffer is not valid we have to clear out any
 1063                  * garbage data from the pages instantiated for the buffer.
 1064                  * If we do not, a failed uiomove() during a write can leave
 1065                  * the prior contents of the pages exposed to a userland
 1066                  * mmap().  XXX deal with uiomove() errors a better way.
 1067                  */
 1068                 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
 1069                         vfs_bio_clrbuf(bp);
 1070 
 1071                 if (uio->uio_offset + xfersize > dp->di_extsize) {
 1072                         dp->di_extsize = uio->uio_offset + xfersize;
 1073                         ip->i_flag |= IN_SIZEMOD | IN_CHANGE;
 1074                 }
 1075 
 1076                 size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
 1077                 if (size < xfersize)
 1078                         xfersize = size;
 1079 
 1080                 error =
 1081                     uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
 1082 
 1083                 vfs_bio_set_flags(bp, ioflag);
 1084 
 1085                 /*
 1086                  * If IO_SYNC each buffer is written synchronously.  Otherwise
 1087                  * if we have a severe page deficiency write the buffer
 1088                  * asynchronously.  Otherwise try to cluster, and if that
 1089                  * doesn't do it then either do an async write (if O_DIRECT),
 1090                  * or a delayed write (if not).
 1091                  */
 1092                 if (ioflag & IO_SYNC) {
 1093                         (void)bwrite(bp);
 1094                 } else if (vm_page_count_severe() ||
 1095                             buf_dirty_count_severe() ||
 1096                             xfersize + blkoffset == fs->fs_bsize ||
 1097                             (ioflag & (IO_ASYNC | IO_DIRECT)))
 1098                         bawrite(bp);
 1099                 else
 1100                         bdwrite(bp);
 1101                 if (error || xfersize == 0)
 1102                         break;
 1103                 ip->i_flag |= IN_CHANGE;
 1104         }
 1105         /*
 1106          * If we successfully wrote any data, and we are not the superuser
 1107          * we clear the setuid and setgid bits as a precaution against
 1108          * tampering.
 1109          */
 1110         if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
 1111                 if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID, 0)) {
 1112                         ip->i_mode &= ~(ISUID | ISGID);
 1113                         dp->di_mode = ip->i_mode;
 1114                 }
 1115         }
 1116         if (error) {
 1117                 if (ioflag & IO_UNIT) {
 1118                         (void)ffs_truncate(vp, osize,
 1119                             IO_EXT | (ioflag&IO_SYNC), ucred);
 1120                         uio->uio_offset -= resid - uio->uio_resid;
 1121                         uio->uio_resid = resid;
 1122                 }
 1123         } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
 1124                 error = ffs_update(vp, 1);
 1125         return (error);
 1126 }
 1127 
 1128 
 1129 /*
 1130  * Vnode operating to retrieve a named extended attribute.
 1131  *
 1132  * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 1133  * the length of the EA, and possibly the pointer to the entry and to the data.
 1134  */
 1135 static int
 1136 ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name,
 1137     struct extattr **eapp, u_char **eac)
 1138 {
 1139         struct extattr *eap, *eaend;
 1140         size_t nlen;
 1141 
 1142         nlen = strlen(name);
 1143         KASSERT(ALIGNED_TO(ptr, struct extattr), ("unaligned"));
 1144         eap = (struct extattr *)ptr;
 1145         eaend = (struct extattr *)(ptr + length);
 1146         for (; eap < eaend; eap = EXTATTR_NEXT(eap)) {
 1147                 KASSERT(EXTATTR_NEXT(eap) <= eaend,
 1148                     ("extattr next %p beyond %p", EXTATTR_NEXT(eap), eaend));
 1149                 if (eap->ea_namespace != nspace || eap->ea_namelength != nlen
 1150                     || memcmp(eap->ea_name, name, nlen) != 0)
 1151                         continue;
 1152                 if (eapp != NULL)
 1153                         *eapp = eap;
 1154                 if (eac != NULL)
 1155                         *eac = EXTATTR_CONTENT(eap);
 1156                 return (EXTATTR_CONTENT_SIZE(eap));
 1157         }
 1158         return (-1);
 1159 }
 1160 
 1161 static int
 1162 ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td)
 1163 {
 1164         const struct extattr *eap, *eaend, *eapnext;
 1165         struct inode *ip;
 1166         struct ufs2_dinode *dp;
 1167         struct fs *fs;
 1168         struct uio luio;
 1169         struct iovec liovec;
 1170         u_int easize;
 1171         int error;
 1172         u_char *eae;
 1173 
 1174         ip = VTOI(vp);
 1175         fs = ITOFS(ip);
 1176         dp = ip->i_din2;
 1177         easize = dp->di_extsize;
 1178         if ((uoff_t)easize > UFS_NXADDR * fs->fs_bsize)
 1179                 return (EFBIG);
 1180 
 1181         eae = malloc(easize, M_TEMP, M_WAITOK);
 1182 
 1183         liovec.iov_base = eae;
 1184         liovec.iov_len = easize;
 1185         luio.uio_iov = &liovec;
 1186         luio.uio_iovcnt = 1;
 1187         luio.uio_offset = 0;
 1188         luio.uio_resid = easize;
 1189         luio.uio_segflg = UIO_SYSSPACE;
 1190         luio.uio_rw = UIO_READ;
 1191         luio.uio_td = td;
 1192 
 1193         error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
 1194         if (error) {
 1195                 free(eae, M_TEMP);
 1196                 return (error);
 1197         }
 1198         /* Validate disk xattrfile contents. */
 1199         for (eap = (void *)eae, eaend = (void *)(eae + easize); eap < eaend;
 1200             eap = eapnext) {
 1201                 eapnext = EXTATTR_NEXT(eap);
 1202                 /* Bogusly short entry or bogusly long entry. */
 1203                 if (eap->ea_length < sizeof(*eap) || eapnext > eaend) {
 1204                         free(eae, M_TEMP);
 1205                         return (EINTEGRITY);
 1206                 }
 1207         }
 1208         *p = eae;
 1209         return (0);
 1210 }
 1211 
 1212 static void
 1213 ffs_lock_ea(struct vnode *vp)
 1214 {
 1215         struct inode *ip;
 1216 
 1217         ip = VTOI(vp);
 1218         VI_LOCK(vp);
 1219         while (ip->i_flag & IN_EA_LOCKED) {
 1220                 ip->i_flag |= IN_EA_LOCKWAIT;
 1221                 msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea",
 1222                     0);
 1223         }
 1224         ip->i_flag |= IN_EA_LOCKED;
 1225         VI_UNLOCK(vp);
 1226 }
 1227 
 1228 static void
 1229 ffs_unlock_ea(struct vnode *vp)
 1230 {
 1231         struct inode *ip;
 1232 
 1233         ip = VTOI(vp);
 1234         VI_LOCK(vp);
 1235         if (ip->i_flag & IN_EA_LOCKWAIT)
 1236                 wakeup(&ip->i_ea_refs);
 1237         ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT);
 1238         VI_UNLOCK(vp);
 1239 }
 1240 
 1241 static int
 1242 ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
 1243 {
 1244         struct inode *ip;
 1245         struct ufs2_dinode *dp;
 1246         int error;
 1247 
 1248         ip = VTOI(vp);
 1249 
 1250         ffs_lock_ea(vp);
 1251         if (ip->i_ea_area != NULL) {
 1252                 ip->i_ea_refs++;
 1253                 ffs_unlock_ea(vp);
 1254                 return (0);
 1255         }
 1256         dp = ip->i_din2;
 1257         error = ffs_rdextattr(&ip->i_ea_area, vp, td);
 1258         if (error) {
 1259                 ffs_unlock_ea(vp);
 1260                 return (error);
 1261         }
 1262         ip->i_ea_len = dp->di_extsize;
 1263         ip->i_ea_error = 0;
 1264         ip->i_ea_refs++;
 1265         ffs_unlock_ea(vp);
 1266         return (0);
 1267 }
 1268 
 1269 /*
 1270  * Vnode extattr transaction commit/abort
 1271  */
 1272 static int
 1273 ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
 1274 {
 1275         struct inode *ip;
 1276         struct uio luio;
 1277         struct iovec liovec;
 1278         int error;
 1279         struct ufs2_dinode *dp;
 1280 
 1281         ip = VTOI(vp);
 1282 
 1283         ffs_lock_ea(vp);
 1284         if (ip->i_ea_area == NULL) {
 1285                 ffs_unlock_ea(vp);
 1286                 return (EINVAL);
 1287         }
 1288         dp = ip->i_din2;
 1289         error = ip->i_ea_error;
 1290         if (commit && error == 0) {
 1291                 ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit");
 1292                 if (cred == NOCRED)
 1293                         cred =  vp->v_mount->mnt_cred;
 1294                 liovec.iov_base = ip->i_ea_area;
 1295                 liovec.iov_len = ip->i_ea_len;
 1296                 luio.uio_iov = &liovec;
 1297                 luio.uio_iovcnt = 1;
 1298                 luio.uio_offset = 0;
 1299                 luio.uio_resid = ip->i_ea_len;
 1300                 luio.uio_segflg = UIO_SYSSPACE;
 1301                 luio.uio_rw = UIO_WRITE;
 1302                 luio.uio_td = td;
 1303                 /* XXX: I'm not happy about truncating to zero size */
 1304                 if (ip->i_ea_len < dp->di_extsize)
 1305                         error = ffs_truncate(vp, 0, IO_EXT, cred);
 1306                 error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
 1307         }
 1308         if (--ip->i_ea_refs == 0) {
 1309                 free(ip->i_ea_area, M_TEMP);
 1310                 ip->i_ea_area = NULL;
 1311                 ip->i_ea_len = 0;
 1312                 ip->i_ea_error = 0;
 1313         }
 1314         ffs_unlock_ea(vp);
 1315         return (error);
 1316 }
 1317 
 1318 /*
 1319  * Vnode extattr strategy routine for fifos.
 1320  *
 1321  * We need to check for a read or write of the external attributes.
 1322  * Otherwise we just fall through and do the usual thing.
 1323  */
 1324 static int
 1325 ffsext_strategy(struct vop_strategy_args *ap)
 1326 /*
 1327 struct vop_strategy_args {
 1328         struct vnodeop_desc *a_desc;
 1329         struct vnode *a_vp;
 1330         struct buf *a_bp;
 1331 };
 1332 */
 1333 {
 1334         struct vnode *vp;
 1335         daddr_t lbn;
 1336 
 1337         vp = ap->a_vp;
 1338         lbn = ap->a_bp->b_lblkno;
 1339         if (I_IS_UFS2(VTOI(vp)) && lbn < 0 && lbn >= -UFS_NXADDR)
 1340                 return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
 1341         if (vp->v_type == VFIFO)
 1342                 return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
 1343         panic("spec nodes went here");
 1344 }
 1345 
 1346 /*
 1347  * Vnode extattr transaction commit/abort
 1348  */
 1349 static int
 1350 ffs_openextattr(struct vop_openextattr_args *ap)
 1351 /*
 1352 struct vop_openextattr_args {
 1353         struct vnodeop_desc *a_desc;
 1354         struct vnode *a_vp;
 1355         IN struct ucred *a_cred;
 1356         IN struct thread *a_td;
 1357 };
 1358 */
 1359 {
 1360 
 1361         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1362                 return (EOPNOTSUPP);
 1363 
 1364         return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
 1365 }
 1366 
 1367 
 1368 /*
 1369  * Vnode extattr transaction commit/abort
 1370  */
 1371 static int
 1372 ffs_closeextattr(struct vop_closeextattr_args *ap)
 1373 /*
 1374 struct vop_closeextattr_args {
 1375         struct vnodeop_desc *a_desc;
 1376         struct vnode *a_vp;
 1377         int a_commit;
 1378         IN struct ucred *a_cred;
 1379         IN struct thread *a_td;
 1380 };
 1381 */
 1382 {
 1383 
 1384         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1385                 return (EOPNOTSUPP);
 1386 
 1387         if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY))
 1388                 return (EROFS);
 1389 
 1390         return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
 1391 }
 1392 
 1393 /*
 1394  * Vnode operation to remove a named attribute.
 1395  */
 1396 static int
 1397 ffs_deleteextattr(struct vop_deleteextattr_args *ap)
 1398 /*
 1399 vop_deleteextattr {
 1400         IN struct vnode *a_vp;
 1401         IN int a_attrnamespace;
 1402         IN const char *a_name;
 1403         IN struct ucred *a_cred;
 1404         IN struct thread *a_td;
 1405 };
 1406 */
 1407 {
 1408         struct inode *ip;
 1409         struct extattr *eap;
 1410         uint32_t ul;
 1411         int olen, error, i, easize;
 1412         u_char *eae;
 1413         void *tmp;
 1414 
 1415         ip = VTOI(ap->a_vp);
 1416 
 1417         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1418                 return (EOPNOTSUPP);
 1419 
 1420         if (strlen(ap->a_name) == 0)
 1421                 return (EINVAL);
 1422 
 1423         if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
 1424                 return (EROFS);
 1425 
 1426         error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 1427             ap->a_cred, ap->a_td, VWRITE);
 1428         if (error) {
 1429 
 1430                 /*
 1431                  * ffs_lock_ea is not needed there, because the vnode
 1432                  * must be exclusively locked.
 1433                  */
 1434                 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 1435                         ip->i_ea_error = error;
 1436                 return (error);
 1437         }
 1438 
 1439         error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
 1440         if (error)
 1441                 return (error);
 1442 
 1443         /* CEM: delete could be done in-place instead */
 1444         eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
 1445         bcopy(ip->i_ea_area, eae, ip->i_ea_len);
 1446         easize = ip->i_ea_len;
 1447 
 1448         olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
 1449             &eap, NULL);
 1450         if (olen == -1) {
 1451                 /* delete but nonexistent */
 1452                 free(eae, M_TEMP);
 1453                 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1454                 return (ENOATTR);
 1455         }
 1456         ul = eap->ea_length;
 1457         i = (u_char *)EXTATTR_NEXT(eap) - eae;
 1458         bcopy(EXTATTR_NEXT(eap), eap, easize - i);
 1459         easize -= ul;
 1460 
 1461         tmp = ip->i_ea_area;
 1462         ip->i_ea_area = eae;
 1463         ip->i_ea_len = easize;
 1464         free(tmp, M_TEMP);
 1465         error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
 1466         return (error);
 1467 }
 1468 
 1469 /*
 1470  * Vnode operation to retrieve a named extended attribute.
 1471  */
 1472 static int
 1473 ffs_getextattr(struct vop_getextattr_args *ap)
 1474 /*
 1475 vop_getextattr {
 1476         IN struct vnode *a_vp;
 1477         IN int a_attrnamespace;
 1478         IN const char *a_name;
 1479         INOUT struct uio *a_uio;
 1480         OUT size_t *a_size;
 1481         IN struct ucred *a_cred;
 1482         IN struct thread *a_td;
 1483 };
 1484 */
 1485 {
 1486         struct inode *ip;
 1487         u_char *eae, *p;
 1488         unsigned easize;
 1489         int error, ealen;
 1490 
 1491         ip = VTOI(ap->a_vp);
 1492 
 1493         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1494                 return (EOPNOTSUPP);
 1495 
 1496         error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 1497             ap->a_cred, ap->a_td, VREAD);
 1498         if (error)
 1499                 return (error);
 1500 
 1501         error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
 1502         if (error)
 1503                 return (error);
 1504 
 1505         eae = ip->i_ea_area;
 1506         easize = ip->i_ea_len;
 1507 
 1508         ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
 1509             NULL, &p);
 1510         if (ealen >= 0) {
 1511                 error = 0;
 1512                 if (ap->a_size != NULL)
 1513                         *ap->a_size = ealen;
 1514                 else if (ap->a_uio != NULL)
 1515                         error = uiomove(p, ealen, ap->a_uio);
 1516         } else
 1517                 error = ENOATTR;
 1518 
 1519         ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1520         return (error);
 1521 }
 1522 
 1523 /*
 1524  * Vnode operation to retrieve extended attributes on a vnode.
 1525  */
 1526 static int
 1527 ffs_listextattr(struct vop_listextattr_args *ap)
 1528 /*
 1529 vop_listextattr {
 1530         IN struct vnode *a_vp;
 1531         IN int a_attrnamespace;
 1532         INOUT struct uio *a_uio;
 1533         OUT size_t *a_size;
 1534         IN struct ucred *a_cred;
 1535         IN struct thread *a_td;
 1536 };
 1537 */
 1538 {
 1539         struct inode *ip;
 1540         struct extattr *eap, *eaend;
 1541         int error, ealen;
 1542 
 1543         ip = VTOI(ap->a_vp);
 1544 
 1545         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1546                 return (EOPNOTSUPP);
 1547 
 1548         error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 1549             ap->a_cred, ap->a_td, VREAD);
 1550         if (error)
 1551                 return (error);
 1552 
 1553         error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
 1554         if (error)
 1555                 return (error);
 1556 
 1557         error = 0;
 1558         if (ap->a_size != NULL)
 1559                 *ap->a_size = 0;
 1560 
 1561         KASSERT(ALIGNED_TO(ip->i_ea_area, struct extattr), ("unaligned"));
 1562         eap = (struct extattr *)ip->i_ea_area;
 1563         eaend = (struct extattr *)(ip->i_ea_area + ip->i_ea_len);
 1564         for (; error == 0 && eap < eaend; eap = EXTATTR_NEXT(eap)) {
 1565                 KASSERT(EXTATTR_NEXT(eap) <= eaend,
 1566                     ("extattr next %p beyond %p", EXTATTR_NEXT(eap), eaend));
 1567                 if (eap->ea_namespace != ap->a_attrnamespace)
 1568                         continue;
 1569 
 1570                 ealen = eap->ea_namelength;
 1571                 if (ap->a_size != NULL)
 1572                         *ap->a_size += ealen + 1;
 1573                 else if (ap->a_uio != NULL)
 1574                         error = uiomove(&eap->ea_namelength, ealen + 1,
 1575                             ap->a_uio);
 1576         }
 1577 
 1578         ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1579         return (error);
 1580 }
 1581 
 1582 /*
 1583  * Vnode operation to set a named attribute.
 1584  */
 1585 static int
 1586 ffs_setextattr(struct vop_setextattr_args *ap)
 1587 /*
 1588 vop_setextattr {
 1589         IN struct vnode *a_vp;
 1590         IN int a_attrnamespace;
 1591         IN const char *a_name;
 1592         INOUT struct uio *a_uio;
 1593         IN struct ucred *a_cred;
 1594         IN struct thread *a_td;
 1595 };
 1596 */
 1597 {
 1598         struct inode *ip;
 1599         struct fs *fs;
 1600         struct extattr *eap;
 1601         uint32_t ealength, ul;
 1602         ssize_t ealen;
 1603         int olen, eapad1, eapad2, error, i, easize;
 1604         u_char *eae;
 1605         void *tmp;
 1606 
 1607         ip = VTOI(ap->a_vp);
 1608         fs = ITOFS(ip);
 1609 
 1610         if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 1611                 return (EOPNOTSUPP);
 1612 
 1613         if (strlen(ap->a_name) == 0)
 1614                 return (EINVAL);
 1615 
 1616         /* XXX Now unsupported API to delete EAs using NULL uio. */
 1617         if (ap->a_uio == NULL)
 1618                 return (EOPNOTSUPP);
 1619 
 1620         if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
 1621                 return (EROFS);
 1622 
 1623         ealen = ap->a_uio->uio_resid;
 1624         if (ealen < 0 || ealen > lblktosize(fs, UFS_NXADDR))
 1625                 return (EINVAL);
 1626 
 1627         error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 1628             ap->a_cred, ap->a_td, VWRITE);
 1629         if (error) {
 1630 
 1631                 /*
 1632                  * ffs_lock_ea is not needed there, because the vnode
 1633                  * must be exclusively locked.
 1634                  */
 1635                 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 1636                         ip->i_ea_error = error;
 1637                 return (error);
 1638         }
 1639 
 1640         error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
 1641         if (error)
 1642                 return (error);
 1643 
 1644         ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
 1645         eapad1 = roundup2(ealength, 8) - ealength;
 1646         eapad2 = roundup2(ealen, 8) - ealen;
 1647         ealength += eapad1 + ealen + eapad2;
 1648 
 1649         /*
 1650          * CEM: rewrites of the same size or smaller could be done in-place
 1651          * instead.  (We don't acquire any fine-grained locks in here either,
 1652          * so we could also do bigger writes in-place.)
 1653          */
 1654         eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
 1655         bcopy(ip->i_ea_area, eae, ip->i_ea_len);
 1656         easize = ip->i_ea_len;
 1657 
 1658         olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
 1659             &eap, NULL);
 1660         if (olen == -1) {
 1661                 /* new, append at end */
 1662                 KASSERT(ALIGNED_TO(eae + easize, struct extattr),
 1663                     ("unaligned"));
 1664                 eap = (struct extattr *)(eae + easize);
 1665                 easize += ealength;
 1666         } else {
 1667                 ul = eap->ea_length;
 1668                 i = (u_char *)EXTATTR_NEXT(eap) - eae;
 1669                 if (ul != ealength) {
 1670                         bcopy(EXTATTR_NEXT(eap), (u_char *)eap + ealength,
 1671                             easize - i);
 1672                         easize += (ealength - ul);
 1673                 }
 1674         }
 1675         if (easize > lblktosize(fs, UFS_NXADDR)) {
 1676                 free(eae, M_TEMP);
 1677                 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1678                 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 1679                         ip->i_ea_error = ENOSPC;
 1680                 return (ENOSPC);
 1681         }
 1682         eap->ea_length = ealength;
 1683         eap->ea_namespace = ap->a_attrnamespace;
 1684         eap->ea_contentpadlen = eapad2;
 1685         eap->ea_namelength = strlen(ap->a_name);
 1686         memcpy(eap->ea_name, ap->a_name, strlen(ap->a_name));
 1687         bzero(&eap->ea_name[strlen(ap->a_name)], eapad1);
 1688         error = uiomove(EXTATTR_CONTENT(eap), ealen, ap->a_uio);
 1689         if (error) {
 1690                 free(eae, M_TEMP);
 1691                 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1692                 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 1693                         ip->i_ea_error = error;
 1694                 return (error);
 1695         }
 1696         bzero((u_char *)EXTATTR_CONTENT(eap) + ealen, eapad2);
 1697 
 1698         tmp = ip->i_ea_area;
 1699         ip->i_ea_area = eae;
 1700         ip->i_ea_len = easize;
 1701         free(tmp, M_TEMP);
 1702         error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
 1703         return (error);
 1704 }
 1705 
 1706 /*
 1707  * Vnode pointer to File handle
 1708  */
 1709 static int
 1710 ffs_vptofh(struct vop_vptofh_args *ap)
 1711 /*
 1712 vop_vptofh {
 1713         IN struct vnode *a_vp;
 1714         IN struct fid *a_fhp;
 1715 };
 1716 */
 1717 {
 1718         struct inode *ip;
 1719         struct ufid *ufhp;
 1720 
 1721         ip = VTOI(ap->a_vp);
 1722         ufhp = (struct ufid *)ap->a_fhp;
 1723         ufhp->ufid_len = sizeof(struct ufid);
 1724         ufhp->ufid_ino = ip->i_number;
 1725         ufhp->ufid_gen = ip->i_gen;
 1726         return (0);
 1727 }
 1728 
 1729 SYSCTL_DECL(_vfs_ffs);
 1730 static int use_buf_pager = 1;
 1731 SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0,
 1732     "Always use buffer pager instead of bmap");
 1733 
 1734 static daddr_t
 1735 ffs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
 1736 {
 1737 
 1738         return (lblkno(VFSTOUFS(vp->v_mount)->um_fs, off));
 1739 }
 1740 
 1741 static int
 1742 ffs_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *sz)
 1743 {
 1744 
 1745         *sz = blksize(VFSTOUFS(vp->v_mount)->um_fs, VTOI(vp), lbn);
 1746         return (0);
 1747 }
 1748 
 1749 static int
 1750 ffs_getpages(struct vop_getpages_args *ap)
 1751 {
 1752         struct vnode *vp;
 1753         struct ufsmount *um;
 1754 
 1755         vp = ap->a_vp;
 1756         um = VFSTOUFS(vp->v_mount);
 1757 
 1758         if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE)
 1759                 return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
 1760                     ap->a_rbehind, ap->a_rahead, NULL, NULL));
 1761         return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
 1762             ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz));
 1763 }
 1764 
 1765 static int
 1766 ffs_getpages_async(struct vop_getpages_async_args *ap)
 1767 {
 1768         struct vnode *vp;
 1769         struct ufsmount *um;
 1770         bool do_iodone;
 1771         int error;
 1772 
 1773         vp = ap->a_vp;
 1774         um = VFSTOUFS(vp->v_mount);
 1775         do_iodone = true;
 1776 
 1777         if (um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE) {
 1778                 error = vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
 1779                     ap->a_rbehind, ap->a_rahead, ap->a_iodone, ap->a_arg);
 1780                 if (error == 0)
 1781                         do_iodone = false;
 1782         } else {
 1783                 error = vfs_bio_getpages(vp, ap->a_m, ap->a_count,
 1784                     ap->a_rbehind, ap->a_rahead, ffs_gbp_getblkno,
 1785                     ffs_gbp_getblksz);
 1786         }
 1787         if (do_iodone && ap->a_iodone != NULL)
 1788                 ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);
 1789 
 1790         return (error);
 1791 }
 1792 

Cache object: 71de4a3026794d4154388592bd646844


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.