FreeBSD/Linux Kernel Cross Reference
sys/ufs/ffs/ffs_vnops.c


/*-
 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)ufs_readwrite.c       8.11 (Berkeley) 5/8/95
 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
 *      @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/6.0/sys/ufs/ffs/ffs_vnops.c 151853 2005-10-29 07:00:45Z scottl $");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include "opt_directio.h"
#include "opt_ffs.h"

#ifdef DIRECTIO
extern int      ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
static vop_fsync_t      ffs_fsync;
static vop_lock_t       ffs_lock;
static vop_getpages_t   ffs_getpages;
static vop_read_t       ffs_read;
static vop_write_t      ffs_write;
static int      ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int      ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
                    struct ucred *cred);
static vop_strategy_t   ffsext_strategy;
static vop_closeextattr_t       ffs_closeextattr;
static vop_deleteextattr_t      ffs_deleteextattr;
static vop_getextattr_t ffs_getextattr;
static vop_listextattr_t        ffs_listextattr;
static vop_openextattr_t        ffs_openextattr;
static vop_setextattr_t ffs_setextattr;


/* Global vfs data structures for ufs1. */
struct vop_vector ffs_vnodeops1 = {
        .vop_default =          &ufs_vnodeops,
        .vop_fsync =            ffs_fsync,
        .vop_getpages =         ffs_getpages,
        .vop_lock =             ffs_lock,
        .vop_read =             ffs_read,
        .vop_reallocblks =      ffs_reallocblks,
        .vop_write =            ffs_write,
};

struct vop_vector ffs_fifoops1 = {
        .vop_default =          &ufs_fifoops,
        .vop_fsync =            ffs_fsync,
        .vop_reallocblks =      ffs_reallocblks, /* XXX: really ??? */
};

/* Global vfs data structures for ufs2. */
struct vop_vector ffs_vnodeops2 = {
        .vop_default =          &ufs_vnodeops,
        .vop_fsync =            ffs_fsync,
        .vop_getpages =         ffs_getpages,
        .vop_lock =             ffs_lock,
        .vop_read =             ffs_read,
        .vop_reallocblks =      ffs_reallocblks,
        .vop_write =            ffs_write,
        .vop_closeextattr =     ffs_closeextattr,
        .vop_deleteextattr =    ffs_deleteextattr,
        .vop_getextattr =       ffs_getextattr,
        .vop_listextattr =      ffs_listextattr,
        .vop_openextattr =      ffs_openextattr,
        .vop_setextattr =       ffs_setextattr,
};

struct vop_vector ffs_fifoops2 = {
        .vop_default =          &ufs_fifoops,
        .vop_fsync =            ffs_fsync,
        .vop_lock =             ffs_lock,
        .vop_reallocblks =      ffs_reallocblks,
        .vop_strategy =         ffsext_strategy,
        .vop_closeextattr =     ffs_closeextattr,
        .vop_deleteextattr =    ffs_deleteextattr,
        .vop_getextattr =       ffs_getextattr,
        .vop_listextattr =      ffs_listextattr,
        .vop_openextattr =      ffs_openextattr,
        .vop_setextattr =       ffs_setextattr,
};
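
/*
 * The "1" and "2" suffixes denote the tables installed for UFS1 and
 * UFS2 mounts respectively.  Only UFS2 inodes carry an extended
 * attribute area, so the extattr entry points appear only in the
 * second pair of tables; any operation left unset here falls through
 * to the vop_default vector.
 */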

/*
 * Synch an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(struct vop_fsync_args *ap)
{
        int error;

        error = ffs_syncvnode(ap->a_vp, ap->a_waitfor);
        if (error)
                return (error);
        if (ap->a_waitfor == MNT_WAIT &&
            (ap->a_vp->v_mount->mnt_flag & MNT_SOFTDEP))
                error = softdep_fsync(ap->a_vp);
        return (error);
}

int
ffs_syncvnode(struct vnode *vp, int waitfor)
{
        struct inode *ip = VTOI(vp);
        struct buf *bp;
        struct buf *nbp;
        int s, error, wait, passes, skipmeta;
        ufs_lbn_t lbn;

        wait = (waitfor == MNT_WAIT);
        lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));

        /*
         * Flush all dirty buffers associated with a vnode.
         */
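        /*
         * The flush is made in up to NIADDR + 1 passes.  On a
         * synchronous request the first pass (skipmeta) writes only
         * the data buffers, a second pass picks up the metadata, and
         * any remaining passes retry buffers that dependency
         * processing has redirtied in the meantime.
         */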
        passes = NIADDR + 1;
        skipmeta = 0;
        if (wait)
                skipmeta = 1;
        s = splbio();
        VI_LOCK(vp);
loop:
        TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs)
                bp->b_vflags &= ~BV_SCANNED;
        TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
                /*
                 * Reasons to skip this buffer: it has already been considered
                 * on this pass, this pass is the first time through on a
                 * synchronous flush request and the buffer being considered
                 * is metadata, the buffer has dependencies that will cause
                 * it to be redirtied and it has not already been deferred,
                 * or it is already being written.
                 */
                if ((bp->b_vflags & BV_SCANNED) != 0)
                        continue;
                bp->b_vflags |= BV_SCANNED;
                if ((skipmeta == 1 && bp->b_lblkno < 0))
                        continue;
                if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
                        continue;
                VI_UNLOCK(vp);
                if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
                    (bp->b_flags & B_DEFERRED) == 0 &&
                    buf_countdeps(bp, 0)) {
                        bp->b_flags |= B_DEFERRED;
                        BUF_UNLOCK(bp);
                        VI_LOCK(vp);
                        continue;
                }
                if ((bp->b_flags & B_DELWRI) == 0)
                        panic("ffs_fsync: not dirty");
                /*
                 * If this is a synchronous flush request, or it is not a
                 * file or device, start the write on this buffer immediately.
                 */
                if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {

                        /*
                         * On our final pass through, do all I/O synchronously
                         * so that we can find out if our flush is failing
                         * because of write errors.
                         */
                        if (passes > 0 || !wait) {
                                if ((bp->b_flags & B_CLUSTEROK) && !wait) {
                                        (void) vfs_bio_awrite(bp);
                                } else {
                                        bremfree(bp);
                                        splx(s);
                                        (void) bawrite(bp);
                                        s = splbio();
                                }
                        } else {
                                bremfree(bp);
                                splx(s);
                                if ((error = bwrite(bp)) != 0)
                                        return (error);
                                s = splbio();
                        }
                } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
                        /*
                         * If the buffer is for data that has been truncated
                         * off the file, then throw it away.
                         */
                        bremfree(bp);
                        bp->b_flags |= B_INVAL | B_NOCACHE;
                        splx(s);
                        brelse(bp);
                        s = splbio();
                } else
                        vfs_bio_awrite(bp);

                /*
                 * Since we may have slept during the I/O, we need
                 * to start from a known point.
                 */
                VI_LOCK(vp);
                nbp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd);
        }
        /*
         * If we were asked to do this synchronously, then go back for
         * another pass, this time doing the metadata.
         */
        if (skipmeta) {
                skipmeta = 0;
                goto loop;
        }

        if (wait) {
                bufobj_wwait(&vp->v_bufobj, 3, 0);
                VI_UNLOCK(vp);

                /*
                 * Ensure that any filesystem metadata associated
                 * with the vnode has been written.
                 */
                splx(s);
                if ((error = softdep_sync_metadata(vp)) != 0)
                        return (error);
                s = splbio();

                VI_LOCK(vp);
                if (vp->v_bufobj.bo_dirty.bv_cnt > 0) {
                        /*
                         * Block devices associated with filesystems may
                         * have new I/O requests posted for them even if
                         * the vnode is locked, so no amount of trying will
                         * get them clean. Thus we give block devices a
                         * good effort, then just give up. For all other file
                         * types, go around and try again until it is clean.
                         */
                        if (passes > 0) {
                                passes -= 1;
                                goto loop;
                        }
#ifdef DIAGNOSTIC
                        if (!vn_isdisk(vp, NULL))
                                vprint("ffs_fsync: dirty", vp);
#endif
                }
        }
        VI_UNLOCK(vp);
        splx(s);
        return (ffs_update(vp, wait));
}

static int
ffs_lock(ap)
        struct vop_lock_args /* {
                struct vnode *a_vp;
                int a_flags;
                struct thread *a_td;
        } */ *ap;
{
        return (VOP_LOCK_APV(&ufs_vnodeops, ap));
}

/*
 * Vnode op for reading.
 */
/* ARGSUSED */
static int
ffs_read(ap)
        struct vop_read_args /* {
                struct vnode *a_vp;
                struct uio *a_uio;
                int a_ioflag;
                struct ucred *a_cred;
        } */ *ap;
{
        struct vnode *vp;
        struct inode *ip;
        struct uio *uio;
        struct fs *fs;
        struct buf *bp;
        ufs_lbn_t lbn, nextlbn;
        off_t bytesinfile;
        long size, xfersize, blkoffset;
        int error, orig_resid;
        int seqcount;
        int ioflag;

        vp = ap->a_vp;
        uio = ap->a_uio;
        ioflag = ap->a_ioflag;
        if (ap->a_ioflag & IO_EXT)
#ifdef notyet
                return (ffs_extread(vp, uio, ioflag));
#else
                panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
        if ((ioflag & IO_DIRECT) != 0) {
                int workdone;

                error = ffs_rawread(vp, uio, &workdone);
                if (error != 0 || workdone != 0)
                        return error;
        }
#endif

        seqcount = ap->a_ioflag >> IO_SEQSHIFT;
        ip = VTOI(vp);

#ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_READ)
                panic("ffs_read: mode");

        if (vp->v_type == VLNK) {
                if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
                        panic("ffs_read: short symlink");
        } else if (vp->v_type != VREG && vp->v_type != VDIR)
                panic("ffs_read: type %d",  vp->v_type);
#endif
        orig_resid = uio->uio_resid;
        KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
        if (orig_resid == 0)
                return (0);
        KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
        fs = ip->i_fs;
        if (uio->uio_offset < ip->i_size &&
            uio->uio_offset >= fs->fs_maxfilesize)
                return (EOVERFLOW);

        for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
                if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
                        break;
                lbn = lblkno(fs, uio->uio_offset);
                nextlbn = lbn + 1;

                /*
                 * Size of the buffer.  The buffer representing the
                 * end of the file is rounded up to the size of the
                 * block type (fragment or full block, as appropriate).
                 */
                size = blksize(fs, ip, lbn);
                blkoffset = blkoff(fs, uio->uio_offset);

                /*
                 * The amount we want to transfer in this iteration is
                 * one FS block less the amount of the data before
                 * our startpoint (duh!)
                 */
                xfersize = fs->fs_bsize - blkoffset;

                /*
                 * But if we actually want less than the block,
                 * or the file doesn't have a whole block more of data,
                 * then use the lesser number.
                 */
                if (uio->uio_resid < xfersize)
                        xfersize = uio->uio_resid;
                if (bytesinfile < xfersize)
                        xfersize = bytesinfile;
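
                /*
                 * Worked example (assuming an 8K fs_bsize): a 4096 byte
                 * read at offset 6144 has blkoffset 6144, so xfersize is
                 * clamped to 8192 - 6144 = 2048 here and the remaining
                 * 2048 bytes are picked up on the next pass through the
                 * loop.
                 */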

                if (lblktosize(fs, nextlbn) >= ip->i_size) {
                        /*
                         * Don't do readahead if this is the end of the file.
                         */
                        error = bread(vp, lbn, size, NOCRED, &bp);
                } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
                        /*
                         * Otherwise if we are allowed to cluster,
                         * grab as much as we can.
                         *
                         * XXX  This may not be a win if we are not
                         * doing sequential access.
                         */
                        error = cluster_read(vp, ip->i_size, lbn,
                                size, NOCRED, blkoffset + uio->uio_resid, seqcount, &bp);
                } else if (seqcount > 1) {
                        /*
                         * If we are NOT allowed to cluster, then
                         * if we appear to be acting sequentially,
                         * fire off a request for a readahead
                         * as well as a read. Note that the 4th and 5th
                         * arguments point to arrays of the size specified in
                         * the 6th argument.
                         */
                        int nextsize = blksize(fs, ip, nextlbn);
                        error = breadn(vp, lbn,
                            size, &nextlbn, &nextsize, 1, NOCRED, &bp);
                } else {
                        /*
                         * Failing all of the above, just read what the
                         * user asked for. Interestingly, the same as
                         * the first option above.
                         */
                        error = bread(vp, lbn, size, NOCRED, &bp);
                }
                if (error) {
                        brelse(bp);
                        bp = NULL;
                        break;
                }

                /*
                 * If IO_DIRECT then set B_DIRECT for the buffer.  This
                 * will cause us to attempt to release the buffer later on
                 * and will cause the buffer cache to attempt to free the
                 * underlying pages.
                 */
                if (ioflag & IO_DIRECT)
                        bp->b_flags |= B_DIRECT;

                /*
                 * We should only get non-zero b_resid when an I/O error
                 * has occurred, which should cause us to break above.
                 * However, if the short read did not cause an error,
                 * then we want to ensure that we do not uiomove bad
                 * or uninitialized data.
                 */
                size -= bp->b_resid;
                if (size < xfersize) {
                        if (size == 0)
                                break;
                        xfersize = size;
                }

                error = uiomove((char *)bp->b_data + blkoffset,
                    (int)xfersize, uio);
                if (error)
                        break;

                if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
                   (LIST_FIRST(&bp->b_dep) == NULL)) {
                        /*
                         * If there are no dependencies, and it's VMIO,
                         * then we don't need the buf, mark it available
                         * for freeing. The VM has the data.
                         */
                        bp->b_flags |= B_RELBUF;
                        brelse(bp);
                } else {
                        /*
                         * Otherwise let whoever
                         * made the request take care of
                         * freeing it. We just queue
                         * it onto another list.
                         */
                        bqrelse(bp);
                }
        }

        /*
         * This can only happen in the case of an error, because the loop
         * above resets bp to NULL on each iteration and on normal
         * completion has not set a new value into it; so it must have
         * come from a 'break' statement.
         */
        if (bp != NULL) {
                if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
                   (LIST_FIRST(&bp->b_dep) == NULL)) {
                        bp->b_flags |= B_RELBUF;
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        }

        if ((error == 0 || uio->uio_resid != orig_resid) &&
            (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
                ip->i_flag |= IN_ACCESS;
        return (error);
}

/*
 * Vnode op for writing.
 */
static int
ffs_write(ap)
        struct vop_write_args /* {
                struct vnode *a_vp;
                struct uio *a_uio;
                int a_ioflag;
                struct ucred *a_cred;
        } */ *ap;
{
        struct vnode *vp;
        struct uio *uio;
        struct inode *ip;
        struct fs *fs;
        struct buf *bp;
        struct thread *td;
        ufs_lbn_t lbn;
        off_t osize;
        int seqcount;
        int blkoffset, error, flags, ioflag, resid, size, xfersize;

        vp = ap->a_vp;
        uio = ap->a_uio;
        ioflag = ap->a_ioflag;
        if (ap->a_ioflag & IO_EXT)
#ifdef notyet
                return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
                panic("ffs_write+IO_EXT");
#endif

        seqcount = ap->a_ioflag >> IO_SEQSHIFT;
        ip = VTOI(vp);

#ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_WRITE)
                panic("ffs_write: mode");
#endif

        switch (vp->v_type) {
        case VREG:
                if (ioflag & IO_APPEND)
                        uio->uio_offset = ip->i_size;
                if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
                        return (EPERM);
                /* FALLTHROUGH */
        case VLNK:
                break;
        case VDIR:
                panic("ffs_write: dir write");
                break;
        default:
                panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
                        (int)uio->uio_offset,
                        (int)uio->uio_resid
                );
        }

        KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
        KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
        fs = ip->i_fs;
        if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
                return (EFBIG);
        /*
         * Maybe this should be above the vnode op call, but so long as
         * file servers have no limits, I don't think it matters.
         */
        td = uio->uio_td;
        if (vp->v_type == VREG && td != NULL) {
                PROC_LOCK(td->td_proc);
                if (uio->uio_offset + uio->uio_resid >
                    lim_cur(td->td_proc, RLIMIT_FSIZE)) {
                        psignal(td->td_proc, SIGXFSZ);
                        PROC_UNLOCK(td->td_proc);
                        return (EFBIG);
                }
                PROC_UNLOCK(td->td_proc);
        }

        resid = uio->uio_resid;
        osize = ip->i_size;
        if (seqcount > BA_SEQMAX)
                flags = BA_SEQMAX << BA_SEQSHIFT;
        else
                flags = seqcount << BA_SEQSHIFT;
        if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
                flags |= IO_SYNC;
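
        /*
         * seqcount, the file descriptor layer's estimate of how
         * sequential the access pattern is, is handed down to
         * UFS_BALLOC() in the upper bits of the flags word as an
         * allocation hint.
         */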

        for (error = 0; uio->uio_resid > 0;) {
                lbn = lblkno(fs, uio->uio_offset);
                blkoffset = blkoff(fs, uio->uio_offset);
                xfersize = fs->fs_bsize - blkoffset;
                if (uio->uio_resid < xfersize)
                        xfersize = uio->uio_resid;
                if (uio->uio_offset + xfersize > ip->i_size)
                        vnode_pager_setsize(vp, uio->uio_offset + xfersize);

                /*
                 * We must perform a read-before-write if the transfer size
                 * does not cover the entire buffer.
                 */
                if (fs->fs_bsize > xfersize)
                        flags |= BA_CLRBUF;
                else
                        flags &= ~BA_CLRBUF;
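                /*
                 * With BA_CLRBUF set, UFS_BALLOC() must supply a buffer
                 * whose existing contents are valid (reading it from
                 * disk if necessary), since the partial write below
                 * leaves the bytes outside the transfer range untouched.
                 */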
/* XXX is uio->uio_offset the right thing here? */
                error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
                    ap->a_cred, flags, &bp);
                if (error != 0)
                        break;
                /*
                 * If the buffer is not valid we have to clear out any
                 * garbage data from the pages instantiated for the buffer.
                 * If we do not, a failed uiomove() during a write can leave
                 * the prior contents of the pages exposed to a userland
                 * mmap().  XXX deal with uiomove() errors a better way.
                 */
                if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
                        vfs_bio_clrbuf(bp);
                if (ioflag & IO_DIRECT)
                        bp->b_flags |= B_DIRECT;
                if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
                        bp->b_flags |= B_NOCACHE;

                if (uio->uio_offset + xfersize > ip->i_size) {
                        ip->i_size = uio->uio_offset + xfersize;
                        DIP_SET(ip, i_size, ip->i_size);
                }

                size = blksize(fs, ip, lbn) - bp->b_resid;
                if (size < xfersize)
                        xfersize = size;

                error =
                    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
                if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
                   (LIST_FIRST(&bp->b_dep) == NULL)) {
                        bp->b_flags |= B_RELBUF;
                }

                /*
                 * If IO_SYNC each buffer is written synchronously.  Otherwise
                 * if we have a severe page deficiency write the buffer
                 * asynchronously.  Otherwise try to cluster, and if that
                 * doesn't do it then either do an async write (if O_DIRECT),
                 * or a delayed write (if not).
                 */
                if (ioflag & IO_SYNC) {
                        (void)bwrite(bp);
                } else if (vm_page_count_severe() ||
                            buf_dirty_count_severe() ||
                            (ioflag & IO_ASYNC)) {
                        bp->b_flags |= B_CLUSTEROK;
                        bawrite(bp);
                } else if (xfersize + blkoffset == fs->fs_bsize) {
                        if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
                                bp->b_flags |= B_CLUSTEROK;
                                cluster_write(vp, bp, ip->i_size, seqcount);
                        } else {
                                bawrite(bp);
                        }
                } else if (ioflag & IO_DIRECT) {
                        bp->b_flags |= B_CLUSTEROK;
                        bawrite(bp);
                } else {
                        bp->b_flags |= B_CLUSTEROK;
                        bdwrite(bp);
                }
                if (error || xfersize == 0)
                        break;
                ip->i_flag |= IN_CHANGE | IN_UPDATE;
        }
        /*
         * If we successfully wrote any data, and we are not the superuser,
         * we clear the setuid and setgid bits as a precaution against
         * tampering.
         */
        if (resid > uio->uio_resid && ap->a_cred &&
            suser_cred(ap->a_cred, SUSER_ALLOWJAIL)) {
                ip->i_mode &= ~(ISUID | ISGID);
                DIP_SET(ip, i_mode, ip->i_mode);
        }
        if (error) {
                if (ioflag & IO_UNIT) {
                        (void)ffs_truncate(vp, osize,
                            IO_NORMAL | (ioflag & IO_SYNC),
                            ap->a_cred, uio->uio_td);
                        uio->uio_offset -= resid - uio->uio_resid;
                        uio->uio_resid = resid;
                }
        } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
                error = ffs_update(vp, 1);
        return (error);
}

/*
 * get page routine
 */
static int
ffs_getpages(ap)
        struct vop_getpages_args *ap;
{
        int i;
        vm_page_t mreq;
        int pcount;

        pcount = round_page(ap->a_count) / PAGE_SIZE;
        mreq = ap->a_m[ap->a_reqpage];

        /*
         * If ANY DEV_BSIZE blocks are valid on a large filesystem block,
         * then the entire page is valid.  Since the page may be mapped,
         * user programs might reference data beyond the actual end of file
         * occurring within the page.  We have to zero that data.
         */
        VM_OBJECT_LOCK(mreq->object);
        if (mreq->valid) {
                if (mreq->valid != VM_PAGE_BITS_ALL)
                        vm_page_zero_invalid(mreq, TRUE);
                vm_page_lock_queues();
                for (i = 0; i < pcount; i++) {
                        if (i != ap->a_reqpage) {
                                vm_page_free(ap->a_m[i]);
                        }
                }
                vm_page_unlock_queues();
                VM_OBJECT_UNLOCK(mreq->object);
                return VM_PAGER_OK;
        }
        VM_OBJECT_UNLOCK(mreq->object);

        return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
                                            ap->a_count,
                                            ap->a_reqpage);
}


/*
 * Extended attribute area reading.
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
        struct inode *ip;
        struct ufs2_dinode *dp;
        struct fs *fs;
        struct buf *bp;
        ufs_lbn_t lbn, nextlbn;
        off_t bytesinfile;
        long size, xfersize, blkoffset;
        int error, orig_resid;

        ip = VTOI(vp);
        fs = ip->i_fs;
        dp = ip->i_din2;

#ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
                panic("ffs_extread: mode");

#endif
        orig_resid = uio->uio_resid;
        KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
        if (orig_resid == 0)
                return (0);
        KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));

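        /*
         * The extended attribute area is addressed with negative
         * logical block numbers: block N of the area is requested as
         * -1 - N, which the UFS block mapping code directs at the
         * inode's external attribute blocks rather than its data
         * blocks.
         */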
        for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
                if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
                        break;
                lbn = lblkno(fs, uio->uio_offset);
                nextlbn = lbn + 1;

                /*
                 * Size of the buffer.  The buffer representing the
                 * end of the file is rounded up to the size of the
                 * block type (fragment or full block, as appropriate).
                 */
                size = sblksize(fs, dp->di_extsize, lbn);
                blkoffset = blkoff(fs, uio->uio_offset);

                /*
                 * The amount we want to transfer in this iteration is
                 * one FS block less the amount of the data before
                 * our startpoint (duh!)
                 */
                xfersize = fs->fs_bsize - blkoffset;

                /*
                 * But if we actually want less than the block,
                 * or the file doesn't have a whole block more of data,
                 * then use the lesser number.
                 */
                if (uio->uio_resid < xfersize)
                        xfersize = uio->uio_resid;
                if (bytesinfile < xfersize)
                        xfersize = bytesinfile;

                if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
                        /*
                         * Don't do readahead if this is the end of the
                         * extended attribute area.
                         */
                        error = bread(vp, -1 - lbn, size, NOCRED, &bp);
                } else {
                        /*
                         * If we have a second block, then
                         * fire off a request for a readahead
                         * as well as a read. Note that the 4th and 5th
                         * arguments point to arrays of the size specified in
                         * the 6th argument.
                         */
                        int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

                        nextlbn = -1 - nextlbn;
                        error = breadn(vp, -1 - lbn,
                            size, &nextlbn, &nextsize, 1, NOCRED, &bp);
                }
                if (error) {
                        brelse(bp);
                        bp = NULL;
                        break;
                }

                /*
                 * If IO_DIRECT then set B_DIRECT for the buffer.  This
                 * will cause us to attempt to release the buffer later on
                 * and will cause the buffer cache to attempt to free the
                 * underlying pages.
                 */
                if (ioflag & IO_DIRECT)
                        bp->b_flags |= B_DIRECT;

                /*
                 * We should only get non-zero b_resid when an I/O error
                 * has occurred, which should cause us to break above.
                 * However, if the short read did not cause an error,
                 * then we want to ensure that we do not uiomove bad
                 * or uninitialized data.
                 */
                size -= bp->b_resid;
                if (size < xfersize) {
                        if (size == 0)
                                break;
                        xfersize = size;
                }

                error = uiomove((char *)bp->b_data + blkoffset,
                                        (int)xfersize, uio);
                if (error)
                        break;

                if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
                   (LIST_FIRST(&bp->b_dep) == NULL)) {
                        /*
                         * If there are no dependencies, and it's VMIO,
                         * then we don't need the buf, mark it available
                         * for freeing. The VM has the data.
                         */
                        bp->b_flags |= B_RELBUF;
                        brelse(bp);
                } else {
                        /*
                         * Otherwise let whoever
                         * made the request take care of
                         * freeing it. We just queue
                         * it onto another list.
                         */
                        bqrelse(bp);
                }
        }

        /*
         * This can only happen in the case of an error, because the loop
         * above resets bp to NULL on each iteration and on normal
         * completion has not set a new value into it; so it must have
         * come from a 'break' statement.
         */
        if (bp != NULL) {
                if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
                   (LIST_FIRST(&bp->b_dep) == NULL)) {
                        bp->b_flags |= B_RELBUF;
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        }

        if ((error == 0 || uio->uio_resid != orig_resid) &&
            (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
                ip->i_flag |= IN_ACCESS;
        return (error);
}

/*
 * Extended attribute area writing.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
        struct inode *ip;
        struct ufs2_dinode *dp;
        struct fs *fs;
        struct buf *bp;
        ufs_lbn_t lbn;
        off_t osize;
        int blkoffset, error, flags, resid, size, xfersize;

        ip = VTOI(vp);
        fs = ip->i_fs;
        dp = ip->i_din2;

#ifdef DIAGNOSTIC
        if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
                panic("ffs_extwrite: mode");
#endif

        if (ioflag & IO_APPEND)
                uio->uio_offset = dp->di_extsize;
        KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
        KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
        if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
                return (EFBIG);

        resid = uio->uio_resid;
        osize = dp->di_extsize;
        flags = IO_EXT;
        if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
                flags |= IO_SYNC;

        for (error = 0; uio->uio_resid > 0;) {
                lbn = lblkno(fs, uio->uio_offset);
                blkoffset = blkoff(fs, uio->uio_offset);
                xfersize = fs->fs_bsize - blkoffset;
                if (uio->uio_resid < xfersize)
                        xfersize = uio->uio_resid;

                /*
                 * We must perform a read-before-write if the transfer size
                 * does not cover the entire buffer.
                 */
                if (fs->fs_bsize > xfersize)
                        flags |= BA_CLRBUF;
                else
                        flags &= ~BA_CLRBUF;
                error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
                    ucred, flags, &bp);
                if (error != 0)
                        break;
                /*
                 * If the buffer is not valid we have to clear out any
                 * garbage data from the pages instantiated for the buffer.
                 * If we do not, a failed uiomove() during a write can leave
                 * the prior contents of the pages exposed to a userland
                 * mmap().  XXX deal with uiomove() errors a better way.
                 */
                if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
                        vfs_bio_clrbuf(bp);
                if (ioflag & IO_DIRECT)
                        bp->b_flags |= B_DIRECT;

                if (uio->uio_offset + xfersize > dp->di_extsize)
                        dp->di_extsize = uio->uio_offset + xfersize;

                size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
                if (size < xfersize)
                        xfersize = size;

                error =
                    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
                if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
                   (LIST_FIRST(&bp->b_dep) == NULL)) {
                        bp->b_flags |= B_RELBUF;
                }

                /*
                 * If IO_SYNC each buffer is written synchronously.  Otherwise
                 * if we have a severe page deficiency write the buffer
                 * asynchronously.  Otherwise try to cluster, and if that
                 * doesn't do it then either do an async write (if O_DIRECT),
                 * or a delayed write (if not).
                 */
                if (ioflag & IO_SYNC) {
                        (void)bwrite(bp);
                } else if (vm_page_count_severe() ||
                            buf_dirty_count_severe() ||
                            xfersize + blkoffset == fs->fs_bsize ||
                            (ioflag & (IO_ASYNC | IO_DIRECT)))
                        bawrite(bp);
                else
                        bdwrite(bp);
                if (error || xfersize == 0)
                        break;
                ip->i_flag |= IN_CHANGE | IN_UPDATE;
        }
        /*
         * If we successfully wrote any data, and we are not the superuser,
         * we clear the setuid and setgid bits as a precaution against
         * tampering.
         */
        if (resid > uio->uio_resid && ucred &&
            suser_cred(ucred, SUSER_ALLOWJAIL)) {
                ip->i_mode &= ~(ISUID | ISGID);
                dp->di_mode = ip->i_mode;
        }
        if (error) {
                if (ioflag & IO_UNIT) {
                        (void)ffs_truncate(vp, osize,
                            IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
                        uio->uio_offset -= resid - uio->uio_resid;
                        uio->uio_resid = resid;
                }
        } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
                error = ffs_update(vp, 1);
        return (error);
}


/*
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
 */
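/*
 * A sketch of the record layout, as inferred from the parsing code
 * below (not an authoritative definition):
 *
 *      uint32_t  length;       total record length, a multiple of 8
 *      uint8_t   namespace;
 *      uint8_t   content_pad;  bytes of padding after the content
 *      uint8_t   name_len;
 *      char      name[name_len];  padded to an 8 byte boundary (eapad1)
 *      u_char    content[];    length - header - content_pad bytes
 */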
static int
ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name,
    u_char **eap, u_char **eac)
{
        u_char *p, *pe, *pn, *p0;
        int eapad1, eapad2, ealength, ealen, nlen;
        uint32_t ul;

        pe = ptr + length;
        nlen = strlen(name);

        for (p = ptr; p < pe; p = pn) {
                p0 = p;
                bcopy(p, &ul, sizeof(ul));
                pn = p + ul;
                /* make sure this entry is complete */
                if (pn > pe)
                        break;
                p += sizeof(uint32_t);
                if (*p != nspace)
                        continue;
                p++;
                eapad2 = *p++;
                if (*p != nlen)
                        continue;
                p++;
                if (bcmp(p, name, nlen))
                        continue;
                ealength = sizeof(uint32_t) + 3 + nlen;
                eapad1 = 8 - (ealength % 8);
                if (eapad1 == 8)
                        eapad1 = 0;
                ealength += eapad1;
                ealen = ul - ealength - eapad2;
                p += nlen + eapad1;
                if (eap != NULL)
                        *eap = p0;
                if (eac != NULL)
                        *eac = p;
                return (ealen);
        }
        return (-1);
}

static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
        struct inode *ip;
        struct ufs2_dinode *dp;
        struct uio luio;
        struct iovec liovec;
        int easize, error;
        u_char *eae;

        ip = VTOI(vp);
        dp = ip->i_din2;
        easize = dp->di_extsize;

        eae = malloc(easize + extra, M_TEMP, M_WAITOK);

        liovec.iov_base = eae;
        liovec.iov_len = easize;
        luio.uio_iov = &liovec;
        luio.uio_iovcnt = 1;
        luio.uio_offset = 0;
        luio.uio_resid = easize;
        luio.uio_segflg = UIO_SYSSPACE;
        luio.uio_rw = UIO_READ;
        luio.uio_td = td;

        error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
        if (error) {
                free(eae, M_TEMP);
                return (error);
        }
        *p = eae;
        return (0);
}

static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
        struct inode *ip;
        struct ufs2_dinode *dp;
        int error;

        ip = VTOI(vp);

        if (ip->i_ea_area != NULL)
                return (EBUSY);
        dp = ip->i_din2;
        error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
        if (error)
                return (error);
        ip->i_ea_len = dp->di_extsize;
        ip->i_ea_error = 0;
        return (0);
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
        struct inode *ip;
        struct uio luio;
        struct iovec liovec;
        int error;
        struct ufs2_dinode *dp;

        ip = VTOI(vp);
        if (ip->i_ea_area == NULL)
                return (EINVAL);
        dp = ip->i_din2;
        error = ip->i_ea_error;
        if (commit && error == 0) {
                if (cred == NOCRED)
                        cred = vp->v_mount->mnt_cred;
                liovec.iov_base = ip->i_ea_area;
                liovec.iov_len = ip->i_ea_len;
                luio.uio_iov = &liovec;
                luio.uio_iovcnt = 1;
                luio.uio_offset = 0;
                luio.uio_resid = ip->i_ea_len;
                luio.uio_segflg = UIO_SYSSPACE;
                luio.uio_rw = UIO_WRITE;
                luio.uio_td = td;
                /* XXX: I'm not happy about truncating to zero size */
                if (ip->i_ea_len < dp->di_extsize)
                        error = ffs_truncate(vp, 0, IO_EXT, cred, td);
                error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
        }
        free(ip->i_ea_area, M_TEMP);
        ip->i_ea_area = NULL;
        ip->i_ea_len = 0;
        ip->i_ea_error = 0;
        return (error);
}
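
/*
 * The extattr vnode operations below work on the in-memory copy
 * managed by ffs_open_ea() and ffs_close_ea().  A caller that has not
 * already issued VOP_OPENEXTATTR runs "stand alone": it opens the
 * area itself and commits or aborts it again before returning.
 */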

/*
 * Vnode extattr strategy routine for fifos.
 *
 * We need to check for a read or write of the external attributes.
 * Otherwise we just fall through and do the usual thing.
 */
static int
ffsext_strategy(struct vop_strategy_args *ap)
/*
struct vop_strategy_args {
        struct vnodeop_desc *a_desc;
        struct vnode *a_vp;
        struct buf *a_bp;
};
*/
{
        struct vnode *vp;
        daddr_t lbn;

        vp = ap->a_vp;
        lbn = ap->a_bp->b_lblkno;
        if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
            lbn < 0 && lbn >= -NXADDR)
                return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
        if (vp->v_type == VFIFO)
                return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
        panic("spec nodes went here");
}

/*
 * Vnode extattr transaction start.
 */
static int
ffs_openextattr(struct vop_openextattr_args *ap)
/*
struct vop_openextattr_args {
        struct vnodeop_desc *a_desc;
        struct vnode *a_vp;
        IN struct ucred *a_cred;
        IN struct thread *a_td;
};
*/
{
        struct inode *ip;
        struct fs *fs;

        ip = VTOI(ap->a_vp);
        fs = ip->i_fs;

        if (ap->a_vp->v_type == VCHR)
                return (EOPNOTSUPP);

        return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}


/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_closeextattr(struct vop_closeextattr_args *ap)
/*
struct vop_closeextattr_args {
        struct vnodeop_desc *a_desc;
        struct vnode *a_vp;
        int a_commit;
        IN struct ucred *a_cred;
        IN struct thread *a_td;
};
*/
{
        struct inode *ip;
        struct fs *fs;

        ip = VTOI(ap->a_vp);
        fs = ip->i_fs;

        if (ap->a_vp->v_type == VCHR)
                return (EOPNOTSUPP);

        return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
}

/*
 * Vnode operation to remove a named attribute.
 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
        IN struct vnode *a_vp;
        IN int a_attrnamespace;
        IN const char *a_name;
        IN struct ucred *a_cred;
        IN struct thread *a_td;
};
*/
{
        struct inode *ip;
        struct fs *fs;
        uint32_t ealength, ul;
        int ealen, olen, eapad1, eapad2, error, i, easize;
        u_char *eae, *p;
        int stand_alone;

        ip = VTOI(ap->a_vp);
        fs = ip->i_fs;

        if (ap->a_vp->v_type == VCHR)
                return (EOPNOTSUPP);

        if (strlen(ap->a_name) == 0)
                return (EINVAL);

        error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
            ap->a_cred, ap->a_td, IWRITE);
        if (error) {
                if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
                        ip->i_ea_error = error;
                return (error);
        }

        if (ip->i_ea_area == NULL) {
                error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
                if (error)
                        return (error);
                stand_alone = 1;
        } else {
                stand_alone = 0;
        }

        ealength = eapad1 = ealen = eapad2 = 0;

        eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
        bcopy(ip->i_ea_area, eae, ip->i_ea_len);
        easize = ip->i_ea_len;

        olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
            &p, NULL);
        if (olen == -1) {
                /* delete but nonexistent */
                free(eae, M_TEMP);
                if (stand_alone)
                        ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
                return (ENOATTR);
        }
        bcopy(p, &ul, sizeof ul);
        i = p - eae + ul;
        if (ul != ealength) {
                bcopy(p + ul, p + ealength, easize - i);
                easize += (ealength - ul);
        }
 1371         if (easize > NXADDR * fs->fs_bsize) {
 1372                 free(eae, M_TEMP);
 1373                 if (stand_alone)
 1374                         ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 1375                 else if (ip->i_ea_error == 0)
 1376                         ip->i_ea_error = ENOSPC;
 1377                 return(ENOSPC);
 1378         }
 1379         p = ip->i_ea_area;
 1380         ip->i_ea_area = eae;
 1381         ip->i_ea_len = easize;
 1382         free(p, M_TEMP);
 1383         if (stand_alone)
 1384                 error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
 1385         return(error);
 1386 }
 1387 
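/*
 * Hedged userland sketch (excluded from compilation): this vop is
 * reached through the extattr_delete_file(2) family of system calls.
 * The path and attribute name below are placeholders.
 */
#if 0
#include <sys/types.h>
#include <sys/extattr.h>
#include <err.h>

int
main(void)
{

        if (extattr_delete_file("/tmp/f", EXTATTR_NAMESPACE_USER,
            "comment") == -1)
                err(1, "extattr_delete_file");
        return (0);
}
#endif
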
/*
 * Vnode operation to retrieve a named extended attribute.
 */
static int
ffs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
        IN struct vnode *a_vp;
        IN int a_attrnamespace;
        IN const char *a_name;
        INOUT struct uio *a_uio;
        OUT size_t *a_size;
        IN struct ucred *a_cred;
        IN struct thread *a_td;
};
*/
{
        struct inode *ip;
        u_char *eae, *p;
        unsigned easize;
        int error, ealen, stand_alone;

        ip = VTOI(ap->a_vp);

        if (ap->a_vp->v_type == VCHR)
                return (EOPNOTSUPP);

        error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
            ap->a_cred, ap->a_td, IREAD);
        if (error)
                return (error);

        if (ip->i_ea_area == NULL) {
                error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
                if (error)
                        return (error);
                stand_alone = 1;
        } else {
                stand_alone = 0;
        }
        eae = ip->i_ea_area;
        easize = ip->i_ea_len;

        ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
            NULL, &p);
        if (ealen >= 0) {
                error = 0;
                /* A size query takes precedence over a data copy. */
                if (ap->a_size != NULL)
                        *ap->a_size = ealen;
                else if (ap->a_uio != NULL)
                        error = uiomove(p, ealen, ap->a_uio);
        } else
                error = ENOATTR;
        if (stand_alone)
                ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
        return (error);
}

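/*
 * Hedged userland sketch (excluded from compilation): the a_size path
 * above backs the usual two-pass idiom with extattr_get_file(2):
 * query the size with a NULL buffer, then fetch the data.  The path
 * and attribute name are placeholders.
 */
#if 0
#include <sys/types.h>
#include <sys/extattr.h>
#include <err.h>
#include <stdlib.h>

int
main(void)
{
        ssize_t len;
        char *buf;

        len = extattr_get_file("/tmp/f", EXTATTR_NAMESPACE_USER,
            "comment", NULL, 0);
        if (len == -1)
                err(1, "extattr_get_file (size)");
        if ((buf = malloc(len)) == NULL)
                err(1, "malloc");
        if (extattr_get_file("/tmp/f", EXTATTR_NAMESPACE_USER,
            "comment", buf, len) == -1)
                err(1, "extattr_get_file");
        free(buf);
        return (0);
}
#endif
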
/*
 * Vnode operation to retrieve the list of extended attribute names
 * on a vnode.
 */
static int
ffs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
        IN struct vnode *a_vp;
        IN int a_attrnamespace;
        INOUT struct uio *a_uio;
        OUT size_t *a_size;
        IN struct ucred *a_cred;
        IN struct thread *a_td;
};
*/
{
        struct inode *ip;
        u_char *eae, *p, *pe, *pn;
        unsigned easize;
        uint32_t ul;
        int error, ealen, stand_alone;

        ip = VTOI(ap->a_vp);

        if (ap->a_vp->v_type == VCHR)
                return (EOPNOTSUPP);

        error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
            ap->a_cred, ap->a_td, IREAD);
        if (error)
                return (error);

        if (ip->i_ea_area == NULL) {
                error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
                if (error)
                        return (error);
                stand_alone = 1;
        } else {
                stand_alone = 0;
        }
        eae = ip->i_ea_area;
        easize = ip->i_ea_len;

        error = 0;
        if (ap->a_size != NULL)
                *ap->a_size = 0;
        /*
         * Walk the records: each starts with a 32-bit total length,
         * followed by a namespace byte, a content-pad byte, a name
         * length byte, and the name itself.
         */
        pe = eae + easize;
        for (p = eae; error == 0 && p < pe; p = pn) {
                bcopy(p, &ul, sizeof(ul));
                pn = p + ul;
                if (pn > pe)
                        break;
                p += sizeof(ul);
                if (*p++ != ap->a_attrnamespace)
                        continue;
                p++;    /* skip the content-pad byte */
                ealen = *p;
                /* Report each name as a length byte followed by the name. */
                if (ap->a_size != NULL) {
                        *ap->a_size += ealen + 1;
                } else if (ap->a_uio != NULL) {
                        error = uiomove(p, ealen + 1, ap->a_uio);
                }
        }
        if (stand_alone)
                ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
        return (error);
}

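/*
 * Hedged userland sketch (excluded from compilation): the uiomove of
 * "ealen + 1" bytes above produces the list format consumed through
 * extattr_list_file(2): a one-byte length followed by the name, with
 * no NUL terminator.  The path is a placeholder.
 */
#if 0
#include <sys/types.h>
#include <sys/extattr.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
        char buf[1024];
        ssize_t nbytes;
        int len, pos;

        nbytes = extattr_list_file("/tmp/f", EXTATTR_NAMESPACE_USER,
            buf, sizeof(buf));
        if (nbytes == -1)
                err(1, "extattr_list_file");
        for (pos = 0; pos < nbytes; pos += len) {
                len = (u_char)buf[pos++];
                printf("%.*s\n", len, buf + pos);
        }
        return (0);
}
#endif
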
/*
 * Vnode operation to set a named extended attribute.
 */
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
        IN struct vnode *a_vp;
        IN int a_attrnamespace;
        IN const char *a_name;
        INOUT struct uio *a_uio;
        IN struct ucred *a_cred;
        IN struct thread *a_td;
};
*/
{
        struct inode *ip;
        struct fs *fs;
        uint32_t ealength, ul;
        int ealen, olen, eapad1, eapad2, error, i, easize;
        u_char *eae, *p;
        int stand_alone;

        ip = VTOI(ap->a_vp);
        fs = ip->i_fs;

        if (ap->a_vp->v_type == VCHR)
                return (EOPNOTSUPP);

        if (strlen(ap->a_name) == 0)
                return (EINVAL);

        /*
         * XXX The old API deleted an EA when passed a NULL uio;
         * that usage is no longer supported.
         */
        if (ap->a_uio == NULL)
                return (EOPNOTSUPP);

        error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
            ap->a_cred, ap->a_td, IWRITE);
        if (error) {
                if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
                        ip->i_ea_error = error;
                return (error);
        }

        if (ip->i_ea_area == NULL) {
                error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
                if (error)
                        return (error);
                stand_alone = 1;
        } else {
                stand_alone = 0;
        }

        /*
         * Compute the record size: a 32-bit length, a namespace byte,
         * a content-pad byte, a name-length byte, and the name, padded
         * to an 8-byte boundary, followed by the data, also padded to 8.
         */
        ealen = ap->a_uio->uio_resid;
        ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
        eapad1 = 8 - (ealength % 8);
        if (eapad1 == 8)
                eapad1 = 0;
        eapad2 = 8 - (ealen % 8);
        if (eapad2 == 8)
                eapad2 = 0;
        ealength += eapad1 + ealen + eapad2;

        eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
        bcopy(ip->i_ea_area, eae, ip->i_ea_len);
        easize = ip->i_ea_len;

        olen = ffs_findextattr(eae, easize,
            ap->a_attrnamespace, ap->a_name, &p, NULL);
        if (olen == -1) {
                /* New attribute: append at the end of the area. */
                p = eae + easize;
                easize += ealength;
        } else {
                /* Replace in place, sliding later records as needed. */
                bcopy(p, &ul, sizeof ul);
                i = p - eae + ul;
                if (ul != ealength) {
                        bcopy(p + ul, p + ealength, easize - i);
                        easize += (ealength - ul);
                }
        }
        if (easize > NXADDR * fs->fs_bsize) {
                free(eae, M_TEMP);
                if (stand_alone)
                        ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
                else if (ip->i_ea_error == 0)
                        ip->i_ea_error = ENOSPC;
                return (ENOSPC);
        }
        /* Write out the record header followed by the attribute data. */
        bcopy(&ealength, p, sizeof(ealength));
        p += sizeof(ealength);
        *p++ = ap->a_attrnamespace;
        *p++ = eapad2;
        *p++ = strlen(ap->a_name);
        /*
         * The name is not NUL-terminated on disk; using bcopy here
         * avoids strcpy's stray NUL, which could overwrite the first
         * byte of the following record when eapad1 is zero.
         */
        bcopy(ap->a_name, p, strlen(ap->a_name));
        p += strlen(ap->a_name);
        bzero(p, eapad1);
        p += eapad1;
        error = uiomove(p, ealen, ap->a_uio);
        if (error) {
                free(eae, M_TEMP);
                if (stand_alone)
                        ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
                else if (ip->i_ea_error == 0)
                        ip->i_ea_error = error;
                return (error);
        }
        p += ealen;
        bzero(p, eapad2);

        /* Install the updated attribute area. */
        p = ip->i_ea_area;
        ip->i_ea_area = eae;
        ip->i_ea_len = easize;
        free(p, M_TEMP);
        if (stand_alone)
                error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
        return (error);
}
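
/*
 * Hedged sketch (excluded from compilation) of the record-size
 * arithmetic used above: a record is a 32-bit length, a namespace
 * byte, a content-pad byte, a name-length byte, and the name, with
 * the header padded to 8 bytes and the content padded to 8 bytes.
 * The helper name is hypothetical.
 */
#if 0
static uint32_t
ea_record_size(const char *name, uint32_t datalen)
{
        uint32_t hdr, pad1, pad2;

        hdr = sizeof(uint32_t) + 3 + strlen(name);
        pad1 = (8 - (hdr % 8)) % 8;     /* pad header to 8 bytes */
        pad2 = (8 - (datalen % 8)) % 8; /* pad content to 8 bytes */
        return (hdr + pad1 + datalen + pad2);
}
#endif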
