FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_vnops.c


/*-
 * Copyright (c) 1982, 1986, 1989, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/8.0/sys/kern/vfs_vnops.c 196974 2009-09-08 14:43:42Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/kdb.h>
#include <sys/stat.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/filio.h>
#include <sys/sx.h>
#include <sys/ttycom.h>
#include <sys/conf.h>
#include <sys/syslog.h>
#include <sys/unistd.h>

#include <security/mac/mac_framework.h>

static fo_rdwr_t        vn_read;
static fo_rdwr_t        vn_write;
static fo_truncate_t    vn_truncate;
static fo_ioctl_t       vn_ioctl;
static fo_poll_t        vn_poll;
static fo_kqfilter_t    vn_kqfilter;
static fo_stat_t        vn_statfile;
static fo_close_t       vn_closefile;

struct  fileops vnops = {
        .fo_read = vn_read,
        .fo_write = vn_write,
        .fo_truncate = vn_truncate,
        .fo_ioctl = vn_ioctl,
        .fo_poll = vn_poll,
        .fo_kqfilter = vn_kqfilter,
        .fo_stat = vn_statfile,
        .fo_close = vn_closefile,
        .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
};

int
vn_open(ndp, flagp, cmode, fp)
        struct nameidata *ndp;
        int *flagp, cmode;
        struct file *fp;
{
        struct thread *td = ndp->ni_cnd.cn_thread;

        return (vn_open_cred(ndp, flagp, cmode, 0, td->td_ucred, fp));
}

/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
 *
 * Note that this does NOT free nameidata for the successful case,
 * due to the NDINIT being done elsewhere.
 */
int
vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags,
    struct ucred *cred, struct file *fp)
{
        struct vnode *vp;
        struct mount *mp;
        struct thread *td = ndp->ni_cnd.cn_thread;
        struct vattr vat;
        struct vattr *vap = &vat;
        int fmode, error;
        accmode_t accmode;
        int vfslocked, mpsafe;

        mpsafe = ndp->ni_cnd.cn_flags & MPSAFE;
restart:
        vfslocked = 0;
        fmode = *flagp;
        if (fmode & O_CREAT) {
                ndp->ni_cnd.cn_nameiop = CREATE;
                ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF |
                    MPSAFE;
                if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
                        ndp->ni_cnd.cn_flags |= FOLLOW;
                if (!(vn_open_flags & VN_OPEN_NOAUDIT))
                        ndp->ni_cnd.cn_flags |= AUDITVNODE1;
                bwillwrite();
                if ((error = namei(ndp)) != 0)
                        return (error);
                vfslocked = NDHASGIANT(ndp);
                if (!mpsafe)
                        ndp->ni_cnd.cn_flags &= ~MPSAFE;
                if (ndp->ni_vp == NULL) {
                        VATTR_NULL(vap);
                        vap->va_type = VREG;
                        vap->va_mode = cmode;
                        if (fmode & O_EXCL)
                                vap->va_vaflags |= VA_EXCLUSIVE;
                        if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
                                NDFREE(ndp, NDF_ONLY_PNBUF);
                                vput(ndp->ni_dvp);
                                VFS_UNLOCK_GIANT(vfslocked);
                                if ((error = vn_start_write(NULL, &mp,
                                    V_XSLEEP | PCATCH)) != 0)
                                        return (error);
                                goto restart;
                        }
#ifdef MAC
                        error = mac_vnode_check_create(cred, ndp->ni_dvp,
                            &ndp->ni_cnd, vap);
                        if (error == 0)
#endif
                                error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
                                                   &ndp->ni_cnd, vap);
                        vput(ndp->ni_dvp);
                        vn_finished_write(mp);
                        if (error) {
                                VFS_UNLOCK_GIANT(vfslocked);
                                NDFREE(ndp, NDF_ONLY_PNBUF);
                                return (error);
                        }
                        fmode &= ~O_TRUNC;
                        vp = ndp->ni_vp;
                } else {
                        if (ndp->ni_dvp == ndp->ni_vp)
                                vrele(ndp->ni_dvp);
                        else
                                vput(ndp->ni_dvp);
                        ndp->ni_dvp = NULL;
                        vp = ndp->ni_vp;
                        if (fmode & O_EXCL) {
                                error = EEXIST;
                                goto bad;
                        }
                        fmode &= ~O_CREAT;
                }
        } else {
                ndp->ni_cnd.cn_nameiop = LOOKUP;
                ndp->ni_cnd.cn_flags = ISOPEN |
                    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
                    LOCKLEAF | MPSAFE;
                if (!(fmode & FWRITE))
                        ndp->ni_cnd.cn_flags |= LOCKSHARED;
                if (!(vn_open_flags & VN_OPEN_NOAUDIT))
                        ndp->ni_cnd.cn_flags |= AUDITVNODE1;
                if ((error = namei(ndp)) != 0)
                        return (error);
                if (!mpsafe)
                        ndp->ni_cnd.cn_flags &= ~MPSAFE;
                vfslocked = NDHASGIANT(ndp);
                vp = ndp->ni_vp;
        }
        if (vp->v_type == VLNK) {
                error = EMLINK;
                goto bad;
        }
        if (vp->v_type == VSOCK) {
                error = EOPNOTSUPP;
                goto bad;
        }
        accmode = 0;
        if (fmode & (FWRITE | O_TRUNC)) {
                if (vp->v_type == VDIR) {
                        error = EISDIR;
                        goto bad;
                }
                accmode |= VWRITE;
        }
        if (fmode & FREAD)
                accmode |= VREAD;
        if (fmode & FEXEC)
                accmode |= VEXEC;
        if (fmode & O_APPEND)
                accmode |= VAPPEND;
#ifdef MAC
        error = mac_vnode_check_open(cred, vp, accmode);
        if (error)
                goto bad;
#endif
        if ((fmode & O_CREAT) == 0) {
                if (accmode & VWRITE) {
                        error = vn_writechk(vp);
                        if (error)
                                goto bad;
                }
                if (accmode) {
                        error = VOP_ACCESS(vp, accmode, cred, td);
                        if (error)
                                goto bad;
                }
        }
        if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0)
                goto bad;

        if (fmode & FWRITE)
                vp->v_writecount++;
        *flagp = fmode;
        ASSERT_VOP_LOCKED(vp, "vn_open_cred");
        if (!mpsafe)
                VFS_UNLOCK_GIANT(vfslocked);
        return (0);
bad:
        NDFREE(ndp, NDF_ONLY_PNBUF);
        vput(vp);
        VFS_UNLOCK_GIANT(vfslocked);
        *flagp = fmode;
        ndp->ni_vp = NULL;
        return (error);
}
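
/*
 * Example (editorial sketch, not part of the original file): a typical
 * in-kernel open/use/close bracket built on vn_open() and vn_close(),
 * following the pattern used by kern_open().  The path and flags are
 * illustrative assumptions; error handling is minimal.
 */
#if 0
static int
example_open_close(struct thread *td)
{
        struct nameidata nd;
        struct vnode *vp;
        int flags, vfslocked, error;

        flags = FREAD;
        NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_SYSSPACE, "/etc/motd", td);
        error = vn_open(&nd, &flags, 0, NULL);
        if (error != 0)
                return (error);
        vfslocked = NDHASGIANT(&nd);
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vp = nd.ni_vp;
        /* vn_open() returns the vnode locked; unlock before further use. */
        VOP_UNLOCK(vp, 0);
        /* ... read from vp, e.g. with vn_rdwr() ... */
        error = vn_close(vp, FREAD, td->td_ucred, td);
        VFS_UNLOCK_GIANT(vfslocked);
        return (error);
}
#endif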

/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(vp)
        register struct vnode *vp;
{

        ASSERT_VOP_LOCKED(vp, "vn_writechk");
        /*
         * If there's shared text associated with
         * the vnode, try to free it up once.  If
         * we fail, we can't allow writing.
         */
        if (vp->v_vflag & VV_TEXT)
                return (ETXTBSY);

        return (0);
}

/*
 * Vnode close call
 */
int
vn_close(vp, flags, file_cred, td)
        register struct vnode *vp;
        int flags;
        struct ucred *file_cred;
        struct thread *td;
{
        struct mount *mp;
        int error, lock_flags;

        if (!(flags & FWRITE) && vp->v_mount != NULL &&
            vp->v_mount->mnt_kern_flag & MNTK_EXTENDED_SHARED)
                lock_flags = LK_SHARED;
        else
                lock_flags = LK_EXCLUSIVE;

        VFS_ASSERT_GIANT(vp->v_mount);

        vn_start_write(vp, &mp, V_WAIT);
        vn_lock(vp, lock_flags | LK_RETRY);
        if (flags & FWRITE) {
                VNASSERT(vp->v_writecount > 0, vp,
                    ("vn_close: negative writecount"));
                vp->v_writecount--;
        }
        error = VOP_CLOSE(vp, flags, file_cred, td);
        vput(vp);
        vn_finished_write(mp);
        return (error);
}

/*
 * Heuristic to detect sequential operation.
 */
static int
sequential_heuristic(struct uio *uio, struct file *fp)
{

        /*
         * Offset 0 is handled specially.  open() sets f_seqcount to 1 so
         * that the first I/O is normally considered to be slightly
         * sequential.  Seeking to offset 0 doesn't change sequentiality
         * unless previous seeks have reduced f_seqcount to 0, in which
         * case offset 0 is not special.
         */
        if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
            uio->uio_offset == fp->f_nextoff) {
                /*
                 * f_seqcount is in units of fixed-size blocks so that it
                 * depends mainly on the amount of sequential I/O and not
                 * much on the number of sequential I/O's.  The fixed size
                 * of 16384 is hard-coded here since it is (not quite) just
                 * a magic size that works well here.  This size is more
                 * closely related to the best I/O size for real disks than
                 * to any block size used by software.
                 */
                fp->f_seqcount += howmany(uio->uio_resid, 16384);
                if (fp->f_seqcount > IO_SEQMAX)
                        fp->f_seqcount = IO_SEQMAX;
                return (fp->f_seqcount << IO_SEQSHIFT);
        }

        /* Not sequential.  Quickly draw down sequentiality. */
        if (fp->f_seqcount > 1)
                fp->f_seqcount = 1;
        else
                fp->f_seqcount = 0;
        return (0);
}
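
/*
 * Worked example (editorial): a process reading 64 KB per call at
 * strictly sequential offsets gains howmany(65536, 16384) == 4 per
 * call, so f_seqcount quickly saturates at IO_SEQMAX and the hint
 * returned in the ioflag bits becomes IO_SEQMAX << IO_SEQSHIFT, the
 * strongest possible read-ahead advice to the filesystem.
 */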

/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
    aresid, td)
        enum uio_rw rw;
        struct vnode *vp;
        void *base;
        int len;
        off_t offset;
        enum uio_seg segflg;
        int ioflg;
        struct ucred *active_cred;
        struct ucred *file_cred;
        int *aresid;
        struct thread *td;
{
        struct uio auio;
        struct iovec aiov;
        struct mount *mp;
        struct ucred *cred;
        int error, lock_flags;

        VFS_ASSERT_GIANT(vp->v_mount);

        if ((ioflg & IO_NODELOCKED) == 0) {
                mp = NULL;
                if (rw == UIO_WRITE) {
                        if (vp->v_type != VCHR &&
                            (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
                            != 0)
                                return (error);
                        if (MNT_SHARED_WRITES(mp) ||
                            ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
                                lock_flags = LK_SHARED;
                        } else {
                                lock_flags = LK_EXCLUSIVE;
                        }
                        vn_lock(vp, lock_flags | LK_RETRY);
                } else
                        vn_lock(vp, LK_SHARED | LK_RETRY);
        }
        ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        aiov.iov_base = base;
        aiov.iov_len = len;
        auio.uio_resid = len;
        auio.uio_offset = offset;
        auio.uio_segflg = segflg;
        auio.uio_rw = rw;
        auio.uio_td = td;
        error = 0;
#ifdef MAC
        if ((ioflg & IO_NOMACCHECK) == 0) {
                if (rw == UIO_READ)
                        error = mac_vnode_check_read(active_cred, file_cred,
                            vp);
                else
                        error = mac_vnode_check_write(active_cred, file_cred,
                            vp);
        }
#endif
        if (error == 0) {
                if (file_cred)
                        cred = file_cred;
                else
                        cred = active_cred;
                if (rw == UIO_READ)
                        error = VOP_READ(vp, &auio, ioflg, cred);
                else
                        error = VOP_WRITE(vp, &auio, ioflg, cred);
        }
        if (aresid)
                *aresid = auio.uio_resid;
        else
                if (auio.uio_resid && error == 0)
                        error = EIO;
        if ((ioflg & IO_NODELOCKED) == 0) {
                if (rw == UIO_WRITE && vp->v_type != VCHR)
                        vn_finished_write(mp);
                VOP_UNLOCK(vp, 0);
        }
        return (error);
}
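
/*
 * Example (editorial sketch, not part of the original file): reading
 * the first 512 bytes of a referenced, unlocked vnode into a kernel
 * buffer.  vn_rdwr() acquires and drops the vnode lock itself because
 * IO_NODELOCKED is not set.  The buffer is assumed to hold 512 bytes.
 */
#if 0
static int
example_read_head(struct vnode *vp, char *buf, struct thread *td)
{
        int resid, error;

        error = vn_rdwr(UIO_READ, vp, buf, 512, (off_t)0, UIO_SYSSPACE,
            0, td->td_ucred, NOCRED, &resid, td);
        /* On success, 512 - resid bytes were read (resid > 0 near EOF). */
        return (error);
}
#endif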

/*
 * Package up an I/O request on a vnode into a uio and do it.  The I/O
 * request is split up into smaller chunks and we try to avoid saturating
 * the buffer cache while potentially holding a vnode locked, so we
 * check bwillwrite() before calling vn_rdwr().  We also call uio_yield()
 * to give other processes a chance to lock the vnode (either other processes
 * core'ing the same binary, or unrelated processes scanning the directory).
 */
int
vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred,
    file_cred, aresid, td)
        enum uio_rw rw;
        struct vnode *vp;
        void *base;
        size_t len;
        off_t offset;
        enum uio_seg segflg;
        int ioflg;
        struct ucred *active_cred;
        struct ucred *file_cred;
        size_t *aresid;
        struct thread *td;
{
        int error = 0;
        int iaresid;

        VFS_ASSERT_GIANT(vp->v_mount);

        do {
                int chunk;

                /*
                 * Force `offset' to a multiple of MAXBSIZE except possibly
                 * for the first chunk, so that filesystems only need to
                 * write full blocks except possibly for the first and last
                 * chunks.
                 */
                chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;

                if (chunk > len)
                        chunk = len;
                if (rw != UIO_READ && vp->v_type == VREG)
                        bwillwrite();
                iaresid = 0;
                error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
                    ioflg, active_cred, file_cred, &iaresid, td);
                len -= chunk;   /* aresid calc already includes length */
                if (error)
                        break;
                offset += chunk;
                base = (char *)base + chunk;
                uio_yield();
        } while (len);
        if (aresid)
                *aresid = len + iaresid;
        return (error);
}
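
/*
 * Worked example (editorial): with MAXBSIZE == 65536 and a starting
 * offset of 70000, the first chunk is 65536 - (70000 % 65536) == 61072
 * bytes, which brings the offset to 131072, a MAXBSIZE boundary; every
 * later chunk is then a full MAXBSIZE except possibly the last.
 */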

/*
 * File table vnode read routine.
 */
static int
vn_read(fp, uio, active_cred, flags, td)
        struct file *fp;
        struct uio *uio;
        struct ucred *active_cred;
        struct thread *td;
        int flags;
{
        struct vnode *vp;
        int error, ioflag;
        struct mtx *mtxp;
        int vfslocked;

        KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
            uio->uio_td, td));
        mtxp = NULL;
        vp = fp->f_vnode;
        ioflag = 0;
        if (fp->f_flag & FNONBLOCK)
                ioflag |= IO_NDELAY;
        if (fp->f_flag & O_DIRECT)
                ioflag |= IO_DIRECT;
        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        /*
         * According to McKusick the vn lock was protecting f_offset here.
         * It is now protected by the FOFFSET_LOCKED flag.
         */
        if ((flags & FOF_OFFSET) == 0) {
                mtxp = mtx_pool_find(mtxpool_sleep, fp);
                mtx_lock(mtxp);
                while (fp->f_vnread_flags & FOFFSET_LOCKED) {
                        fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
                        msleep(&fp->f_vnread_flags, mtxp, PUSER - 1,
                            "vnread offlock", 0);
                }
                fp->f_vnread_flags |= FOFFSET_LOCKED;
                mtx_unlock(mtxp);
                vn_lock(vp, LK_SHARED | LK_RETRY);
                uio->uio_offset = fp->f_offset;
        } else
                vn_lock(vp, LK_SHARED | LK_RETRY);

        ioflag |= sequential_heuristic(uio, fp);

#ifdef MAC
        error = mac_vnode_check_read(active_cred, fp->f_cred, vp);
        if (error == 0)
#endif
                error = VOP_READ(vp, uio, ioflag, fp->f_cred);
        if ((flags & FOF_OFFSET) == 0) {
                fp->f_offset = uio->uio_offset;
                mtx_lock(mtxp);
                if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING)
                        wakeup(&fp->f_vnread_flags);
                fp->f_vnread_flags = 0;
                mtx_unlock(mtxp);
        }
        fp->f_nextoff = uio->uio_offset;
        VOP_UNLOCK(vp, 0);
        VFS_UNLOCK_GIANT(vfslocked);
        return (error);
}

/*
 * File table vnode write routine.
 */
static int
vn_write(fp, uio, active_cred, flags, td)
        struct file *fp;
        struct uio *uio;
        struct ucred *active_cred;
        struct thread *td;
        int flags;
{
        struct vnode *vp;
        struct mount *mp;
        int error, ioflag, lock_flags;
        int vfslocked;

        KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
            uio->uio_td, td));
        vp = fp->f_vnode;
        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        if (vp->v_type == VREG)
                bwillwrite();
        ioflag = IO_UNIT;
        if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
                ioflag |= IO_APPEND;
        if (fp->f_flag & FNONBLOCK)
                ioflag |= IO_NDELAY;
        if (fp->f_flag & O_DIRECT)
                ioflag |= IO_DIRECT;
        if ((fp->f_flag & O_FSYNC) ||
            (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
                ioflag |= IO_SYNC;
        mp = NULL;
        if (vp->v_type != VCHR &&
            (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
                goto unlock;

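        /*
         * Editorial note: a shared vnode lock may be used only when the
         * filesystem opts into concurrent writes (MNT_SHARED_WRITES)
         * and the caller supplied an explicit offset (FOF_OFFSET); all
         * other writes take the vnode lock exclusively.
         */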
        if ((MNT_SHARED_WRITES(mp) ||
            ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) &&
            (flags & FOF_OFFSET) != 0) {
                lock_flags = LK_SHARED;
        } else {
                lock_flags = LK_EXCLUSIVE;
        }

        vn_lock(vp, lock_flags | LK_RETRY);
        if ((flags & FOF_OFFSET) == 0)
                uio->uio_offset = fp->f_offset;
        ioflag |= sequential_heuristic(uio, fp);
#ifdef MAC
        error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
        if (error == 0)
#endif
                error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
        if ((flags & FOF_OFFSET) == 0)
                fp->f_offset = uio->uio_offset;
        fp->f_nextoff = uio->uio_offset;
        VOP_UNLOCK(vp, 0);
        if (vp->v_type != VCHR)
                vn_finished_write(mp);
unlock:
        VFS_UNLOCK_GIANT(vfslocked);
        return (error);
}

/*
 * File table truncate routine.
 */
static int
vn_truncate(fp, length, active_cred, td)
        struct file *fp;
        off_t length;
        struct ucred *active_cred;
        struct thread *td;
{
        struct vattr vattr;
        struct mount *mp;
        struct vnode *vp;
        int vfslocked;
        int error;

        vp = fp->f_vnode;
        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
        if (error) {
                VFS_UNLOCK_GIANT(vfslocked);
                return (error);
        }
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        if (vp->v_type == VDIR) {
                error = EISDIR;
                goto out;
        }
#ifdef MAC
        error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
        if (error)
                goto out;
#endif
        error = vn_writechk(vp);
        if (error == 0) {
                VATTR_NULL(&vattr);
                vattr.va_size = length;
                error = VOP_SETATTR(vp, &vattr, fp->f_cred);
        }
out:
        VOP_UNLOCK(vp, 0);
        vn_finished_write(mp);
        VFS_UNLOCK_GIANT(vfslocked);
        return (error);
}

/*
 * File table vnode stat routine.
 */
static int
vn_statfile(fp, sb, active_cred, td)
        struct file *fp;
        struct stat *sb;
        struct ucred *active_cred;
        struct thread *td;
{
        struct vnode *vp = fp->f_vnode;
        int vfslocked;
        int error;

        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        vn_lock(vp, LK_SHARED | LK_RETRY);
        error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
        VOP_UNLOCK(vp, 0);
        VFS_UNLOCK_GIANT(vfslocked);

        return (error);
}

/*
 * Stat a vnode; implementation for the stat syscall
 */
int
vn_stat(vp, sb, active_cred, file_cred, td)
        struct vnode *vp;
        register struct stat *sb;
        struct ucred *active_cred;
        struct ucred *file_cred;
        struct thread *td;
{
        struct vattr vattr;
        register struct vattr *vap;
        int error;
        u_short mode;

#ifdef MAC
        error = mac_vnode_check_stat(active_cred, file_cred, vp);
        if (error)
                return (error);
#endif

        vap = &vattr;

        /*
         * Initialize defaults for new and unusual fields, so that file
         * systems which don't support these fields don't need to know
         * about them.
         */
        vap->va_birthtime.tv_sec = -1;
        vap->va_birthtime.tv_nsec = 0;
        vap->va_fsid = VNOVAL;
        vap->va_rdev = NODEV;

        error = VOP_GETATTR(vp, vap, active_cred);
        if (error)
                return (error);

        /*
         * Zero the spare stat fields
         */
        bzero(sb, sizeof *sb);

        /*
         * Copy from vattr table
         */
        if (vap->va_fsid != VNOVAL)
                sb->st_dev = vap->va_fsid;
        else
                sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
        sb->st_ino = vap->va_fileid;
        mode = vap->va_mode;
        switch (vap->va_type) {
        case VREG:
                mode |= S_IFREG;
                break;
        case VDIR:
                mode |= S_IFDIR;
                break;
        case VBLK:
                mode |= S_IFBLK;
                break;
        case VCHR:
                mode |= S_IFCHR;
                break;
        case VLNK:
                mode |= S_IFLNK;
                break;
        case VSOCK:
                mode |= S_IFSOCK;
                break;
        case VFIFO:
                mode |= S_IFIFO;
                break;
        default:
                return (EBADF);
        }
        sb->st_mode = mode;
        sb->st_nlink = vap->va_nlink;
        sb->st_uid = vap->va_uid;
        sb->st_gid = vap->va_gid;
        sb->st_rdev = vap->va_rdev;
        if (vap->va_size > OFF_MAX)
                return (EOVERFLOW);
        sb->st_size = vap->va_size;
        sb->st_atimespec = vap->va_atime;
        sb->st_mtimespec = vap->va_mtime;
        sb->st_ctimespec = vap->va_ctime;
        sb->st_birthtimespec = vap->va_birthtime;

        /*
         * According to www.opengroup.org, the meaning of st_blksize is
         *   "a filesystem-specific preferred I/O block size for this
         *    object.  In some filesystem types, this may vary from file
         *    to file"
         * Default to PAGE_SIZE after much discussion.
         * XXX: min(PAGE_SIZE, vp->v_bufobj.bo_bsize) may be more correct.
         */

        sb->st_blksize = PAGE_SIZE;

        sb->st_flags = vap->va_flags;
        if (priv_check(td, PRIV_VFS_GENERATION))
                sb->st_gen = 0;
        else
                sb->st_gen = vap->va_gen;

        sb->st_blocks = vap->va_bytes / S_BLKSIZE;
        return (0);
}

/*
 * File table vnode ioctl routine.
 */
static int
vn_ioctl(fp, com, data, active_cred, td)
        struct file *fp;
        u_long com;
        void *data;
        struct ucred *active_cred;
        struct thread *td;
{
        struct vnode *vp = fp->f_vnode;
        struct vattr vattr;
        int vfslocked;
        int error;

        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        error = ENOTTY;
        switch (vp->v_type) {
        case VREG:
        case VDIR:
                if (com == FIONREAD) {
                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                        error = VOP_GETATTR(vp, &vattr, active_cred);
                        VOP_UNLOCK(vp, 0);
                        if (!error)
                                *(int *)data = vattr.va_size - fp->f_offset;
                } else if (com == FIONBIO || com == FIOASYNC)   /* XXX */
                        error = 0;
                else
                        error = VOP_IOCTL(vp, com, data, fp->f_flag,
                            active_cred, td);
                break;

        default:
                break;
        }
        VFS_UNLOCK_GIANT(vfslocked);
        return (error);
}

/*
 * File table vnode poll routine.
 */
static int
vn_poll(fp, events, active_cred, td)
        struct file *fp;
        int events;
        struct ucred *active_cred;
        struct thread *td;
{
        struct vnode *vp;
        int vfslocked;
        int error;

        vp = fp->f_vnode;
        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
#ifdef MAC
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        error = mac_vnode_check_poll(active_cred, fp->f_cred, vp);
        VOP_UNLOCK(vp, 0);
        if (!error)
#endif
                error = VOP_POLL(vp, events, fp->f_cred, td);
        VFS_UNLOCK_GIANT(vfslocked);
        return (error);
}

/*
 * Acquire the requested lock and then check for validity.  LK_RETRY
 * permits vn_lock to return doomed vnodes.
 */
int
_vn_lock(struct vnode *vp, int flags, char *file, int line)
{
        int error;

        VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
            ("vn_lock called with no locktype."));
        do {
#ifdef DEBUG_VFS_LOCKS
                KASSERT(vp->v_holdcnt != 0,
                    ("vn_lock %p: zero hold count", vp));
#endif
                error = VOP_LOCK1(vp, flags, file, line);
                flags &= ~LK_INTERLOCK; /* Interlock is always dropped. */
                KASSERT((flags & LK_RETRY) == 0 || error == 0,
                    ("LK_RETRY set with incompatible flags (0x%x) or an error occurred (%d)",
                    flags, error));
                /*
                 * Callers specify LK_RETRY if they wish to get dead vnodes.
                 * If RETRY is not set, we return ENOENT instead.
                 */
                if (error == 0 && vp->v_iflag & VI_DOOMED &&
                    (flags & LK_RETRY) == 0) {
                        VOP_UNLOCK(vp, 0);
                        error = ENOENT;
                        break;
                }
        } while (flags & LK_RETRY && error != 0);
        return (error);
}
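
/*
 * Example (editorial sketch, not part of the original file): the two
 * vn_lock() idioms described above.  With LK_RETRY the lock always
 * succeeds, even if the vnode has been doomed by a forced unmount;
 * without it, the caller must be prepared for an ENOENT return.
 */
#if 0
static int
example_lock_idioms(struct vnode *vp)
{

        /* Always get the lock; caller checks VI_DOOMED itself if needed. */
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        VOP_UNLOCK(vp, 0);

        /* Refuse doomed vnodes instead of returning them locked. */
        if (vn_lock(vp, LK_EXCLUSIVE) != 0)
                return (ENOENT);
        VOP_UNLOCK(vp, 0);
        return (0);
}
#endif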

/*
 * File table vnode close routine.
 */
static int
vn_closefile(fp, td)
        struct file *fp;
        struct thread *td;
{
        struct vnode *vp;
        struct flock lf;
        int vfslocked;
        int error;

        vp = fp->f_vnode;

        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        if (fp->f_type == DTYPE_VNODE && fp->f_flag & FHASLOCK) {
                lf.l_whence = SEEK_SET;
                lf.l_start = 0;
                lf.l_len = 0;
                lf.l_type = F_UNLCK;
                (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
        }

        fp->f_ops = &badfileops;

        error = vn_close(vp, fp->f_flag, fp->f_cred, td);
        VFS_UNLOCK_GIANT(vfslocked);
        return (error);
}

/*
 * Prepare to start a filesystem write operation. If the operation is
 * permitted, bump the count of operations in progress and proceed. If
 * a suspend request is in progress, wait until the suspension is over
 * and then proceed.
 */
int
vn_start_write(vp, mpp, flags)
        struct vnode *vp;
        struct mount **mpp;
        int flags;
{
        struct mount *mp;
        int error;

        error = 0;
        /*
         * If a vnode is provided, get and return the mount point to
         * which it will write.
         */
        if (vp != NULL) {
                if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
                        *mpp = NULL;
                        if (error != EOPNOTSUPP)
                                return (error);
                        return (0);
                }
        }
        if ((mp = *mpp) == NULL)
                return (0);

        /*
         * VOP_GETWRITEMOUNT() returns with the mount point refcount
         * held through a vfs_ref().  If no vnode was provided, acquire
         * a reference on the passed-in mount point ourselves to emulate
         * that vfs_ref().
         */
        MNT_ILOCK(mp);
        if (vp == NULL)
                MNT_REF(mp);

        /*
         * Check on status of suspension.
         */
        if ((curthread->td_pflags & TDP_IGNSUSP) == 0 ||
            mp->mnt_susp_owner != curthread) {
                while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
                        if (flags & V_NOWAIT) {
                                error = EWOULDBLOCK;
                                goto unlock;
                        }
                        error = msleep(&mp->mnt_flag, MNT_MTX(mp),
                            (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
                        if (error)
                                goto unlock;
                }
        }
        if (flags & V_XSLEEP)
                goto unlock;
        mp->mnt_writeopcount++;
unlock:
        if (error != 0 || (flags & V_XSLEEP) != 0)
                MNT_REL(mp);
        MNT_IUNLOCK(mp);
        return (error);
}
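
/*
 * Example (editorial sketch, not part of the original file): the
 * canonical bracket around a vnode modification, as used throughout
 * this file (see vn_truncate() above).  vn_start_write() blocks while
 * the filesystem is suspended and must be paired with
 * vn_finished_write().
 */
#if 0
static int
example_write_bracket(struct vnode *vp)
{
        struct mount *mp;
        int error;

        if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
                return (error);
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        /* ... modify the vnode ... */
        VOP_UNLOCK(vp, 0);
        vn_finished_write(mp);
        return (0);
}
#endif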

/*
 * Secondary suspension. Used by operations such as vop_inactive
 * routines that are needed by the higher level functions. These
 * are allowed to proceed until all the higher level functions have
 * completed (indicated by mnt_writeopcount dropping to zero). At that
 * time, these operations are halted until the suspension is over.
 */
int
vn_start_secondary_write(vp, mpp, flags)
        struct vnode *vp;
        struct mount **mpp;
        int flags;
{
        struct mount *mp;
        int error;

 retry:
        if (vp != NULL) {
                if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
                        *mpp = NULL;
                        if (error != EOPNOTSUPP)
                                return (error);
                        return (0);
                }
        }
        /*
         * If we are not suspended or have not yet reached suspended
         * mode, then let the operation proceed.
         */
        if ((mp = *mpp) == NULL)
                return (0);

        /*
         * VOP_GETWRITEMOUNT() returns with the mount point refcount
         * held through a vfs_ref().  If no vnode was provided, acquire
         * a reference on the passed-in mount point ourselves to emulate
         * that vfs_ref().
         */
        MNT_ILOCK(mp);
        if (vp == NULL)
                MNT_REF(mp);
        if ((mp->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND2)) == 0) {
                mp->mnt_secondary_writes++;
                mp->mnt_secondary_accwrites++;
                MNT_IUNLOCK(mp);
                return (0);
        }
        if (flags & V_NOWAIT) {
                MNT_REL(mp);
                MNT_IUNLOCK(mp);
                return (EWOULDBLOCK);
        }
        /*
         * Wait for the suspension to finish.
         */
        error = msleep(&mp->mnt_flag, MNT_MTX(mp),
                       (PUSER - 1) | (flags & PCATCH) | PDROP, "suspfs", 0);
        vfs_rel(mp);
        if (error == 0)
                goto retry;
        return (error);
}

/*
 * Filesystem write operation has completed. If we are suspending and this
 * operation is the last one, notify the suspender that the suspension is
 * now in effect.
 */
void
vn_finished_write(mp)
        struct mount *mp;
{
        if (mp == NULL)
                return;
        MNT_ILOCK(mp);
        MNT_REL(mp);
        mp->mnt_writeopcount--;
        if (mp->mnt_writeopcount < 0)
                panic("vn_finished_write: neg cnt");
        if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
            mp->mnt_writeopcount <= 0)
                wakeup(&mp->mnt_writeopcount);
        MNT_IUNLOCK(mp);
}

/*
 * Filesystem secondary write operation has completed. If we are
 * suspending and this operation is the last one, notify the suspender
 * that the suspension is now in effect.
 */
void
vn_finished_secondary_write(mp)
        struct mount *mp;
{
        if (mp == NULL)
                return;
        MNT_ILOCK(mp);
        MNT_REL(mp);
        mp->mnt_secondary_writes--;
        if (mp->mnt_secondary_writes < 0)
                panic("vn_finished_secondary_write: neg cnt");
        if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
            mp->mnt_secondary_writes <= 0)
                wakeup(&mp->mnt_secondary_writes);
        MNT_IUNLOCK(mp);
}

/*
 * Request a filesystem to suspend write operations.
 */
int
vfs_write_suspend(mp)
        struct mount *mp;
{
        int error;

        MNT_ILOCK(mp);
        if (mp->mnt_susp_owner == curthread) {
                MNT_IUNLOCK(mp);
                return (EALREADY);
        }
        while (mp->mnt_kern_flag & MNTK_SUSPEND)
                msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0);
        mp->mnt_kern_flag |= MNTK_SUSPEND;
        mp->mnt_susp_owner = curthread;
        if (mp->mnt_writeopcount > 0)
                (void) msleep(&mp->mnt_writeopcount,
                    MNT_MTX(mp), (PUSER - 1) | PDROP, "suspwt", 0);
        else
                MNT_IUNLOCK(mp);
        if ((error = VFS_SYNC(mp, MNT_SUSPEND)) != 0)
                vfs_write_resume(mp);
        return (error);
}

/*
 * Request a filesystem to resume write operations.
 */
void
vfs_write_resume(mp)
        struct mount *mp;
{

        MNT_ILOCK(mp);
        if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
                KASSERT(mp->mnt_susp_owner == curthread, ("mnt_susp_owner"));
                mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPEND2 |
                                       MNTK_SUSPENDED);
                mp->mnt_susp_owner = NULL;
                wakeup(&mp->mnt_writeopcount);
                wakeup(&mp->mnt_flag);
                curthread->td_pflags &= ~TDP_IGNSUSP;
                MNT_IUNLOCK(mp);
                VFS_SUSP_CLEAN(mp);
        } else
                MNT_IUNLOCK(mp);
}
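
/*
 * Example (editorial sketch, not part of the original file): suspending
 * and resuming writes on a mount point, as a snapshot-style operation
 * would.  vfs_write_suspend() returns only after in-progress write
 * operations have drained and the filesystem has been synced.
 */
#if 0
static int
example_suspend_resume(struct mount *mp)
{
        int error;

        if ((error = vfs_write_suspend(mp)) == 0) {
                /* ... the filesystem is now quiescent ... */
                vfs_write_resume(mp);
        }
        return (error);
}
#endif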

/*
 * Implement kqueues for files by translating them to vnode operations.
 */
static int
vn_kqfilter(struct file *fp, struct knote *kn)
{
        int vfslocked;
        int error;

        vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
        error = VOP_KQFILTER(fp->f_vnode, kn);
        VFS_UNLOCK_GIANT(vfslocked);

        return (error);
}

/*
 * Simplified in-kernel wrapper calls for extended attribute access.
 * Both calls pass in a NULL credential, authorizing as "kernel" access.
 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 */
int
vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, int *buflen, char *buf, struct thread *td)
{
        struct uio      auio;
        struct iovec    iov;
        int     error;

        iov.iov_len = *buflen;
        iov.iov_base = buf;

        auio.uio_iov = &iov;
        auio.uio_iovcnt = 1;
        auio.uio_rw = UIO_READ;
        auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_td = td;
        auio.uio_offset = 0;
        auio.uio_resid = *buflen;

        if ((ioflg & IO_NODELOCKED) == 0)
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

        ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");

        /* authorize attribute retrieval as kernel */
        error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
            td);

        if ((ioflg & IO_NODELOCKED) == 0)
                VOP_UNLOCK(vp, 0);

        if (error == 0) {
                *buflen = *buflen - auio.uio_resid;
        }

        return (error);
}

/*
 * XXX failure mode if partially written?
 */
int
vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, int buflen, char *buf, struct thread *td)
{
        struct uio      auio;
        struct iovec    iov;
        struct mount    *mp;
        int     error;

        iov.iov_len = buflen;
        iov.iov_base = buf;

        auio.uio_iov = &iov;
        auio.uio_iovcnt = 1;
        auio.uio_rw = UIO_WRITE;
        auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_td = td;
        auio.uio_offset = 0;
        auio.uio_resid = buflen;

        if ((ioflg & IO_NODELOCKED) == 0) {
                if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
                        return (error);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        }

        ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");

        /* authorize attribute setting as kernel */
        error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);

        if ((ioflg & IO_NODELOCKED) == 0) {
                vn_finished_write(mp);
                VOP_UNLOCK(vp, 0);
        }

        return (error);
}

int
vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, struct thread *td)
{
        struct mount    *mp;
        int     error;

        if ((ioflg & IO_NODELOCKED) == 0) {
                if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
                        return (error);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        }

        ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");

        /* authorize attribute removal as kernel */
        error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td);
        if (error == EOPNOTSUPP)
                error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
                    NULL, td);

        if ((ioflg & IO_NODELOCKED) == 0) {
                vn_finished_write(mp);
                VOP_UNLOCK(vp, 0);
        }

        return (error);
}
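
/*
 * Example (editorial sketch, not part of the original file): storing
 * and reading back a small extended attribute with the wrappers above.
 * The namespace and attribute name are illustrative assumptions and
 * would require <sys/extattr.h>; vp is assumed to be locked already,
 * hence IO_NODELOCKED.
 */
#if 0
static int
example_extattr_roundtrip(struct vnode *vp, struct thread *td)
{
        char buf[16] = "hello";
        int buflen, error;

        error = vn_extattr_set(vp, IO_NODELOCKED,
            EXTATTR_NAMESPACE_SYSTEM, "example", sizeof("hello"), buf, td);
        if (error != 0)
                return (error);
        buflen = sizeof(buf);
        error = vn_extattr_get(vp, IO_NODELOCKED,
            EXTATTR_NAMESPACE_SYSTEM, "example", &buflen, buf, td);
        return (error);
}
#endif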

/*
 * Given a locked vnode vp, return (in *rvp) the vnode for inode number
 * ino on the same mount, locked with lkflags.  The lock on vp is
 * dropped and reacquired around the VFS_VGET() call, so the caller
 * must be prepared for vp to have been doomed in the interim, in which
 * case ENOENT is returned.
 */
int
vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp)
{
        struct mount *mp;
        int ltype, error;

        mp = vp->v_mount;
        ltype = VOP_ISLOCKED(vp);
        KASSERT(ltype == LK_EXCLUSIVE || ltype == LK_SHARED,
            ("vn_vget_ino: vp not locked"));
        error = vfs_busy(mp, MBF_NOWAIT);
        if (error != 0) {
                vfs_ref(mp);
                VOP_UNLOCK(vp, 0);
                error = vfs_busy(mp, 0);
                vn_lock(vp, ltype | LK_RETRY);
                vfs_rel(mp);
                if (error != 0)
                        return (ENOENT);
                if (vp->v_iflag & VI_DOOMED) {
                        vfs_unbusy(mp);
                        return (ENOENT);
                }
        }
        VOP_UNLOCK(vp, 0);
        error = VFS_VGET(mp, ino, lkflags, rvp);
        vfs_unbusy(mp);
        vn_lock(vp, ltype | LK_RETRY);
        if (vp->v_iflag & VI_DOOMED) {
                if (error == 0)
                        vput(*rvp);
                error = ENOENT;
        }
        return (error);
}
