The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_vnops.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/8.2/sys/kern/vfs_vnops.c 206753 2010-04-17 11:25:30Z avg $");
   39 
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/fcntl.h>
   43 #include <sys/file.h>
   44 #include <sys/kdb.h>
   45 #include <sys/stat.h>
   46 #include <sys/priv.h>
   47 #include <sys/proc.h>
   48 #include <sys/limits.h>
   49 #include <sys/lock.h>
   50 #include <sys/mount.h>
   51 #include <sys/mutex.h>
   52 #include <sys/namei.h>
   53 #include <sys/vnode.h>
   54 #include <sys/bio.h>
   55 #include <sys/buf.h>
   56 #include <sys/filio.h>
   57 #include <sys/sx.h>
   58 #include <sys/ttycom.h>
   59 #include <sys/conf.h>
   60 #include <sys/syslog.h>
   61 #include <sys/unistd.h>
   62 
   63 #include <security/mac/mac_framework.h>
   64 
   65 static fo_rdwr_t        vn_read;
   66 static fo_rdwr_t        vn_write;
   67 static fo_truncate_t    vn_truncate;
   68 static fo_ioctl_t       vn_ioctl;
   69 static fo_poll_t        vn_poll;
   70 static fo_kqfilter_t    vn_kqfilter;
   71 static fo_stat_t        vn_statfile;
   72 static fo_close_t       vn_closefile;
   73 
   74 struct  fileops vnops = {
   75         .fo_read = vn_read,
   76         .fo_write = vn_write,
   77         .fo_truncate = vn_truncate,
   78         .fo_ioctl = vn_ioctl,
   79         .fo_poll = vn_poll,
   80         .fo_kqfilter = vn_kqfilter,
   81         .fo_stat = vn_statfile,
   82         .fo_close = vn_closefile,
   83         .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
   84 };
   85 
   86 int
   87 vn_open(ndp, flagp, cmode, fp)
   88         struct nameidata *ndp;
   89         int *flagp, cmode;
   90         struct file *fp;
   91 {
   92         struct thread *td = ndp->ni_cnd.cn_thread;
   93 
   94         return (vn_open_cred(ndp, flagp, cmode, 0, td->td_ucred, fp));
   95 }
   96 
   97 /*
   98  * Common code for vnode open operations.
   99  * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
  100  * 
  101  * Note that this does NOT free nameidata for the successful case,
  102  * due to the NDINIT being done elsewhere.
  103  */
  104 int
  105 vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags,
  106     struct ucred *cred, struct file *fp)
  107 {
  108         struct vnode *vp;
  109         struct mount *mp;
  110         struct thread *td = ndp->ni_cnd.cn_thread;
  111         struct vattr vat;
  112         struct vattr *vap = &vat;
  113         int fmode, error;
  114         accmode_t accmode;
  115         int vfslocked, mpsafe;
  116 
  117         mpsafe = ndp->ni_cnd.cn_flags & MPSAFE;
  118 restart:
  119         vfslocked = 0;
  120         fmode = *flagp;
  121         if (fmode & O_CREAT) {
  122                 ndp->ni_cnd.cn_nameiop = CREATE;
  123                 ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF |
  124                     MPSAFE;
  125                 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
  126                         ndp->ni_cnd.cn_flags |= FOLLOW;
  127                 if (!(vn_open_flags & VN_OPEN_NOAUDIT))
  128                         ndp->ni_cnd.cn_flags |= AUDITVNODE1;
  129                 bwillwrite();
  130                 if ((error = namei(ndp)) != 0)
  131                         return (error);
  132                 vfslocked = NDHASGIANT(ndp);
  133                 if (!mpsafe)
  134                         ndp->ni_cnd.cn_flags &= ~MPSAFE;
  135                 if (ndp->ni_vp == NULL) {
  136                         VATTR_NULL(vap);
  137                         vap->va_type = VREG;
  138                         vap->va_mode = cmode;
  139                         if (fmode & O_EXCL)
  140                                 vap->va_vaflags |= VA_EXCLUSIVE;
  141                         if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
  142                                 NDFREE(ndp, NDF_ONLY_PNBUF);
  143                                 vput(ndp->ni_dvp);
  144                                 VFS_UNLOCK_GIANT(vfslocked);
  145                                 if ((error = vn_start_write(NULL, &mp,
  146                                     V_XSLEEP | PCATCH)) != 0)
  147                                         return (error);
  148                                 goto restart;
  149                         }
  150 #ifdef MAC
  151                         error = mac_vnode_check_create(cred, ndp->ni_dvp,
  152                             &ndp->ni_cnd, vap);
  153                         if (error == 0)
  154 #endif
  155                                 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
  156                                                    &ndp->ni_cnd, vap);
  157                         vput(ndp->ni_dvp);
  158                         vn_finished_write(mp);
  159                         if (error) {
  160                                 VFS_UNLOCK_GIANT(vfslocked);
  161                                 NDFREE(ndp, NDF_ONLY_PNBUF);
  162                                 return (error);
  163                         }
  164                         fmode &= ~O_TRUNC;
  165                         vp = ndp->ni_vp;
  166                 } else {
  167                         if (ndp->ni_dvp == ndp->ni_vp)
  168                                 vrele(ndp->ni_dvp);
  169                         else
  170                                 vput(ndp->ni_dvp);
  171                         ndp->ni_dvp = NULL;
  172                         vp = ndp->ni_vp;
  173                         if (fmode & O_EXCL) {
  174                                 error = EEXIST;
  175                                 goto bad;
  176                         }
  177                         fmode &= ~O_CREAT;
  178                 }
  179         } else {
  180                 ndp->ni_cnd.cn_nameiop = LOOKUP;
  181                 ndp->ni_cnd.cn_flags = ISOPEN |
  182                     ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
  183                     LOCKLEAF | MPSAFE;
  184                 if (!(fmode & FWRITE))
  185                         ndp->ni_cnd.cn_flags |= LOCKSHARED;
  186                 if (!(vn_open_flags & VN_OPEN_NOAUDIT))
  187                         ndp->ni_cnd.cn_flags |= AUDITVNODE1;
  188                 if ((error = namei(ndp)) != 0)
  189                         return (error);
  190                 if (!mpsafe)
  191                         ndp->ni_cnd.cn_flags &= ~MPSAFE;
  192                 vfslocked = NDHASGIANT(ndp);
  193                 vp = ndp->ni_vp;
  194         }
  195         if (vp->v_type == VLNK) {
  196                 error = EMLINK;
  197                 goto bad;
  198         }
  199         if (vp->v_type == VSOCK) {
  200                 error = EOPNOTSUPP;
  201                 goto bad;
  202         }
  203         accmode = 0;
  204         if (fmode & (FWRITE | O_TRUNC)) {
  205                 if (vp->v_type == VDIR) {
  206                         error = EISDIR;
  207                         goto bad;
  208                 }
  209                 accmode |= VWRITE;
  210         }
  211         if (fmode & FREAD)
  212                 accmode |= VREAD;
  213         if (fmode & FEXEC)
  214                 accmode |= VEXEC;
  215         if ((fmode & O_APPEND) && (fmode & FWRITE))
  216                 accmode |= VAPPEND;
  217 #ifdef MAC
  218         error = mac_vnode_check_open(cred, vp, accmode);
  219         if (error)
  220                 goto bad;
  221 #endif
  222         if ((fmode & O_CREAT) == 0) {
  223                 if (accmode & VWRITE) {
  224                         error = vn_writechk(vp);
  225                         if (error)
  226                                 goto bad;
  227                 }
  228                 if (accmode) {
  229                         error = VOP_ACCESS(vp, accmode, cred, td);
  230                         if (error)
  231                                 goto bad;
  232                 }
  233         }
  234         if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0)
  235                 goto bad;
  236 
  237         if (fmode & FWRITE)
  238                 vp->v_writecount++;
  239         *flagp = fmode;
  240         ASSERT_VOP_LOCKED(vp, "vn_open_cred");
  241         if (!mpsafe)
  242                 VFS_UNLOCK_GIANT(vfslocked);
  243         return (0);
  244 bad:
  245         NDFREE(ndp, NDF_ONLY_PNBUF);
  246         vput(vp);
  247         VFS_UNLOCK_GIANT(vfslocked);
  248         *flagp = fmode;
  249         ndp->ni_vp = NULL;
  250         return (error);
  251 }
  252 
  253 /*
  254  * Check for write permissions on the specified vnode.
  255  * Prototype text segments cannot be written.
  256  */
  257 int
  258 vn_writechk(vp)
  259         register struct vnode *vp;
  260 {
  261 
  262         ASSERT_VOP_LOCKED(vp, "vn_writechk");
  263         /*
  264          * If there's shared text associated with
  265          * the vnode, try to free it up once.  If
  266          * we fail, we can't allow writing.
  267          */
  268         if (vp->v_vflag & VV_TEXT)
  269                 return (ETXTBSY);
  270 
  271         return (0);
  272 }
  273 
  274 /*
  275  * Vnode close call
  276  */
  277 int
  278 vn_close(vp, flags, file_cred, td)
  279         register struct vnode *vp;
  280         int flags;
  281         struct ucred *file_cred;
  282         struct thread *td;
  283 {
  284         struct mount *mp;
  285         int error, lock_flags;
  286 
  287         if (!(flags & FWRITE) && vp->v_mount != NULL &&
  288             vp->v_mount->mnt_kern_flag & MNTK_EXTENDED_SHARED)
  289                 lock_flags = LK_SHARED;
  290         else
  291                 lock_flags = LK_EXCLUSIVE;
  292 
  293         VFS_ASSERT_GIANT(vp->v_mount);
  294 
  295         vn_start_write(vp, &mp, V_WAIT);
  296         vn_lock(vp, lock_flags | LK_RETRY);
  297         if (flags & FWRITE) {
  298                 VNASSERT(vp->v_writecount > 0, vp, 
  299                     ("vn_close: negative writecount"));
  300                 vp->v_writecount--;
  301         }
  302         error = VOP_CLOSE(vp, flags, file_cred, td);
  303         vput(vp);
  304         vn_finished_write(mp);
  305         return (error);
  306 }
  307 
  308 /*
  309  * Heuristic to detect sequential operation.
  310  */
  311 static int
  312 sequential_heuristic(struct uio *uio, struct file *fp)
  313 {
  314 
  315         if (atomic_load_acq_int(&(fp->f_flag)) & FRDAHEAD)
  316                 return (fp->f_seqcount << IO_SEQSHIFT);
  317 
  318         /*
  319          * Offset 0 is handled specially.  open() sets f_seqcount to 1 so
  320          * that the first I/O is normally considered to be slightly
  321          * sequential.  Seeking to offset 0 doesn't change sequentiality
  322          * unless previous seeks have reduced f_seqcount to 0, in which
  323          * case offset 0 is not special.
  324          */
  325         if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
  326             uio->uio_offset == fp->f_nextoff) {
  327                 /*
  328                  * f_seqcount is in units of fixed-size blocks so that it
  329                  * depends mainly on the amount of sequential I/O and not
  330                  * much on the number of sequential I/O's.  The fixed size
  331                  * of 16384 is hard-coded here since it is (not quite) just
  332                  * a magic size that works well here.  This size is more
  333                  * closely related to the best I/O size for real disks than
  334                  * to any block size used by software.
  335                  */
  336                 fp->f_seqcount += howmany(uio->uio_resid, 16384);
  337                 if (fp->f_seqcount > IO_SEQMAX)
  338                         fp->f_seqcount = IO_SEQMAX;
  339                 return (fp->f_seqcount << IO_SEQSHIFT);
  340         }
  341 
  342         /* Not sequential.  Quickly draw-down sequentiality. */
  343         if (fp->f_seqcount > 1)
  344                 fp->f_seqcount = 1;
  345         else
  346                 fp->f_seqcount = 0;
  347         return (0);
  348 }
  349 
  350 /*
  351  * Package up an I/O request on a vnode into a uio and do it.
  352  */
  353 int
  354 vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
  355     aresid, td)
  356         enum uio_rw rw;
  357         struct vnode *vp;
  358         void *base;
  359         int len;
  360         off_t offset;
  361         enum uio_seg segflg;
  362         int ioflg;
  363         struct ucred *active_cred;
  364         struct ucred *file_cred;
  365         int *aresid;
  366         struct thread *td;
  367 {
  368         struct uio auio;
  369         struct iovec aiov;
  370         struct mount *mp;
  371         struct ucred *cred;
  372         int error, lock_flags;
  373 
  374         VFS_ASSERT_GIANT(vp->v_mount);
  375 
  376         if ((ioflg & IO_NODELOCKED) == 0) {
  377                 mp = NULL;
  378                 if (rw == UIO_WRITE) { 
  379                         if (vp->v_type != VCHR &&
  380                             (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
  381                             != 0)
  382                                 return (error);
  383                         if (MNT_SHARED_WRITES(mp) ||
  384                             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
  385                                 lock_flags = LK_SHARED;
  386                         } else {
  387                                 lock_flags = LK_EXCLUSIVE;
  388                         }
  389                         vn_lock(vp, lock_flags | LK_RETRY);
  390                 } else
  391                         vn_lock(vp, LK_SHARED | LK_RETRY);
  392 
  393         }
  394         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
  395         auio.uio_iov = &aiov;
  396         auio.uio_iovcnt = 1;
  397         aiov.iov_base = base;
  398         aiov.iov_len = len;
  399         auio.uio_resid = len;
  400         auio.uio_offset = offset;
  401         auio.uio_segflg = segflg;
  402         auio.uio_rw = rw;
  403         auio.uio_td = td;
  404         error = 0;
  405 #ifdef MAC
  406         if ((ioflg & IO_NOMACCHECK) == 0) {
  407                 if (rw == UIO_READ)
  408                         error = mac_vnode_check_read(active_cred, file_cred,
  409                             vp);
  410                 else
  411                         error = mac_vnode_check_write(active_cred, file_cred,
  412                             vp);
  413         }
  414 #endif
  415         if (error == 0) {
  416                 if (file_cred)
  417                         cred = file_cred;
  418                 else
  419                         cred = active_cred;
  420                 if (rw == UIO_READ)
  421                         error = VOP_READ(vp, &auio, ioflg, cred);
  422                 else
  423                         error = VOP_WRITE(vp, &auio, ioflg, cred);
  424         }
  425         if (aresid)
  426                 *aresid = auio.uio_resid;
  427         else
  428                 if (auio.uio_resid && error == 0)
  429                         error = EIO;
  430         if ((ioflg & IO_NODELOCKED) == 0) {
  431                 if (rw == UIO_WRITE && vp->v_type != VCHR)
  432                         vn_finished_write(mp);
  433                 VOP_UNLOCK(vp, 0);
  434         }
  435         return (error);
  436 }
  437 
  438 /*
  439  * Package up an I/O request on a vnode into a uio and do it.  The I/O
  440  * request is split up into smaller chunks and we try to avoid saturating
  441  * the buffer cache while potentially holding a vnode locked, so we 
  442  * check bwillwrite() before calling vn_rdwr().  We also call uio_yield()
  443  * to give other processes a chance to lock the vnode (either other processes
  444  * core'ing the same binary, or unrelated processes scanning the directory).
  445  */
  446 int
  447 vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred,
  448     file_cred, aresid, td)
  449         enum uio_rw rw;
  450         struct vnode *vp;
  451         void *base;
  452         size_t len;
  453         off_t offset;
  454         enum uio_seg segflg;
  455         int ioflg;
  456         struct ucred *active_cred;
  457         struct ucred *file_cred;
  458         size_t *aresid;
  459         struct thread *td;
  460 {
  461         int error = 0;
  462         int iaresid;
  463 
  464         VFS_ASSERT_GIANT(vp->v_mount);
  465 
  466         do {
  467                 int chunk;
  468 
  469                 /*
  470                  * Force `offset' to a multiple of MAXBSIZE except possibly
  471                  * for the first chunk, so that filesystems only need to
  472                  * write full blocks except possibly for the first and last
  473                  * chunks.
  474                  */
  475                 chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;
  476 
  477                 if (chunk > len)
  478                         chunk = len;
  479                 if (rw != UIO_READ && vp->v_type == VREG)
  480                         bwillwrite();
  481                 iaresid = 0;
  482                 error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
  483                     ioflg, active_cred, file_cred, &iaresid, td);
  484                 len -= chunk;   /* aresid calc already includes length */
  485                 if (error)
  486                         break;
  487                 offset += chunk;
  488                 base = (char *)base + chunk;
  489                 uio_yield();
  490         } while (len);
  491         if (aresid)
  492                 *aresid = len + iaresid;
  493         return (error);
  494 }
  495 
  496 /*
  497  * File table vnode read routine.
  498  */
  499 static int
  500 vn_read(fp, uio, active_cred, flags, td)
  501         struct file *fp;
  502         struct uio *uio;
  503         struct ucred *active_cred;
  504         struct thread *td;
  505         int flags;
  506 {
  507         struct vnode *vp;
  508         int error, ioflag;
  509         struct mtx *mtxp;
  510         int vfslocked;
  511 
  512         KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
  513             uio->uio_td, td));
  514         mtxp = NULL;
  515         vp = fp->f_vnode;
  516         ioflag = 0;
  517         if (fp->f_flag & FNONBLOCK)
  518                 ioflag |= IO_NDELAY;
  519         if (fp->f_flag & O_DIRECT)
  520                 ioflag |= IO_DIRECT;
  521         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  522         /*
  523          * According to McKusick the vn lock was protecting f_offset here.
  524          * It is now protected by the FOFFSET_LOCKED flag.
  525          */
  526         if ((flags & FOF_OFFSET) == 0) {
  527                 mtxp = mtx_pool_find(mtxpool_sleep, fp);
  528                 mtx_lock(mtxp);
  529                 while(fp->f_vnread_flags & FOFFSET_LOCKED) {
  530                         fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
  531                         msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
  532                             "vnread offlock", 0);
  533                 }
  534                 fp->f_vnread_flags |= FOFFSET_LOCKED;
  535                 mtx_unlock(mtxp);
  536                 vn_lock(vp, LK_SHARED | LK_RETRY);
  537                 uio->uio_offset = fp->f_offset;
  538         } else
  539                 vn_lock(vp, LK_SHARED | LK_RETRY);
  540 
  541         ioflag |= sequential_heuristic(uio, fp);
  542 
  543 #ifdef MAC
  544         error = mac_vnode_check_read(active_cred, fp->f_cred, vp);
  545         if (error == 0)
  546 #endif
  547                 error = VOP_READ(vp, uio, ioflag, fp->f_cred);
  548         if ((flags & FOF_OFFSET) == 0) {
  549                 fp->f_offset = uio->uio_offset;
  550                 mtx_lock(mtxp);
  551                 if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING)
  552                         wakeup(&fp->f_vnread_flags);
  553                 fp->f_vnread_flags = 0;
  554                 mtx_unlock(mtxp);
  555         }
  556         fp->f_nextoff = uio->uio_offset;
  557         VOP_UNLOCK(vp, 0);
  558         VFS_UNLOCK_GIANT(vfslocked);
  559         return (error);
  560 }
  561 
  562 /*
  563  * File table vnode write routine.
  564  */
  565 static int
  566 vn_write(fp, uio, active_cred, flags, td)
  567         struct file *fp;
  568         struct uio *uio;
  569         struct ucred *active_cred;
  570         struct thread *td;
  571         int flags;
  572 {
  573         struct vnode *vp;
  574         struct mount *mp;
  575         int error, ioflag, lock_flags;
  576         int vfslocked;
  577 
  578         KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
  579             uio->uio_td, td));
  580         vp = fp->f_vnode;
  581         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  582         if (vp->v_type == VREG)
  583                 bwillwrite();
  584         ioflag = IO_UNIT;
  585         if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
  586                 ioflag |= IO_APPEND;
  587         if (fp->f_flag & FNONBLOCK)
  588                 ioflag |= IO_NDELAY;
  589         if (fp->f_flag & O_DIRECT)
  590                 ioflag |= IO_DIRECT;
  591         if ((fp->f_flag & O_FSYNC) ||
  592             (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
  593                 ioflag |= IO_SYNC;
  594         mp = NULL;
  595         if (vp->v_type != VCHR &&
  596             (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
  597                 goto unlock;
  598  
  599         if ((MNT_SHARED_WRITES(mp) ||
  600             ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) &&
  601             (flags & FOF_OFFSET) != 0) {
  602                 lock_flags = LK_SHARED;
  603         } else {
  604                 lock_flags = LK_EXCLUSIVE;
  605         }
  606 
  607         vn_lock(vp, lock_flags | LK_RETRY);
  608         if ((flags & FOF_OFFSET) == 0)
  609                 uio->uio_offset = fp->f_offset;
  610         ioflag |= sequential_heuristic(uio, fp);
  611 #ifdef MAC
  612         error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
  613         if (error == 0)
  614 #endif
  615                 error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
  616         if ((flags & FOF_OFFSET) == 0)
  617                 fp->f_offset = uio->uio_offset;
  618         fp->f_nextoff = uio->uio_offset;
  619         VOP_UNLOCK(vp, 0);
  620         if (vp->v_type != VCHR)
  621                 vn_finished_write(mp);
  622 unlock:
  623         VFS_UNLOCK_GIANT(vfslocked);
  624         return (error);
  625 }
  626 
  627 /*
  628  * File table truncate routine.
  629  */
  630 static int
  631 vn_truncate(fp, length, active_cred, td)
  632         struct file *fp;
  633         off_t length;
  634         struct ucred *active_cred;
  635         struct thread *td;
  636 {
  637         struct vattr vattr;
  638         struct mount *mp;
  639         struct vnode *vp;
  640         int vfslocked;
  641         int error;
  642 
  643         vp = fp->f_vnode;
  644         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  645         error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
  646         if (error) {
  647                 VFS_UNLOCK_GIANT(vfslocked);
  648                 return (error);
  649         }
  650         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  651         if (vp->v_type == VDIR) {
  652                 error = EISDIR;
  653                 goto out;
  654         }
  655 #ifdef MAC
  656         error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
  657         if (error)
  658                 goto out;
  659 #endif
  660         error = vn_writechk(vp);
  661         if (error == 0) {
  662                 VATTR_NULL(&vattr);
  663                 vattr.va_size = length;
  664                 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
  665         }
  666 out:
  667         VOP_UNLOCK(vp, 0);
  668         vn_finished_write(mp);
  669         VFS_UNLOCK_GIANT(vfslocked);
  670         return (error);
  671 }
  672 
  673 /*
  674  * File table vnode stat routine.
  675  */
  676 static int
  677 vn_statfile(fp, sb, active_cred, td)
  678         struct file *fp;
  679         struct stat *sb;
  680         struct ucred *active_cred;
  681         struct thread *td;
  682 {
  683         struct vnode *vp = fp->f_vnode;
  684         int vfslocked;
  685         int error;
  686 
  687         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  688         vn_lock(vp, LK_SHARED | LK_RETRY);
  689         error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
  690         VOP_UNLOCK(vp, 0);
  691         VFS_UNLOCK_GIANT(vfslocked);
  692 
  693         return (error);
  694 }
  695 
  696 /*
  697  * Stat a vnode; implementation for the stat syscall
  698  */
  699 int
  700 vn_stat(vp, sb, active_cred, file_cred, td)
  701         struct vnode *vp;
  702         register struct stat *sb;
  703         struct ucred *active_cred;
  704         struct ucred *file_cred;
  705         struct thread *td;
  706 {
  707         struct vattr vattr;
  708         register struct vattr *vap;
  709         int error;
  710         u_short mode;
  711 
  712 #ifdef MAC
  713         error = mac_vnode_check_stat(active_cred, file_cred, vp);
  714         if (error)
  715                 return (error);
  716 #endif
  717 
  718         vap = &vattr;
  719 
  720         /*
  721          * Initialize defaults for new and unusual fields, so that file
  722          * systems which don't support these fields don't need to know
  723          * about them.
  724          */
  725         vap->va_birthtime.tv_sec = -1;
  726         vap->va_birthtime.tv_nsec = 0;
  727         vap->va_fsid = VNOVAL;
  728         vap->va_rdev = NODEV;
  729 
  730         error = VOP_GETATTR(vp, vap, active_cred);
  731         if (error)
  732                 return (error);
  733 
  734         /*
  735          * Zero the spare stat fields
  736          */
  737         bzero(sb, sizeof *sb);
  738 
  739         /*
  740          * Copy from vattr table
  741          */
  742         if (vap->va_fsid != VNOVAL)
  743                 sb->st_dev = vap->va_fsid;
  744         else
  745                 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
  746         sb->st_ino = vap->va_fileid;
  747         mode = vap->va_mode;
  748         switch (vap->va_type) {
  749         case VREG:
  750                 mode |= S_IFREG;
  751                 break;
  752         case VDIR:
  753                 mode |= S_IFDIR;
  754                 break;
  755         case VBLK:
  756                 mode |= S_IFBLK;
  757                 break;
  758         case VCHR:
  759                 mode |= S_IFCHR;
  760                 break;
  761         case VLNK:
  762                 mode |= S_IFLNK;
  763                 break;
  764         case VSOCK:
  765                 mode |= S_IFSOCK;
  766                 break;
  767         case VFIFO:
  768                 mode |= S_IFIFO;
  769                 break;
  770         default:
  771                 return (EBADF);
  772         };
  773         sb->st_mode = mode;
  774         sb->st_nlink = vap->va_nlink;
  775         sb->st_uid = vap->va_uid;
  776         sb->st_gid = vap->va_gid;
  777         sb->st_rdev = vap->va_rdev;
  778         if (vap->va_size > OFF_MAX)
  779                 return (EOVERFLOW);
  780         sb->st_size = vap->va_size;
  781         sb->st_atimespec = vap->va_atime;
  782         sb->st_mtimespec = vap->va_mtime;
  783         sb->st_ctimespec = vap->va_ctime;
  784         sb->st_birthtimespec = vap->va_birthtime;
  785 
  786         /*
  787          * According to www.opengroup.org, the meaning of st_blksize is 
  788          *   "a filesystem-specific preferred I/O block size for this 
  789          *    object.  In some filesystem types, this may vary from file
  790          *    to file"
  791          * Use miminum/default of PAGE_SIZE (e.g. for VCHR).
  792          */
  793 
  794         sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
  795         
  796         sb->st_flags = vap->va_flags;
  797         if (priv_check(td, PRIV_VFS_GENERATION))
  798                 sb->st_gen = 0;
  799         else
  800                 sb->st_gen = vap->va_gen;
  801 
  802         sb->st_blocks = vap->va_bytes / S_BLKSIZE;
  803         return (0);
  804 }
  805 
  806 /*
  807  * File table vnode ioctl routine.
  808  */
  809 static int
  810 vn_ioctl(fp, com, data, active_cred, td)
  811         struct file *fp;
  812         u_long com;
  813         void *data;
  814         struct ucred *active_cred;
  815         struct thread *td;
  816 {
  817         struct vnode *vp = fp->f_vnode;
  818         struct vattr vattr;
  819         int vfslocked;
  820         int error;
  821 
  822         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  823         error = ENOTTY;
  824         switch (vp->v_type) {
  825         case VREG:
  826         case VDIR:
  827                 if (com == FIONREAD) {
  828                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  829                         error = VOP_GETATTR(vp, &vattr, active_cred);
  830                         VOP_UNLOCK(vp, 0);
  831                         if (!error)
  832                                 *(int *)data = vattr.va_size - fp->f_offset;
  833                 }
  834                 if (com == FIONBIO || com == FIOASYNC)  /* XXX */
  835                         error = 0;
  836                 else
  837                         error = VOP_IOCTL(vp, com, data, fp->f_flag,
  838                             active_cred, td);
  839                 break;
  840 
  841         default:
  842                 break;
  843         }
  844         VFS_UNLOCK_GIANT(vfslocked);
  845         return (error);
  846 }
  847 
  848 /*
  849  * File table vnode poll routine.
  850  */
  851 static int
  852 vn_poll(fp, events, active_cred, td)
  853         struct file *fp;
  854         int events;
  855         struct ucred *active_cred;
  856         struct thread *td;
  857 {
  858         struct vnode *vp;
  859         int vfslocked;
  860         int error;
  861 
  862         vp = fp->f_vnode;
  863         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  864 #ifdef MAC
  865         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  866         error = mac_vnode_check_poll(active_cred, fp->f_cred, vp);
  867         VOP_UNLOCK(vp, 0);
  868         if (!error)
  869 #endif
  870 
  871         error = VOP_POLL(vp, events, fp->f_cred, td);
  872         VFS_UNLOCK_GIANT(vfslocked);
  873         return (error);
  874 }
  875 
  876 /*
  877  * Acquire the requested lock and then check for validity.  LK_RETRY
  878  * permits vn_lock to return doomed vnodes.
  879  */
  880 int
  881 _vn_lock(struct vnode *vp, int flags, char *file, int line)
  882 {
  883         int error;
  884 
  885         VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
  886             ("vn_lock called with no locktype."));
  887         do {
  888 #ifdef DEBUG_VFS_LOCKS
  889                 KASSERT(vp->v_holdcnt != 0,
  890                     ("vn_lock %p: zero hold count", vp));
  891 #endif
  892                 error = VOP_LOCK1(vp, flags, file, line);
  893                 flags &= ~LK_INTERLOCK; /* Interlock is always dropped. */
  894                 KASSERT((flags & LK_RETRY) == 0 || error == 0,
  895                     ("LK_RETRY set with incompatible flags (0x%x) or an error occured (%d)",
  896                     flags, error));
  897                 /*
  898                  * Callers specify LK_RETRY if they wish to get dead vnodes.
  899                  * If RETRY is not set, we return ENOENT instead.
  900                  */
  901                 if (error == 0 && vp->v_iflag & VI_DOOMED &&
  902                     (flags & LK_RETRY) == 0) {
  903                         VOP_UNLOCK(vp, 0);
  904                         error = ENOENT;
  905                         break;
  906                 }
  907         } while (flags & LK_RETRY && error != 0);
  908         return (error);
  909 }
  910 
  911 /*
  912  * File table vnode close routine.
  913  */
  914 static int
  915 vn_closefile(fp, td)
  916         struct file *fp;
  917         struct thread *td;
  918 {
  919         struct vnode *vp;
  920         struct flock lf;
  921         int vfslocked;
  922         int error;
  923 
  924         vp = fp->f_vnode;
  925 
  926         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  927         if (fp->f_type == DTYPE_VNODE && fp->f_flag & FHASLOCK) {
  928                 lf.l_whence = SEEK_SET;
  929                 lf.l_start = 0;
  930                 lf.l_len = 0;
  931                 lf.l_type = F_UNLCK;
  932                 (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
  933         }
  934 
  935         fp->f_ops = &badfileops;
  936 
  937         error = vn_close(vp, fp->f_flag, fp->f_cred, td);
  938         VFS_UNLOCK_GIANT(vfslocked);
  939         return (error);
  940 }
  941 
  942 /*
  943  * Preparing to start a filesystem write operation. If the operation is
  944  * permitted, then we bump the count of operations in progress and
  945  * proceed. If a suspend request is in progress, we wait until the
  946  * suspension is over, and then proceed.
  947  */
  948 int
  949 vn_start_write(vp, mpp, flags)
  950         struct vnode *vp;
  951         struct mount **mpp;
  952         int flags;
  953 {
  954         struct mount *mp;
  955         int error;
  956 
  957         error = 0;
  958         /*
  959          * If a vnode is provided, get and return the mount point that
  960          * to which it will write.
  961          */
  962         if (vp != NULL) {
  963                 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
  964                         *mpp = NULL;
  965                         if (error != EOPNOTSUPP)
  966                                 return (error);
  967                         return (0);
  968                 }
  969         }
  970         if ((mp = *mpp) == NULL)
  971                 return (0);
  972 
  973         /*
  974          * VOP_GETWRITEMOUNT() returns with the mp refcount held through
  975          * a vfs_ref().
  976          * As long as a vnode is not provided we need to acquire a
  977          * refcount for the provided mountpoint too, in order to
  978          * emulate a vfs_ref().
  979          */
  980         MNT_ILOCK(mp);
  981         if (vp == NULL)
  982                 MNT_REF(mp);
  983 
  984         /*
  985          * Check on status of suspension.
  986          */
  987         if ((curthread->td_pflags & TDP_IGNSUSP) == 0 ||
  988             mp->mnt_susp_owner != curthread) {
  989                 while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
  990                         if (flags & V_NOWAIT) {
  991                                 error = EWOULDBLOCK;
  992                                 goto unlock;
  993                         }
  994                         error = msleep(&mp->mnt_flag, MNT_MTX(mp),
  995                             (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
  996                         if (error)
  997                                 goto unlock;
  998                 }
  999         }
 1000         if (flags & V_XSLEEP)
 1001                 goto unlock;
 1002         mp->mnt_writeopcount++;
 1003 unlock:
 1004         if (error != 0 || (flags & V_XSLEEP) != 0)
 1005                 MNT_REL(mp);
 1006         MNT_IUNLOCK(mp);
 1007         return (error);
 1008 }
 1009 
 1010 /*
 1011  * Secondary suspension. Used by operations such as vop_inactive
 1012  * routines that are needed by the higher level functions. These
 1013  * are allowed to proceed until all the higher level functions have
 1014  * completed (indicated by mnt_writeopcount dropping to zero). At that
 1015  * time, these operations are halted until the suspension is over.
 1016  */
 1017 int
 1018 vn_start_secondary_write(vp, mpp, flags)
 1019         struct vnode *vp;
 1020         struct mount **mpp;
 1021         int flags;
 1022 {
 1023         struct mount *mp;
 1024         int error;
 1025 
 1026  retry:
 1027         if (vp != NULL) {
 1028                 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
 1029                         *mpp = NULL;
 1030                         if (error != EOPNOTSUPP)
 1031                                 return (error);
 1032                         return (0);
 1033                 }
 1034         }
 1035         /*
 1036          * If we are not suspended or have not yet reached suspended
 1037          * mode, then let the operation proceed.
 1038          */
 1039         if ((mp = *mpp) == NULL)
 1040                 return (0);
 1041 
 1042         /*
 1043          * VOP_GETWRITEMOUNT() returns with the mp refcount held through
 1044          * a vfs_ref().
 1045          * As long as a vnode is not provided we need to acquire a
 1046          * refcount for the provided mountpoint too, in order to
 1047          * emulate a vfs_ref().
 1048          */
 1049         MNT_ILOCK(mp);
 1050         if (vp == NULL)
 1051                 MNT_REF(mp);
 1052         if ((mp->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND2)) == 0) {
 1053                 mp->mnt_secondary_writes++;
 1054                 mp->mnt_secondary_accwrites++;
 1055                 MNT_IUNLOCK(mp);
 1056                 return (0);
 1057         }
 1058         if (flags & V_NOWAIT) {
 1059                 MNT_REL(mp);
 1060                 MNT_IUNLOCK(mp);
 1061                 return (EWOULDBLOCK);
 1062         }
 1063         /*
 1064          * Wait for the suspension to finish.
 1065          */
 1066         error = msleep(&mp->mnt_flag, MNT_MTX(mp),
 1067                        (PUSER - 1) | (flags & PCATCH) | PDROP, "suspfs", 0);
 1068         vfs_rel(mp);
 1069         if (error == 0)
 1070                 goto retry;
 1071         return (error);
 1072 }
 1073 
 1074 /*
 1075  * Filesystem write operation has completed. If we are suspending and this
 1076  * operation is the last one, notify the suspender that the suspension is
 1077  * now in effect.
 1078  */
 1079 void
 1080 vn_finished_write(mp)
 1081         struct mount *mp;
 1082 {
 1083         if (mp == NULL)
 1084                 return;
 1085         MNT_ILOCK(mp);
 1086         MNT_REL(mp);
 1087         mp->mnt_writeopcount--;
 1088         if (mp->mnt_writeopcount < 0)
 1089                 panic("vn_finished_write: neg cnt");
 1090         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
 1091             mp->mnt_writeopcount <= 0)
 1092                 wakeup(&mp->mnt_writeopcount);
 1093         MNT_IUNLOCK(mp);
 1094 }
 1095 
 1096 
 1097 /*
 1098  * Filesystem secondary write operation has completed. If we are
 1099  * suspending and this operation is the last one, notify the suspender
 1100  * that the suspension is now in effect.
 1101  */
 1102 void
 1103 vn_finished_secondary_write(mp)
 1104         struct mount *mp;
 1105 {
 1106         if (mp == NULL)
 1107                 return;
 1108         MNT_ILOCK(mp);
 1109         MNT_REL(mp);
 1110         mp->mnt_secondary_writes--;
 1111         if (mp->mnt_secondary_writes < 0)
 1112                 panic("vn_finished_secondary_write: neg cnt");
 1113         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
 1114             mp->mnt_secondary_writes <= 0)
 1115                 wakeup(&mp->mnt_secondary_writes);
 1116         MNT_IUNLOCK(mp);
 1117 }
 1118 
 1119 
 1120 
 1121 /*
 1122  * Request a filesystem to suspend write operations.
 1123  */
 1124 int
 1125 vfs_write_suspend(mp)
 1126         struct mount *mp;
 1127 {
 1128         int error;
 1129 
 1130         MNT_ILOCK(mp);
 1131         if (mp->mnt_susp_owner == curthread) {
 1132                 MNT_IUNLOCK(mp);
 1133                 return (EALREADY);
 1134         }
 1135         while (mp->mnt_kern_flag & MNTK_SUSPEND)
 1136                 msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0);
 1137         mp->mnt_kern_flag |= MNTK_SUSPEND;
 1138         mp->mnt_susp_owner = curthread;
 1139         if (mp->mnt_writeopcount > 0)
 1140                 (void) msleep(&mp->mnt_writeopcount, 
 1141                     MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0);
 1142         else
 1143                 MNT_IUNLOCK(mp);
 1144         if ((error = VFS_SYNC(mp, MNT_SUSPEND)) != 0)
 1145                 vfs_write_resume(mp);
 1146         return (error);
 1147 }
 1148 
 1149 /*
 1150  * Request a filesystem to resume write operations.
 1151  */
 1152 void
 1153 vfs_write_resume(mp)
 1154         struct mount *mp;
 1155 {
 1156 
 1157         MNT_ILOCK(mp);
 1158         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
 1159                 KASSERT(mp->mnt_susp_owner == curthread, ("mnt_susp_owner"));
 1160                 mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPEND2 |
 1161                                        MNTK_SUSPENDED);
 1162                 mp->mnt_susp_owner = NULL;
 1163                 wakeup(&mp->mnt_writeopcount);
 1164                 wakeup(&mp->mnt_flag);
 1165                 curthread->td_pflags &= ~TDP_IGNSUSP;
 1166                 MNT_IUNLOCK(mp);
 1167                 VFS_SUSP_CLEAN(mp);
 1168         } else
 1169                 MNT_IUNLOCK(mp);
 1170 }
 1171 
 1172 /*
 1173  * Implement kqueues for files by translating it to vnode operation.
 1174  */
 1175 static int
 1176 vn_kqfilter(struct file *fp, struct knote *kn)
 1177 {
 1178         int vfslocked;
 1179         int error;
 1180 
 1181         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 1182         error = VOP_KQFILTER(fp->f_vnode, kn);
 1183         VFS_UNLOCK_GIANT(vfslocked);
 1184 
 1185         return error;
 1186 }
 1187 
 1188 /*
 1189  * Simplified in-kernel wrapper calls for extended attribute access.
 1190  * Both calls pass in a NULL credential, authorizing as "kernel" access.
 1191  * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 1192  */
 1193 int
 1194 vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
 1195     const char *attrname, int *buflen, char *buf, struct thread *td)
 1196 {
 1197         struct uio      auio;
 1198         struct iovec    iov;
 1199         int     error;
 1200 
 1201         iov.iov_len = *buflen;
 1202         iov.iov_base = buf;
 1203 
 1204         auio.uio_iov = &iov;
 1205         auio.uio_iovcnt = 1;
 1206         auio.uio_rw = UIO_READ;
 1207         auio.uio_segflg = UIO_SYSSPACE;
 1208         auio.uio_td = td;
 1209         auio.uio_offset = 0;
 1210         auio.uio_resid = *buflen;
 1211 
 1212         if ((ioflg & IO_NODELOCKED) == 0)
 1213                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1214 
 1215         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1216 
 1217         /* authorize attribute retrieval as kernel */
 1218         error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
 1219             td);
 1220 
 1221         if ((ioflg & IO_NODELOCKED) == 0)
 1222                 VOP_UNLOCK(vp, 0);
 1223 
 1224         if (error == 0) {
 1225                 *buflen = *buflen - auio.uio_resid;
 1226         }
 1227 
 1228         return (error);
 1229 }
 1230 
 1231 /*
 1232  * XXX failure mode if partially written?
 1233  */
 1234 int
 1235 vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
 1236     const char *attrname, int buflen, char *buf, struct thread *td)
 1237 {
 1238         struct uio      auio;
 1239         struct iovec    iov;
 1240         struct mount    *mp;
 1241         int     error;
 1242 
 1243         iov.iov_len = buflen;
 1244         iov.iov_base = buf;
 1245 
 1246         auio.uio_iov = &iov;
 1247         auio.uio_iovcnt = 1;
 1248         auio.uio_rw = UIO_WRITE;
 1249         auio.uio_segflg = UIO_SYSSPACE;
 1250         auio.uio_td = td;
 1251         auio.uio_offset = 0;
 1252         auio.uio_resid = buflen;
 1253 
 1254         if ((ioflg & IO_NODELOCKED) == 0) {
 1255                 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 1256                         return (error);
 1257                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1258         }
 1259 
 1260         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1261 
 1262         /* authorize attribute setting as kernel */
 1263         error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);
 1264 
 1265         if ((ioflg & IO_NODELOCKED) == 0) {
 1266                 vn_finished_write(mp);
 1267                 VOP_UNLOCK(vp, 0);
 1268         }
 1269 
 1270         return (error);
 1271 }
 1272 
 1273 int
 1274 vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
 1275     const char *attrname, struct thread *td)
 1276 {
 1277         struct mount    *mp;
 1278         int     error;
 1279 
 1280         if ((ioflg & IO_NODELOCKED) == 0) {
 1281                 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 1282                         return (error);
 1283                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1284         }
 1285 
 1286         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1287 
 1288         /* authorize attribute removal as kernel */
 1289         error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td);
 1290         if (error == EOPNOTSUPP)
 1291                 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
 1292                     NULL, td);
 1293 
 1294         if ((ioflg & IO_NODELOCKED) == 0) {
 1295                 vn_finished_write(mp);
 1296                 VOP_UNLOCK(vp, 0);
 1297         }
 1298 
 1299         return (error);
 1300 }
 1301 
 1302 int
 1303 vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp)
 1304 {
 1305         struct mount *mp;
 1306         int ltype, error;
 1307 
 1308         mp = vp->v_mount;
 1309         ltype = VOP_ISLOCKED(vp);
 1310         KASSERT(ltype == LK_EXCLUSIVE || ltype == LK_SHARED,
 1311             ("vn_vget_ino: vp not locked"));
 1312         error = vfs_busy(mp, MBF_NOWAIT);
 1313         if (error != 0) {
 1314                 vfs_ref(mp);
 1315                 VOP_UNLOCK(vp, 0);
 1316                 error = vfs_busy(mp, 0);
 1317                 vn_lock(vp, ltype | LK_RETRY);
 1318                 vfs_rel(mp);
 1319                 if (error != 0)
 1320                         return (ENOENT);
 1321                 if (vp->v_iflag & VI_DOOMED) {
 1322                         vfs_unbusy(mp);
 1323                         return (ENOENT);
 1324                 }
 1325         }
 1326         VOP_UNLOCK(vp, 0);
 1327         error = VFS_VGET(mp, ino, lkflags, rvp);
 1328         vfs_unbusy(mp);
 1329         vn_lock(vp, ltype | LK_RETRY);
 1330         if (vp->v_iflag & VI_DOOMED) {
 1331                 if (error == 0)
 1332                         vput(*rvp);
 1333                 error = ENOENT;
 1334         }
 1335         return (error);
 1336 }

Cache object: 3e5c146666bf5f410563b68ce0f416e1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.