vfs_vnops.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD$");
   39 
   40 #include "opt_mac.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/fcntl.h>
   45 #include <sys/file.h>
   46 #include <sys/kdb.h>
   47 #include <sys/stat.h>
   48 #include <sys/priv.h>
   49 #include <sys/proc.h>
   50 #include <sys/limits.h>
   51 #include <sys/lock.h>
   52 #include <sys/mount.h>
   53 #include <sys/mutex.h>
   54 #include <sys/namei.h>
   55 #include <sys/vnode.h>
   56 #include <sys/bio.h>
   57 #include <sys/buf.h>
   58 #include <sys/filio.h>
   59 #include <sys/sx.h>
   60 #include <sys/ttycom.h>
   61 #include <sys/conf.h>
   62 #include <sys/syslog.h>
   63 #include <sys/unistd.h>
   64 
   65 #include <security/mac/mac_framework.h>
   66 
   67 static fo_rdwr_t        vn_read;
   68 static fo_rdwr_t        vn_write;
   69 static fo_ioctl_t       vn_ioctl;
   70 static fo_poll_t        vn_poll;
   71 static fo_kqfilter_t    vn_kqfilter;
   72 static fo_stat_t        vn_statfile;
   73 static fo_close_t       vn_closefile;
   74 
   75 struct  fileops vnops = {
   76         .fo_read = vn_read,
   77         .fo_write = vn_write,
   78         .fo_ioctl = vn_ioctl,
   79         .fo_poll = vn_poll,
   80         .fo_kqfilter = vn_kqfilter,
   81         .fo_stat = vn_statfile,
   82         .fo_close = vn_closefile,
   83         .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
   84 };
   85 
   86 int
   87 vn_open(ndp, flagp, cmode, fp)
   88         struct nameidata *ndp;
   89         int *flagp, cmode;
   90         struct file *fp;
   91 {
   92         struct thread *td = ndp->ni_cnd.cn_thread;
   93 
   94         return (vn_open_cred(ndp, flagp, cmode, td->td_ucred, fp));
   95 }
   96 
   97 /*
   98  * Common code for vnode open operations.
   99  * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
  100  * 
  101  * Note that this does NOT free nameidata for the successful case,
  102  * due to the NDINIT being done elsewhere.
  103  */
  104 int
  105 vn_open_cred(ndp, flagp, cmode, cred, fp)
  106         struct nameidata *ndp;
  107         int *flagp, cmode;
  108         struct ucred *cred;
  109         struct file *fp;
  110 {
  111         struct vnode *vp;
  112         struct mount *mp;
  113         struct thread *td = ndp->ni_cnd.cn_thread;
  114         struct vattr vat;
  115         struct vattr *vap = &vat;
  116         int mode, fmode, error;
  117         int vfslocked, mpsafe;
  118 
  119         mpsafe = ndp->ni_cnd.cn_flags & MPSAFE;
  120 restart:
  121         vfslocked = 0;
  122         fmode = *flagp;
  123         if (fmode & O_CREAT) {
  124                 ndp->ni_cnd.cn_nameiop = CREATE;
  125                 ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF |
  126                     MPSAFE | AUDITVNODE1;
  127                 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
  128                         ndp->ni_cnd.cn_flags |= FOLLOW;
  129                 bwillwrite();
  130                 if ((error = namei(ndp)) != 0)
  131                         return (error);
  132                 vfslocked = NDHASGIANT(ndp);
  133                 if (!mpsafe)
  134                         ndp->ni_cnd.cn_flags &= ~MPSAFE;
  135                 if (ndp->ni_vp == NULL) {
  136                         VATTR_NULL(vap);
  137                         vap->va_type = VREG;
  138                         vap->va_mode = cmode;
  139                         if (fmode & O_EXCL)
  140                                 vap->va_vaflags |= VA_EXCLUSIVE;
  141                         if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
  142                                 NDFREE(ndp, NDF_ONLY_PNBUF);
  143                                 vput(ndp->ni_dvp);
  144                                 VFS_UNLOCK_GIANT(vfslocked);
  145                                 if ((error = vn_start_write(NULL, &mp,
  146                                     V_XSLEEP | PCATCH)) != 0)
  147                                         return (error);
  148                                 goto restart;
  149                         }
  150 #ifdef MAC
  151                         error = mac_check_vnode_create(cred, ndp->ni_dvp,
  152                             &ndp->ni_cnd, vap);
  153                         if (error == 0) {
  154 #endif
  155                                 VOP_LEASE(ndp->ni_dvp, td, cred, LEASE_WRITE);
  156                                 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
  157                                                    &ndp->ni_cnd, vap);
  158 #ifdef MAC
  159                         }
  160 #endif
  161                         vput(ndp->ni_dvp);
  162                         vn_finished_write(mp);
  163                         if (error) {
  164                                 VFS_UNLOCK_GIANT(vfslocked);
  165                                 NDFREE(ndp, NDF_ONLY_PNBUF);
  166                                 return (error);
  167                         }
  168                         fmode &= ~O_TRUNC;
  169                         vp = ndp->ni_vp;
  170                 } else {
  171                         if (ndp->ni_dvp == ndp->ni_vp)
  172                                 vrele(ndp->ni_dvp);
  173                         else
  174                                 vput(ndp->ni_dvp);
  175                         ndp->ni_dvp = NULL;
  176                         vp = ndp->ni_vp;
  177                         if (fmode & O_EXCL) {
  178                                 error = EEXIST;
  179                                 goto bad;
  180                         }
  181                         fmode &= ~O_CREAT;
  182                 }
  183         } else {
  184                 ndp->ni_cnd.cn_nameiop = LOOKUP;
  185                 ndp->ni_cnd.cn_flags = ISOPEN |
  186                     ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
  187                     LOCKLEAF | MPSAFE | AUDITVNODE1;
  188                 if ((error = namei(ndp)) != 0)
  189                         return (error);
  190                 if (!mpsafe)
  191                         ndp->ni_cnd.cn_flags &= ~MPSAFE;
  192                 vfslocked = NDHASGIANT(ndp);
  193                 vp = ndp->ni_vp;
  194         }
  195         if (vp->v_type == VLNK) {
  196                 error = EMLINK;
  197                 goto bad;
  198         }
  199         if (vp->v_type == VSOCK) {
  200                 error = EOPNOTSUPP;
  201                 goto bad;
  202         }
  203         mode = 0;
  204         if (fmode & (FWRITE | O_TRUNC)) {
  205                 if (vp->v_type == VDIR) {
  206                         error = EISDIR;
  207                         goto bad;
  208                 }
  209                 mode |= VWRITE;
  210         }
  211         if (fmode & FREAD)
  212                 mode |= VREAD;
  213         if (fmode & O_APPEND)
  214                 mode |= VAPPEND;
  215 #ifdef MAC
  216         error = mac_check_vnode_open(cred, vp, mode);
  217         if (error)
  218                 goto bad;
  219 #endif
  220         if ((fmode & O_CREAT) == 0) {
  221                 if (mode & VWRITE) {
  222                         error = vn_writechk(vp);
  223                         if (error)
  224                                 goto bad;
  225                 }
  226                 if (mode) {
  227                         error = VOP_ACCESS(vp, mode, cred, td);
  228                         if (error)
  229                                 goto bad;
  230                 }
  231         }
  232         if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0)
  233                 goto bad;
  234 
  235         if (fmode & FWRITE)
  236                 vp->v_writecount++;
  237         *flagp = fmode;
  238         ASSERT_VOP_ELOCKED(vp, "vn_open_cred");
  239         if (!mpsafe)
  240                 VFS_UNLOCK_GIANT(vfslocked);
  241         return (0);
  242 bad:
  243         NDFREE(ndp, NDF_ONLY_PNBUF);
  244         vput(vp);
  245         VFS_UNLOCK_GIANT(vfslocked);
  246         *flagp = fmode;
  247         ndp->ni_vp = NULL;
  248         return (error);
  249 }
  250 
  251 /*
  252  * Check for write permissions on the specified vnode.
  253  * Prototype text segments cannot be written.
  254  */
  255 int
  256 vn_writechk(vp)
  257         register struct vnode *vp;
  258 {
  259 
  260         ASSERT_VOP_LOCKED(vp, "vn_writechk");
  261         /*
  262          * If there's shared text associated with
  263          * the vnode, try to free it up once.  If
  264          * we fail, we can't allow writing.
  265          */
  266         if (vp->v_vflag & VV_TEXT)
  267                 return (ETXTBSY);
  268 
  269         return (0);
  270 }
  271 
  272 /*
  273  * Vnode close call
  274  */
  275 int
  276 vn_close(vp, flags, file_cred, td)
  277         register struct vnode *vp;
  278         int flags;
  279         struct ucred *file_cred;
  280         struct thread *td;
  281 {
  282         struct mount *mp;
  283         int error;
  284 
  285         VFS_ASSERT_GIANT(vp->v_mount);
  286 
  287         vn_start_write(vp, &mp, V_WAIT);
  288         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  289         if (flags & FWRITE) {
  290                 VNASSERT(vp->v_writecount > 0, vp, 
  291                     ("vn_close: negative writecount"));
  292                 vp->v_writecount--;
  293         }
  294         error = VOP_CLOSE(vp, flags, file_cred, td);
  295         vput(vp);
  296         vn_finished_write(mp);
  297         return (error);
  298 }
  299 
  300 /*
  301  * Sequential heuristic - detect sequential operation
  302  */
  303 static __inline
  304 int
  305 sequential_heuristic(struct uio *uio, struct file *fp)
  306 {
  307 
  308         if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
  309             uio->uio_offset == fp->f_nextoff) {
  310                 /*
  311                  * XXX we assume that the filesystem block size is
  312                  * the default.  Not true, but still gives us a pretty
  313                  * good indicator of how sequential the read operations
  314                  * are.
  315                  */
  316                 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
  317                 if (fp->f_seqcount > IO_SEQMAX)
  318                         fp->f_seqcount = IO_SEQMAX;
  319                 return(fp->f_seqcount << IO_SEQSHIFT);
  320         }
  321 
  322         /*
  323          * Not sequential, quick draw-down of seqcount
  324          */
  325         if (fp->f_seqcount > 1)
  326                 fp->f_seqcount = 1;
  327         else
  328                 fp->f_seqcount = 0;
  329         return(0);
  330 }
  331 
  332 /*
  333  * Package up an I/O request on a vnode into a uio and do it.
  334  */
  335 int
  336 vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
  337     aresid, td)
  338         enum uio_rw rw;
  339         struct vnode *vp;
  340         void *base;
  341         int len;
  342         off_t offset;
  343         enum uio_seg segflg;
  344         int ioflg;
  345         struct ucred *active_cred;
  346         struct ucred *file_cred;
  347         int *aresid;
  348         struct thread *td;
  349 {
  350         struct uio auio;
  351         struct iovec aiov;
  352         struct mount *mp;
  353         struct ucred *cred;
  354         int error;
  355 
  356         VFS_ASSERT_GIANT(vp->v_mount);
  357 
  358         if ((ioflg & IO_NODELOCKED) == 0) {
  359                 mp = NULL;
  360                 if (rw == UIO_WRITE) { 
  361                         if (vp->v_type != VCHR &&
  362                             (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
  363                             != 0)
  364                                 return (error);
  365                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  366                 } else {
  367                         /*
  368                          * XXX This should be LK_SHARED but I don't trust VFS
  369                          * enough to leave it like that until it has been
  370                          * reviewed further.
  371                          */
  372                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  373                 }
  374 
  375         }
  376         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
  377         auio.uio_iov = &aiov;
  378         auio.uio_iovcnt = 1;
  379         aiov.iov_base = base;
  380         aiov.iov_len = len;
  381         auio.uio_resid = len;
  382         auio.uio_offset = offset;
  383         auio.uio_segflg = segflg;
  384         auio.uio_rw = rw;
  385         auio.uio_td = td;
  386         error = 0;
  387 #ifdef MAC
  388         if ((ioflg & IO_NOMACCHECK) == 0) {
  389                 if (rw == UIO_READ)
  390                         error = mac_check_vnode_read(active_cred, file_cred,
  391                             vp);
  392                 else
  393                         error = mac_check_vnode_write(active_cred, file_cred,
  394                             vp);
  395         }
  396 #endif
  397         if (error == 0) {
  398                 if (file_cred)
  399                         cred = file_cred;
  400                 else
  401                         cred = active_cred;
  402                 if (rw == UIO_READ)
  403                         error = VOP_READ(vp, &auio, ioflg, cred);
  404                 else
  405                         error = VOP_WRITE(vp, &auio, ioflg, cred);
  406         }
  407         if (aresid)
  408                 *aresid = auio.uio_resid;
  409         else
  410                 if (auio.uio_resid && error == 0)
  411                         error = EIO;
  412         if ((ioflg & IO_NODELOCKED) == 0) {
  413                 if (rw == UIO_WRITE && vp->v_type != VCHR)
  414                         vn_finished_write(mp);
  415                 VOP_UNLOCK(vp, 0, td);
  416         }
  417         return (error);
  418 }
  419 
  420 /*
  421  * Package up an I/O request on a vnode into a uio and do it.  The I/O
  422  * request is split up into smaller chunks and we try to avoid saturating
  423  * the buffer cache while potentially holding a vnode locked, so we 
  424  * check bwillwrite() before calling vn_rdwr().  We also call uio_yield()
  425  * to give other processes a chance to lock the vnode (either other processes
  426  * core'ing the same binary, or unrelated processes scanning the directory).
  427  */
  428 int
  429 vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred,
  430     file_cred, aresid, td)
  431         enum uio_rw rw;
  432         struct vnode *vp;
  433         void *base;
  434         size_t len;
  435         off_t offset;
  436         enum uio_seg segflg;
  437         int ioflg;
  438         struct ucred *active_cred;
  439         struct ucred *file_cred;
  440         size_t *aresid;
  441         struct thread *td;
  442 {
  443         int error = 0;
  444         int iaresid;
  445 
  446         VFS_ASSERT_GIANT(vp->v_mount);
  447 
  448         do {
  449                 int chunk;
  450 
  451                 /*
  452                  * Force `offset' to a multiple of MAXBSIZE except possibly
  453                  * for the first chunk, so that filesystems only need to
  454                  * write full blocks except possibly for the first and last
  455                  * chunks.
  456                  */
  457                 chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;
  458 
  459                 if (chunk > len)
  460                         chunk = len;
  461                 if (rw != UIO_READ && vp->v_type == VREG)
  462                         bwillwrite();
  463                 iaresid = 0;
  464                 error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
  465                     ioflg, active_cred, file_cred, &iaresid, td);
  466                 len -= chunk;   /* aresid calc already includes length */
  467                 if (error)
  468                         break;
  469                 offset += chunk;
  470                 base = (char *)base + chunk;
  471                 uio_yield();
  472         } while (len);
  473         if (aresid)
  474                 *aresid = len + iaresid;
  475         return (error);
  476 }
  477 
  478 /*
  479  * File table vnode read routine.
  480  */
  481 static int
  482 vn_read(fp, uio, active_cred, flags, td)
  483         struct file *fp;
  484         struct uio *uio;
  485         struct ucred *active_cred;
  486         struct thread *td;
  487         int flags;
  488 {
  489         struct vnode *vp;
  490         int error, ioflag;
  491         int vfslocked;
  492 
  493         KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
  494             uio->uio_td, td));
  495         vp = fp->f_vnode;
  496         ioflag = 0;
  497         if (fp->f_flag & FNONBLOCK)
  498                 ioflag |= IO_NDELAY;
  499         if (fp->f_flag & O_DIRECT)
  500                 ioflag |= IO_DIRECT;
  501         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  502         VOP_LEASE(vp, td, fp->f_cred, LEASE_READ);
  503         /*
  504          * According to McKusick the vn lock was protecting f_offset here.
  505          * It is now protected by the FOFFSET_LOCKED flag.
  506          */
  507         if ((flags & FOF_OFFSET) == 0) {
  508                 FILE_LOCK(fp);
  509                 while(fp->f_vnread_flags & FOFFSET_LOCKED) {
  510                         fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
  511                         msleep(&fp->f_vnread_flags,fp->f_mtxp,PUSER -1,"vnread offlock",0);
  512                 }
  513                 fp->f_vnread_flags |= FOFFSET_LOCKED;
  514                 FILE_UNLOCK(fp);
  515                 vn_lock(vp, LK_SHARED | LK_RETRY, td);
  516                 uio->uio_offset = fp->f_offset;
  517         } else
  518                 vn_lock(vp, LK_SHARED | LK_RETRY, td);
  519 
  520         ioflag |= sequential_heuristic(uio, fp);
  521 
  522 #ifdef MAC
  523         error = mac_check_vnode_read(active_cred, fp->f_cred, vp);
  524         if (error == 0)
  525 #endif
  526                 error = VOP_READ(vp, uio, ioflag, fp->f_cred);
  527         if ((flags & FOF_OFFSET) == 0) {
  528                 fp->f_offset = uio->uio_offset;
  529                 FILE_LOCK(fp);
  530                 if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING)
  531                         wakeup(&fp->f_vnread_flags);
  532                 fp->f_vnread_flags = 0;
  533                 FILE_UNLOCK(fp);
  534         }
  535         fp->f_nextoff = uio->uio_offset;
  536         VOP_UNLOCK(vp, 0, td);
  537         VFS_UNLOCK_GIANT(vfslocked);
  538         return (error);
  539 }
  540 
  541 /*
  542  * File table vnode write routine.
  543  */
  544 static int
  545 vn_write(fp, uio, active_cred, flags, td)
  546         struct file *fp;
  547         struct uio *uio;
  548         struct ucred *active_cred;
  549         struct thread *td;
  550         int flags;
  551 {
  552         struct vnode *vp;
  553         struct mount *mp;
  554         int error, ioflag;
  555         int vfslocked;
  556 
  557         KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
  558             uio->uio_td, td));
  559         vp = fp->f_vnode;
  560         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  561         if (vp->v_type == VREG)
  562                 bwillwrite();
  563         ioflag = IO_UNIT;
  564         if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
  565                 ioflag |= IO_APPEND;
  566         if (fp->f_flag & FNONBLOCK)
  567                 ioflag |= IO_NDELAY;
  568         if (fp->f_flag & O_DIRECT)
  569                 ioflag |= IO_DIRECT;
  570         if ((fp->f_flag & O_FSYNC) ||
  571             (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
  572                 ioflag |= IO_SYNC;
  573         mp = NULL;
  574         if (vp->v_type != VCHR &&
  575             (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
  576                 goto unlock;
  577         VOP_LEASE(vp, td, fp->f_cred, LEASE_WRITE);
  578         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  579         if ((flags & FOF_OFFSET) == 0)
  580                 uio->uio_offset = fp->f_offset;
  581         ioflag |= sequential_heuristic(uio, fp);
  582 #ifdef MAC
  583         error = mac_check_vnode_write(active_cred, fp->f_cred, vp);
  584         if (error == 0)
  585 #endif
  586                 error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
  587         if ((flags & FOF_OFFSET) == 0)
  588                 fp->f_offset = uio->uio_offset;
  589         fp->f_nextoff = uio->uio_offset;
  590         VOP_UNLOCK(vp, 0, td);
  591         if (vp->v_type != VCHR)
  592                 vn_finished_write(mp);
  593 unlock:
  594         VFS_UNLOCK_GIANT(vfslocked);
  595         return (error);
  596 }
  597 
  598 /*
  599  * File table vnode stat routine.
  600  */
  601 static int
  602 vn_statfile(fp, sb, active_cred, td)
  603         struct file *fp;
  604         struct stat *sb;
  605         struct ucred *active_cred;
  606         struct thread *td;
  607 {
  608         struct vnode *vp = fp->f_vnode;
  609         int vfslocked;
  610         int error;
  611 
  612         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  613         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  614         error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
  615         VOP_UNLOCK(vp, 0, td);
  616         VFS_UNLOCK_GIANT(vfslocked);
  617 
  618         return (error);
  619 }
  620 
  621 /*
  622  * Stat a vnode; implementation for the stat syscall
  623  */
  624 int
  625 vn_stat(vp, sb, active_cred, file_cred, td)
  626         struct vnode *vp;
  627         register struct stat *sb;
  628         struct ucred *active_cred;
  629         struct ucred *file_cred;
  630         struct thread *td;
  631 {
  632         struct vattr vattr;
  633         register struct vattr *vap;
  634         int error;
  635         u_short mode;
  636 
  637 #ifdef MAC
  638         error = mac_check_vnode_stat(active_cred, file_cred, vp);
  639         if (error)
  640                 return (error);
  641 #endif
  642 
  643         vap = &vattr;
  644         error = VOP_GETATTR(vp, vap, active_cred, td);
  645         if (error)
  646                 return (error);
  647 
  648         /*
  649          * Zero the spare stat fields
  650          */
  651         bzero(sb, sizeof *sb);
  652 
  653         /*
  654          * Copy from vattr table
  655          */
  656         if (vap->va_fsid != VNOVAL)
  657                 sb->st_dev = vap->va_fsid;
  658         else
  659                 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
  660         sb->st_ino = vap->va_fileid;
  661         mode = vap->va_mode;
  662         switch (vap->va_type) {
  663         case VREG:
  664                 mode |= S_IFREG;
  665                 break;
  666         case VDIR:
  667                 mode |= S_IFDIR;
  668                 break;
  669         case VBLK:
  670                 mode |= S_IFBLK;
  671                 break;
  672         case VCHR:
  673                 mode |= S_IFCHR;
  674                 break;
  675         case VLNK:
  676                 mode |= S_IFLNK;
  677                 /* This is a cosmetic change, symlinks do not have a mode. */
  678                 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
  679                         sb->st_mode &= ~ACCESSPERMS;    /* 0000 */
  680                 else
  681                         sb->st_mode |= ACCESSPERMS;     /* 0777 */
  682                 break;
  683         case VSOCK:
  684                 mode |= S_IFSOCK;
  685                 break;
  686         case VFIFO:
  687                 mode |= S_IFIFO;
  688                 break;
  689         default:
  690                 return (EBADF);
  691         };
  692         sb->st_mode = mode;
  693         sb->st_nlink = vap->va_nlink;
  694         sb->st_uid = vap->va_uid;
  695         sb->st_gid = vap->va_gid;
  696         sb->st_rdev = vap->va_rdev;
  697         if (vap->va_size > OFF_MAX)
  698                 return (EOVERFLOW);
  699         sb->st_size = vap->va_size;
  700         sb->st_atimespec = vap->va_atime;
  701         sb->st_mtimespec = vap->va_mtime;
  702         sb->st_ctimespec = vap->va_ctime;
  703         sb->st_birthtimespec = vap->va_birthtime;
  704 
  705         /*
  706          * According to www.opengroup.org, the meaning of st_blksize is 
  707          *   "a filesystem-specific preferred I/O block size for this 
  708          *    object.  In some filesystem types, this may vary from file
  709          *    to file"
  710          * Default to PAGE_SIZE after much discussion.
  711          * XXX: min(PAGE_SIZE, vp->v_bufobj.bo_bsize) may be more correct.
  712          */
  713 
  714         sb->st_blksize = PAGE_SIZE;
  715         
  716         sb->st_flags = vap->va_flags;
  717         if (priv_check(td, PRIV_VFS_GENERATION))
  718                 sb->st_gen = 0;
  719         else
  720                 sb->st_gen = vap->va_gen;
  721 
  722         sb->st_blocks = vap->va_bytes / S_BLKSIZE;
  723         return (0);
  724 }
  725 
  726 /*
  727  * File table vnode ioctl routine.
  728  */
  729 static int
  730 vn_ioctl(fp, com, data, active_cred, td)
  731         struct file *fp;
  732         u_long com;
  733         void *data;
  734         struct ucred *active_cred;
  735         struct thread *td;
  736 {
  737         struct vnode *vp = fp->f_vnode;
  738         struct vattr vattr;
  739         int vfslocked;
  740         int error;
  741 
  742         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  743         error = ENOTTY;
  744         switch (vp->v_type) {
  745         case VREG:
  746         case VDIR:
  747                 if (com == FIONREAD) {
  748                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  749                         error = VOP_GETATTR(vp, &vattr, active_cred, td);
  750                         VOP_UNLOCK(vp, 0, td);
  751                         if (!error)
  752                                 *(int *)data = vattr.va_size - fp->f_offset;
  753                 }
  754                 if (com == FIONBIO || com == FIOASYNC)  /* XXX */
  755                         error = 0;
  756                 else
  757                         error = VOP_IOCTL(vp, com, data, fp->f_flag,
  758                             active_cred, td);
  759                 break;
  760 
  761         default:
  762                 break;
  763         }
  764         VFS_UNLOCK_GIANT(vfslocked);
  765         return (error);
  766 }
  767 
  768 /*
  769  * File table vnode poll routine.
  770  */
  771 static int
  772 vn_poll(fp, events, active_cred, td)
  773         struct file *fp;
  774         int events;
  775         struct ucred *active_cred;
  776         struct thread *td;
  777 {
  778         struct vnode *vp;
  779         int vfslocked;
  780         int error;
  781 
  782         vp = fp->f_vnode;
  783         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  784 #ifdef MAC
  785         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  786         error = mac_check_vnode_poll(active_cred, fp->f_cred, vp);
  787         VOP_UNLOCK(vp, 0, td);
  788         if (!error)
  789 #endif
  790 
  791         error = VOP_POLL(vp, events, fp->f_cred, td);
  792         VFS_UNLOCK_GIANT(vfslocked);
  793         return (error);
  794 }
  795 
  796 /*
  797  * Check that the vnode is still valid, and if so
  798  * acquire requested lock.
  799  */
  800 int
  801 _vn_lock(struct vnode *vp, int flags, struct thread *td, char *file, int line)
  802 {
  803         int error;
  804 
  805         do {
  806                 if ((flags & LK_INTERLOCK) == 0)
  807                         VI_LOCK(vp);
  808                 if ((flags & LK_NOWAIT || (flags & LK_TYPE_MASK) == 0) &&
  809                     vp->v_iflag & VI_DOOMED) {
  810                         VI_UNLOCK(vp);
  811                         return (ENOENT);
  812                 }
  813                 /*
  814                  * Just polling to check validity.
  815                  */
  816                 if ((flags & LK_TYPE_MASK) == 0) {
  817                         VI_UNLOCK(vp);
  818                         return (0);
  819                 }
  820                 /*
  821                  * lockmgr drops interlock before it will return for
  822                  * any reason.  So force the code above to relock it.
  823                  */
  824                 error = VOP_LOCK1(vp, flags | LK_INTERLOCK, td, file, line);
  825                 flags &= ~LK_INTERLOCK;
  826                 KASSERT((flags & LK_RETRY) == 0 || error == 0,
  827                     ("LK_RETRY set with incompatible flags %d\n", flags));
  828                 /*
  829                  * Callers specify LK_RETRY if they wish to get dead vnodes.
  830                  * If RETRY is not set, we return ENOENT instead.
  831                  */
  832                 if (error == 0 && vp->v_iflag & VI_DOOMED &&
  833                     (flags & LK_RETRY) == 0) {
  834                         VOP_UNLOCK(vp, 0, td);
  835                         error = ENOENT;
  836                         break;
  837                 }
  838         } while (flags & LK_RETRY && error != 0);
  839         return (error);
  840 }
  841 
  842 /*
  843  * File table vnode close routine.
  844  */
  845 static int
  846 vn_closefile(fp, td)
  847         struct file *fp;
  848         struct thread *td;
  849 {
  850         struct vnode *vp;
  851         struct flock lf;
  852         int vfslocked;
  853         int error;
  854 
  855         vp = fp->f_vnode;
  856 
  857         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  858         if (fp->f_type == DTYPE_VNODE && fp->f_flag & FHASLOCK) {
  859                 lf.l_whence = SEEK_SET;
  860                 lf.l_start = 0;
  861                 lf.l_len = 0;
  862                 lf.l_type = F_UNLCK;
  863                 (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
  864         }
  865 
  866         fp->f_ops = &badfileops;
  867 
  868         error = vn_close(vp, fp->f_flag, fp->f_cred, td);
  869         VFS_UNLOCK_GIANT(vfslocked);
  870         return (error);
  871 }
  872 
  873 /*
  874  * Preparing to start a filesystem write operation. If the operation is
  875  * permitted, then we bump the count of operations in progress and
  876  * proceed. If a suspend request is in progress, we wait until the
  877  * suspension is over, and then proceed.
  878  */
  879 int
  880 vn_start_write(vp, mpp, flags)
  881         struct vnode *vp;
  882         struct mount **mpp;
  883         int flags;
  884 {
  885         struct mount *mp;
  886         int error;
  887 
  888         error = 0;
  889         /*
  890          * If a vnode is provided, get and return the mount point that
  891          * to which it will write.
  892          */
  893         if (vp != NULL) {
  894                 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
  895                         *mpp = NULL;
  896                         if (error != EOPNOTSUPP)
  897                                 return (error);
  898                         return (0);
  899                 }
  900         }
  901         if ((mp = *mpp) == NULL)
  902                 return (0);
  903         MNT_ILOCK(mp);
  904         if (vp == NULL)
  905                 MNT_REF(mp);
  906         /*
  907          * Check on status of suspension.
  908          */
  909         while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
  910                 if (flags & V_NOWAIT) {
  911                         error = EWOULDBLOCK;
  912                         goto unlock;
  913                 }
  914                 error = msleep(&mp->mnt_flag, MNT_MTX(mp), 
  915                     (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
  916                 if (error)
  917                         goto unlock;
  918         }
  919         if (flags & V_XSLEEP)
  920                 goto unlock;
  921         mp->mnt_writeopcount++;
  922 unlock:
  923         MNT_REL(mp);
  924         MNT_IUNLOCK(mp);
  925         return (error);
  926 }
  927 
  928 /*
  929  * Secondary suspension. Used by operations such as vop_inactive
  930  * routines that are needed by the higher level functions. These
  931  * are allowed to proceed until all the higher level functions have
  932  * completed (indicated by mnt_writeopcount dropping to zero). At that
  933  * time, these operations are halted until the suspension is over.
  934  */
  935 int
  936 vn_write_suspend_wait(vp, mp, flags)
  937         struct vnode *vp;
  938         struct mount *mp;
  939         int flags;
  940 {
  941         int error;
  942 
  943         if (vp != NULL) {
  944                 if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
  945                         if (error != EOPNOTSUPP)
  946                                 return (error);
  947                         return (0);
  948                 }
  949         }
  950         /*
  951          * If we are not suspended or have not yet reached suspended
  952          * mode, then let the operation proceed.
  953          */
  954         if (mp == NULL)
  955                 return (0);
  956         MNT_ILOCK(mp);
  957         if (vp == NULL)
  958                 MNT_REF(mp);
  959         if ((mp->mnt_kern_flag & MNTK_SUSPENDED) == 0) {
  960                 MNT_REL(mp);
  961                 MNT_IUNLOCK(mp);
  962                 return (0);
  963         }
  964         if (flags & V_NOWAIT) {
  965                 MNT_REL(mp);
  966                 MNT_IUNLOCK(mp);
  967                 return (EWOULDBLOCK);
  968         }
  969         /*
  970          * Wait for the suspension to finish.
  971          */
  972         error = msleep(&mp->mnt_flag, MNT_MTX(mp),
  973             (PUSER - 1) | (flags & PCATCH) | PDROP, "suspfs", 0);
  974         vfs_rel(mp);
  975         return (error);
  976 }
  977 
  978 /*
  979  * Secondary suspension. Used by operations such as vop_inactive
  980  * routines that are needed by the higher level functions. These
  981  * are allowed to proceed until all the higher level functions have
  982  * completed (indicated by mnt_writeopcount dropping to zero). At that
  983  * time, these operations are halted until the suspension is over.
  984  */
  985 int
  986 vn_start_secondary_write(vp, mpp, flags)
  987         struct vnode *vp;
  988         struct mount **mpp;
  989         int flags;
  990 {
  991         struct mount *mp;
  992         int error;
  993 
  994  retry:
  995         if (vp != NULL) {
  996                 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
  997                         *mpp = NULL;
  998                         if (error != EOPNOTSUPP)
  999                                 return (error);
 1000                         return (0);
 1001                 }
 1002         }
 1003         /*
 1004          * If we are not suspended or have not yet reached suspended
 1005          * mode, then let the operation proceed.
 1006          */
 1007         if ((mp = *mpp) == NULL)
 1008                 return (0);
 1009         MNT_ILOCK(mp);
 1010         if (vp == NULL)
 1011                 MNT_REF(mp);
 1012         if ((mp->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND2)) == 0) {
 1013                 mp->mnt_secondary_writes++;
 1014                 mp->mnt_secondary_accwrites++;
 1015                 MNT_REL(mp);
 1016                 MNT_IUNLOCK(mp);
 1017                 return (0);
 1018         }
 1019         if (flags & V_NOWAIT) {
 1020                 MNT_REL(mp);
 1021                 MNT_IUNLOCK(mp);
 1022                 return (EWOULDBLOCK);
 1023         }
 1024         /*
 1025          * Wait for the suspension to finish.
 1026          */
 1027         error = msleep(&mp->mnt_flag, MNT_MTX(mp),
 1028                        (PUSER - 1) | (flags & PCATCH) | PDROP, "suspfs", 0);
 1029         vfs_rel(mp);
 1030         if (error == 0)
 1031                 goto retry;
 1032         return (error);
 1033 }
 1034 
 1035 /*
 1036  * Filesystem write operation has completed. If we are suspending and this
 1037  * operation is the last one, notify the suspender that the suspension is
 1038  * now in effect.
 1039  */
 1040 void
 1041 vn_finished_write(mp)
 1042         struct mount *mp;
 1043 {
 1044         if (mp == NULL)
 1045                 return;
 1046         MNT_ILOCK(mp);
 1047         mp->mnt_writeopcount--;
 1048         if (mp->mnt_writeopcount < 0)
 1049                 panic("vn_finished_write: neg cnt");
 1050         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
 1051             mp->mnt_writeopcount <= 0)
 1052                 wakeup(&mp->mnt_writeopcount);
 1053         MNT_IUNLOCK(mp);
 1054 }
 1055 
 1056 
 1057 /*
 1058  * Filesystem secondary write operation has completed. If we are
 1059  * suspending and this operation is the last one, notify the suspender
 1060  * that the suspension is now in effect.
 1061  */
 1062 void
 1063 vn_finished_secondary_write(mp)
 1064         struct mount *mp;
 1065 {
 1066         if (mp == NULL)
 1067                 return;
 1068         MNT_ILOCK(mp);
 1069         mp->mnt_secondary_writes--;
 1070         if (mp->mnt_secondary_writes < 0)
 1071                 panic("vn_finished_secondary_write: neg cnt");
 1072         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
 1073             mp->mnt_secondary_writes <= 0)
 1074                 wakeup(&mp->mnt_secondary_writes);
 1075         MNT_IUNLOCK(mp);
 1076 }
 1077 
 1078 
 1079 
 1080 /*
 1081  * Request a filesystem to suspend write operations.
 1082  */
 1083 int
 1084 vfs_write_suspend(mp)
 1085         struct mount *mp;
 1086 {
 1087         struct thread *td = curthread;
 1088         int error;
 1089 
 1090         MNT_ILOCK(mp);
 1091         if (mp->mnt_kern_flag & MNTK_SUSPEND) {
 1092                 MNT_IUNLOCK(mp);
 1093                 return (0);
 1094         }
 1095         mp->mnt_kern_flag |= MNTK_SUSPEND;
 1096         if (mp->mnt_writeopcount > 0)
 1097                 (void) msleep(&mp->mnt_writeopcount, 
 1098                     MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0);
 1099         else
 1100                 MNT_IUNLOCK(mp);
 1101         if ((error = VFS_SYNC(mp, MNT_SUSPEND, td)) != 0)
 1102                 vfs_write_resume(mp);
 1103         return (error);
 1104 }
 1105 
 1106 /*
 1107  * Request a filesystem to resume write operations.
 1108  */
 1109 void
 1110 vfs_write_resume(mp)
 1111         struct mount *mp;
 1112 {
 1113 
 1114         MNT_ILOCK(mp);
 1115         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
 1116                 mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPEND2 |
 1117                                        MNTK_SUSPENDED);
 1118                 wakeup(&mp->mnt_writeopcount);
 1119                 wakeup(&mp->mnt_flag);
 1120         }
 1121         MNT_IUNLOCK(mp);
 1122 }
 1123 
 1124 /*
 1125  * Implement kqueues for files by translating it to vnode operation.
 1126  */
 1127 static int
 1128 vn_kqfilter(struct file *fp, struct knote *kn)
 1129 {
 1130         int vfslocked;
 1131         int error;
 1132 
 1133         vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
 1134         error = VOP_KQFILTER(fp->f_vnode, kn);
 1135         VFS_UNLOCK_GIANT(vfslocked);
 1136 
 1137         return error;
 1138 }
 1139 
 1140 /*
 1141  * Simplified in-kernel wrapper calls for extended attribute access.
 1142  * Both calls pass in a NULL credential, authorizing as "kernel" access.
 1143  * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 1144  */
 1145 int
 1146 vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
 1147     const char *attrname, int *buflen, char *buf, struct thread *td)
 1148 {
 1149         struct uio      auio;
 1150         struct iovec    iov;
 1151         int     error;
 1152 
 1153         iov.iov_len = *buflen;
 1154         iov.iov_base = buf;
 1155 
 1156         auio.uio_iov = &iov;
 1157         auio.uio_iovcnt = 1;
 1158         auio.uio_rw = UIO_READ;
 1159         auio.uio_segflg = UIO_SYSSPACE;
 1160         auio.uio_td = td;
 1161         auio.uio_offset = 0;
 1162         auio.uio_resid = *buflen;
 1163 
 1164         if ((ioflg & IO_NODELOCKED) == 0)
 1165                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1166 
 1167         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1168 
 1169         /* authorize attribute retrieval as kernel */
 1170         error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
 1171             td);
 1172 
 1173         if ((ioflg & IO_NODELOCKED) == 0)
 1174                 VOP_UNLOCK(vp, 0, td);
 1175 
 1176         if (error == 0) {
 1177                 *buflen = *buflen - auio.uio_resid;
 1178         }
 1179 
 1180         return (error);
 1181 }
 1182 
 1183 /*
 1184  * XXX failure mode if partially written?
 1185  */
 1186 int
 1187 vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
 1188     const char *attrname, int buflen, char *buf, struct thread *td)
 1189 {
 1190         struct uio      auio;
 1191         struct iovec    iov;
 1192         struct mount    *mp;
 1193         int     error;
 1194 
 1195         iov.iov_len = buflen;
 1196         iov.iov_base = buf;
 1197 
 1198         auio.uio_iov = &iov;
 1199         auio.uio_iovcnt = 1;
 1200         auio.uio_rw = UIO_WRITE;
 1201         auio.uio_segflg = UIO_SYSSPACE;
 1202         auio.uio_td = td;
 1203         auio.uio_offset = 0;
 1204         auio.uio_resid = buflen;
 1205 
 1206         if ((ioflg & IO_NODELOCKED) == 0) {
 1207                 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 1208                         return (error);
 1209                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1210         }
 1211 
 1212         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1213 
 1214         /* authorize attribute setting as kernel */
 1215         error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);
 1216 
 1217         if ((ioflg & IO_NODELOCKED) == 0) {
 1218                 vn_finished_write(mp);
 1219                 VOP_UNLOCK(vp, 0, td);
 1220         }
 1221 
 1222         return (error);
 1223 }
 1224 
 1225 int
 1226 vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
 1227     const char *attrname, struct thread *td)
 1228 {
 1229         struct mount    *mp;
 1230         int     error;
 1231 
 1232         if ((ioflg & IO_NODELOCKED) == 0) {
 1233                 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 1234                         return (error);
 1235                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1236         }
 1237 
 1238         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1239 
 1240         /* authorize attribute removal as kernel */
 1241         error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td);
 1242         if (error == EOPNOTSUPP)
 1243                 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
 1244                     NULL, td);
 1245 
 1246         if ((ioflg & IO_NODELOCKED) == 0) {
 1247                 vn_finished_write(mp);
 1248                 VOP_UNLOCK(vp, 0, td);
 1249         }
 1250 
 1251         return (error);
 1252 }
Cache object: 12a62027555a213e0996098cddaf4df9
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_vnops.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_vnops.c