vfs_vnops.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/6.1/sys/kern/vfs_vnops.c 158179 2006-04-30 16:44:43Z cvs2svn $");
   39 
   40 #include "opt_mac.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/fcntl.h>
   45 #include <sys/file.h>
   46 #include <sys/kdb.h>
   47 #include <sys/stat.h>
   48 #include <sys/proc.h>
   49 #include <sys/limits.h>
   50 #include <sys/lock.h>
   51 #include <sys/mac.h>
   52 #include <sys/mount.h>
   53 #include <sys/mutex.h>
   54 #include <sys/namei.h>
   55 #include <sys/vnode.h>
   56 #include <sys/bio.h>
   57 #include <sys/buf.h>
   58 #include <sys/filio.h>
   59 #include <sys/sx.h>
   60 #include <sys/ttycom.h>
   61 #include <sys/conf.h>
   62 #include <sys/syslog.h>
   63 #include <sys/unistd.h>
   64 
   65 static fo_rdwr_t        vn_read;
   66 static fo_rdwr_t        vn_write;
   67 static fo_ioctl_t       vn_ioctl;
   68 static fo_poll_t        vn_poll;
   69 static fo_kqfilter_t    vn_kqfilter;
   70 static fo_stat_t        vn_statfile;
   71 static fo_close_t       vn_closefile;
   72 
   73 struct  fileops vnops = {
   74         .fo_read = vn_read,
   75         .fo_write = vn_write,
   76         .fo_ioctl = vn_ioctl,
   77         .fo_poll = vn_poll,
   78         .fo_kqfilter = vn_kqfilter,
   79         .fo_stat = vn_statfile,
   80         .fo_close = vn_closefile,
   81         .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
   82 };
   83 
   84 int
   85 vn_open(ndp, flagp, cmode, fdidx)
   86         struct nameidata *ndp;
   87         int *flagp, cmode, fdidx;
   88 {
   89         struct thread *td = ndp->ni_cnd.cn_thread;
   90 
   91         return (vn_open_cred(ndp, flagp, cmode, td->td_ucred, fdidx));
   92 }
   93 
   94 /*
   95  * Common code for vnode open operations.
   96  * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
   97  * 
   98  * Note that this does NOT free nameidata for the successful case,
   99  * due to the NDINIT being done elsewhere.
  100  */
  101 int
  102 vn_open_cred(ndp, flagp, cmode, cred, fdidx)
  103         struct nameidata *ndp;
  104         int *flagp, cmode;
  105         struct ucred *cred;
  106         int fdidx;
  107 {
  108         struct vnode *vp;
  109         struct mount *mp;
  110         struct thread *td = ndp->ni_cnd.cn_thread;
  111         struct vattr vat;
  112         struct vattr *vap = &vat;
  113         int mode, fmode, error;
  114         int vfslocked;
  115 
  116 restart:
  117         vfslocked = 0;
  118         fmode = *flagp;
  119         if (fmode & O_CREAT) {
  120                 ndp->ni_cnd.cn_nameiop = CREATE;
  121                 ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF | MPSAFE;
  122                 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
  123                         ndp->ni_cnd.cn_flags |= FOLLOW;
  124                 bwillwrite();
  125                 if ((error = namei(ndp)) != 0)
  126                         return (error);
  127                 vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
  128                 ndp->ni_cnd.cn_flags &= ~MPSAFE;
  129                 if (ndp->ni_vp == NULL) {
  130                         VATTR_NULL(vap);
  131                         vap->va_type = VREG;
  132                         vap->va_mode = cmode;
  133                         if (fmode & O_EXCL)
  134                                 vap->va_vaflags |= VA_EXCLUSIVE;
  135                         if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
  136                                 NDFREE(ndp, NDF_ONLY_PNBUF);
  137                                 vput(ndp->ni_dvp);
  138                                 VFS_UNLOCK_GIANT(vfslocked);
  139                                 if ((error = vn_start_write(NULL, &mp,
  140                                     V_XSLEEP | PCATCH)) != 0)
  141                                         return (error);
  142                                 goto restart;
  143                         }
  144 #ifdef MAC
  145                         error = mac_check_vnode_create(cred, ndp->ni_dvp,
  146                             &ndp->ni_cnd, vap);
  147                         if (error == 0) {
  148 #endif
  149                                 VOP_LEASE(ndp->ni_dvp, td, cred, LEASE_WRITE);
  150                                 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
  151                                                    &ndp->ni_cnd, vap);
  152 #ifdef MAC
  153                         }
  154 #endif
  155                         vput(ndp->ni_dvp);
  156                         vn_finished_write(mp);
  157                         if (error) {
  158                                 VFS_UNLOCK_GIANT(vfslocked);
  159                                 NDFREE(ndp, NDF_ONLY_PNBUF);
  160                                 return (error);
  161                         }
  162                         fmode &= ~O_TRUNC;
  163                         vp = ndp->ni_vp;
  164                 } else {
  165                         if (ndp->ni_dvp == ndp->ni_vp)
  166                                 vrele(ndp->ni_dvp);
  167                         else
  168                                 vput(ndp->ni_dvp);
  169                         ndp->ni_dvp = NULL;
  170                         vp = ndp->ni_vp;
  171                         if (fmode & O_EXCL) {
  172                                 error = EEXIST;
  173                                 goto bad;
  174                         }
  175                         fmode &= ~O_CREAT;
  176                 }
  177         } else {
  178                 ndp->ni_cnd.cn_nameiop = LOOKUP;
  179                 ndp->ni_cnd.cn_flags = ISOPEN |
  180                     ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
  181                     LOCKSHARED | LOCKLEAF | MPSAFE;
  182                 if ((error = namei(ndp)) != 0)
  183                         return (error);
  184                 ndp->ni_cnd.cn_flags &= ~MPSAFE;
  185                 vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
  186                 vp = ndp->ni_vp;
  187         }
  188         if (vp->v_type == VLNK) {
  189                 error = EMLINK;
  190                 goto bad;
  191         }
  192         if (vp->v_type == VSOCK) {
  193                 error = EOPNOTSUPP;
  194                 goto bad;
  195         }
  196         mode = 0;
  197         if (fmode & (FWRITE | O_TRUNC)) {
  198                 if (vp->v_type == VDIR) {
  199                         error = EISDIR;
  200                         goto bad;
  201                 }
  202                 mode |= VWRITE;
  203         }
  204         if (fmode & FREAD)
  205                 mode |= VREAD;
  206         if (fmode & O_APPEND)
  207                 mode |= VAPPEND;
  208 #ifdef MAC
  209         error = mac_check_vnode_open(cred, vp, mode);
  210         if (error)
  211                 goto bad;
  212 #endif
  213         if ((fmode & O_CREAT) == 0) {
  214                 if (mode & VWRITE) {
  215                         error = vn_writechk(vp);
  216                         if (error)
  217                                 goto bad;
  218                 }
  219                 if (mode) {
  220                         error = VOP_ACCESS(vp, mode, cred, td);
  221                         if (error)
  222                                 goto bad;
  223                 }
  224         }
  225         if ((error = VOP_OPEN(vp, fmode, cred, td, fdidx)) != 0)
  226                 goto bad;
  227 
  228         if (fmode & FWRITE)
  229                 vp->v_writecount++;
  230         *flagp = fmode;
  231         ASSERT_VOP_LOCKED(vp, "vn_open_cred");
  232         if (fdidx == -1)
  233                 VFS_UNLOCK_GIANT(vfslocked);
  234         return (0);
  235 bad:
  236         NDFREE(ndp, NDF_ONLY_PNBUF);
  237         vput(vp);
  238         VFS_UNLOCK_GIANT(vfslocked);
  239         *flagp = fmode;
  240         ndp->ni_vp = NULL;
  241         return (error);
  242 }
  243 
  244 /*
  245  * Check for write permissions on the specified vnode.
  246  * Prototype text segments cannot be written.
  247  */
  248 int
  249 vn_writechk(vp)
  250         register struct vnode *vp;
  251 {
  252 
  253         ASSERT_VOP_LOCKED(vp, "vn_writechk");
  254         /*
  255          * If there's shared text associated with
  256          * the vnode, try to free it up once.  If
  257          * we fail, we can't allow writing.
  258          */
  259         if (vp->v_vflag & VV_TEXT)
  260                 return (ETXTBSY);
  261 
  262         return (0);
  263 }
  264 
  265 /*
  266  * Vnode close call
  267  */
  268 int
  269 vn_close(vp, flags, file_cred, td)
  270         register struct vnode *vp;
  271         int flags;
  272         struct ucred *file_cred;
  273         struct thread *td;
  274 {
  275         struct mount *mp;
  276         int error;
  277 
  278         VFS_ASSERT_GIANT(vp->v_mount);
  279 
  280         vn_start_write(vp, &mp, V_WAIT);
  281         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  282         if (flags & FWRITE)
  283                 vp->v_writecount--;
  284         error = VOP_CLOSE(vp, flags, file_cred, td);
  285         vput(vp);
  286         vn_finished_write(mp);
  287         return (error);
  288 }
  289 
  290 /*
  291  * Sequential heuristic - detect sequential operation
  292  */
  293 static __inline
  294 int
  295 sequential_heuristic(struct uio *uio, struct file *fp)
  296 {
  297 
  298         if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
  299             uio->uio_offset == fp->f_nextoff) {
  300                 /*
  301                  * XXX we assume that the filesystem block size is
  302                  * the default.  Not true, but still gives us a pretty
  303                  * good indicator of how sequential the read operations
  304                  * are.
  305                  */
  306                 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
  307                 if (fp->f_seqcount > IO_SEQMAX)
  308                         fp->f_seqcount = IO_SEQMAX;
  309                 return(fp->f_seqcount << IO_SEQSHIFT);
  310         }
  311 
  312         /*
  313          * Not sequential, quick draw-down of seqcount
  314          */
  315         if (fp->f_seqcount > 1)
  316                 fp->f_seqcount = 1;
  317         else
  318                 fp->f_seqcount = 0;
  319         return(0);
  320 }
  321 
  322 /*
  323  * Package up an I/O request on a vnode into a uio and do it.
  324  */
  325 int
  326 vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
  327     aresid, td)
  328         enum uio_rw rw;
  329         struct vnode *vp;
  330         caddr_t base;
  331         int len;
  332         off_t offset;
  333         enum uio_seg segflg;
  334         int ioflg;
  335         struct ucred *active_cred;
  336         struct ucred *file_cred;
  337         int *aresid;
  338         struct thread *td;
  339 {
  340         struct uio auio;
  341         struct iovec aiov;
  342         struct mount *mp;
  343         struct ucred *cred;
  344         int error;
  345 
  346         VFS_ASSERT_GIANT(vp->v_mount);
  347 
  348         if ((ioflg & IO_NODELOCKED) == 0) {
  349                 mp = NULL;
  350                 if (rw == UIO_WRITE) { 
  351                         if (vp->v_type != VCHR &&
  352                             (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
  353                             != 0)
  354                                 return (error);
  355                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  356                 } else {
  357                         /*
  358                          * XXX This should be LK_SHARED but I don't trust VFS
  359                          * enough to leave it like that until it has been
  360                          * reviewed further.
  361                          */
  362                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  363                 }
  364 
  365         }
  366         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
  367         auio.uio_iov = &aiov;
  368         auio.uio_iovcnt = 1;
  369         aiov.iov_base = base;
  370         aiov.iov_len = len;
  371         auio.uio_resid = len;
  372         auio.uio_offset = offset;
  373         auio.uio_segflg = segflg;
  374         auio.uio_rw = rw;
  375         auio.uio_td = td;
  376         error = 0;
  377 #ifdef MAC
  378         if ((ioflg & IO_NOMACCHECK) == 0) {
  379                 if (rw == UIO_READ)
  380                         error = mac_check_vnode_read(active_cred, file_cred,
  381                             vp);
  382                 else
  383                         error = mac_check_vnode_write(active_cred, file_cred,
  384                             vp);
  385         }
  386 #endif
  387         if (error == 0) {
  388                 if (file_cred)
  389                         cred = file_cred;
  390                 else
  391                         cred = active_cred;
  392                 if (rw == UIO_READ)
  393                         error = VOP_READ(vp, &auio, ioflg, cred);
  394                 else
  395                         error = VOP_WRITE(vp, &auio, ioflg, cred);
  396         }
  397         if (aresid)
  398                 *aresid = auio.uio_resid;
  399         else
  400                 if (auio.uio_resid && error == 0)
  401                         error = EIO;
  402         if ((ioflg & IO_NODELOCKED) == 0) {
  403                 if (rw == UIO_WRITE)
  404                         vn_finished_write(mp);
  405                 VOP_UNLOCK(vp, 0, td);
  406         }
  407         return (error);
  408 }
  409 
  410 /*
  411  * Package up an I/O request on a vnode into a uio and do it.  The I/O
  412  * request is split up into smaller chunks and we try to avoid saturating
  413  * the buffer cache while potentially holding a vnode locked, so we 
  414  * check bwillwrite() before calling vn_rdwr().  We also call uio_yield()
  415  * to give other processes a chance to lock the vnode (either other processes
  416  * core'ing the same binary, or unrelated processes scanning the directory).
  417  */
  418 int
  419 vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred,
  420     file_cred, aresid, td)
  421         enum uio_rw rw;
  422         struct vnode *vp;
  423         caddr_t base;
  424         size_t len;
  425         off_t offset;
  426         enum uio_seg segflg;
  427         int ioflg;
  428         struct ucred *active_cred;
  429         struct ucred *file_cred;
  430         size_t *aresid;
  431         struct thread *td;
  432 {
  433         int error = 0;
  434         int iaresid;
  435 
  436         VFS_ASSERT_GIANT(vp->v_mount);
  437 
  438         do {
  439                 int chunk;
  440 
  441                 /*
  442                  * Force `offset' to a multiple of MAXBSIZE except possibly
  443                  * for the first chunk, so that filesystems only need to
  444                  * write full blocks except possibly for the first and last
  445                  * chunks.
  446                  */
  447                 chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;
  448 
  449                 if (chunk > len)
  450                         chunk = len;
  451                 if (rw != UIO_READ && vp->v_type == VREG)
  452                         bwillwrite();
  453                 iaresid = 0;
  454                 error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
  455                     ioflg, active_cred, file_cred, &iaresid, td);
  456                 len -= chunk;   /* aresid calc already includes length */
  457                 if (error)
  458                         break;
  459                 offset += chunk;
  460                 base += chunk;
  461                 uio_yield();
  462         } while (len);
  463         if (aresid)
  464                 *aresid = len + iaresid;
  465         return (error);
  466 }
  467 
  468 /*
  469  * File table vnode read routine.
  470  */
  471 static int
  472 vn_read(fp, uio, active_cred, flags, td)
  473         struct file *fp;
  474         struct uio *uio;
  475         struct ucred *active_cred;
  476         struct thread *td;
  477         int flags;
  478 {
  479         struct vnode *vp;
  480         int error, ioflag;
  481         int vfslocked;
  482 
  483         KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
  484             uio->uio_td, td));
  485         vp = fp->f_vnode;
  486         ioflag = 0;
  487         if (fp->f_flag & FNONBLOCK)
  488                 ioflag |= IO_NDELAY;
  489         if (fp->f_flag & O_DIRECT)
  490                 ioflag |= IO_DIRECT;
  491         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  492         VOP_LEASE(vp, td, fp->f_cred, LEASE_READ);
  493         /*
  494          * According to McKusick the vn lock is protecting f_offset here.
  495          * Once this field has it's own lock we can acquire this shared.
  496          */
  497         if ((flags & FOF_OFFSET) == 0) {
  498                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  499                 uio->uio_offset = fp->f_offset;
  500         } else
  501                 vn_lock(vp, LK_SHARED | LK_RETRY, td);
  502 
  503         ioflag |= sequential_heuristic(uio, fp);
  504 
  505 #ifdef MAC
  506         error = mac_check_vnode_read(active_cred, fp->f_cred, vp);
  507         if (error == 0)
  508 #endif
  509                 error = VOP_READ(vp, uio, ioflag, fp->f_cred);
  510         if ((flags & FOF_OFFSET) == 0)
  511                 fp->f_offset = uio->uio_offset;
  512         fp->f_nextoff = uio->uio_offset;
  513         VOP_UNLOCK(vp, 0, td);
  514         VFS_UNLOCK_GIANT(vfslocked);
  515         return (error);
  516 }
  517 
  518 /*
  519  * File table vnode write routine.
  520  */
  521 static int
  522 vn_write(fp, uio, active_cred, flags, td)
  523         struct file *fp;
  524         struct uio *uio;
  525         struct ucred *active_cred;
  526         struct thread *td;
  527         int flags;
  528 {
  529         struct vnode *vp;
  530         struct mount *mp;
  531         int error, ioflag;
  532         int vfslocked;
  533 
  534         KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
  535             uio->uio_td, td));
  536         vp = fp->f_vnode;
  537         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  538         if (vp->v_type == VREG)
  539                 bwillwrite();
  540         ioflag = IO_UNIT;
  541         if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
  542                 ioflag |= IO_APPEND;
  543         if (fp->f_flag & FNONBLOCK)
  544                 ioflag |= IO_NDELAY;
  545         if (fp->f_flag & O_DIRECT)
  546                 ioflag |= IO_DIRECT;
  547         if ((fp->f_flag & O_FSYNC) ||
  548             (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
  549                 ioflag |= IO_SYNC;
  550         mp = NULL;
  551         if (vp->v_type != VCHR &&
  552             (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
  553                 goto unlock;
  554         VOP_LEASE(vp, td, fp->f_cred, LEASE_WRITE);
  555         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  556         if ((flags & FOF_OFFSET) == 0)
  557                 uio->uio_offset = fp->f_offset;
  558         ioflag |= sequential_heuristic(uio, fp);
  559 #ifdef MAC
  560         error = mac_check_vnode_write(active_cred, fp->f_cred, vp);
  561         if (error == 0)
  562 #endif
  563                 error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
  564         if ((flags & FOF_OFFSET) == 0)
  565                 fp->f_offset = uio->uio_offset;
  566         fp->f_nextoff = uio->uio_offset;
  567         VOP_UNLOCK(vp, 0, td);
  568         vn_finished_write(mp);
  569 unlock:
  570         VFS_UNLOCK_GIANT(vfslocked);
  571         return (error);
  572 }
  573 
  574 /*
  575  * File table vnode stat routine.
  576  */
  577 static int
  578 vn_statfile(fp, sb, active_cred, td)
  579         struct file *fp;
  580         struct stat *sb;
  581         struct ucred *active_cred;
  582         struct thread *td;
  583 {
  584         struct vnode *vp = fp->f_vnode;
  585         int vfslocked;
  586         int error;
  587 
  588         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  589         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  590         error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
  591         VOP_UNLOCK(vp, 0, td);
  592         VFS_UNLOCK_GIANT(vfslocked);
  593 
  594         return (error);
  595 }
  596 
  597 /*
  598  * Stat a vnode; implementation for the stat syscall
  599  */
  600 int
  601 vn_stat(vp, sb, active_cred, file_cred, td)
  602         struct vnode *vp;
  603         register struct stat *sb;
  604         struct ucred *active_cred;
  605         struct ucred *file_cred;
  606         struct thread *td;
  607 {
  608         struct vattr vattr;
  609         register struct vattr *vap;
  610         int error;
  611         u_short mode;
  612 
  613 #ifdef MAC
  614         error = mac_check_vnode_stat(active_cred, file_cred, vp);
  615         if (error)
  616                 return (error);
  617 #endif
  618 
  619         vap = &vattr;
  620         error = VOP_GETATTR(vp, vap, active_cred, td);
  621         if (error)
  622                 return (error);
  623 
  624         /*
  625          * Zero the spare stat fields
  626          */
  627         bzero(sb, sizeof *sb);
  628 
  629         /*
  630          * Copy from vattr table
  631          */
  632         if (vap->va_fsid != VNOVAL)
  633                 sb->st_dev = vap->va_fsid;
  634         else
  635                 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
  636         sb->st_ino = vap->va_fileid;
  637         mode = vap->va_mode;
  638         switch (vap->va_type) {
  639         case VREG:
  640                 mode |= S_IFREG;
  641                 break;
  642         case VDIR:
  643                 mode |= S_IFDIR;
  644                 break;
  645         case VBLK:
  646                 mode |= S_IFBLK;
  647                 break;
  648         case VCHR:
  649                 mode |= S_IFCHR;
  650                 break;
  651         case VLNK:
  652                 mode |= S_IFLNK;
  653                 /* This is a cosmetic change, symlinks do not have a mode. */
  654                 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
  655                         sb->st_mode &= ~ACCESSPERMS;    /* 0000 */
  656                 else
  657                         sb->st_mode |= ACCESSPERMS;     /* 0777 */
  658                 break;
  659         case VSOCK:
  660                 mode |= S_IFSOCK;
  661                 break;
  662         case VFIFO:
  663                 mode |= S_IFIFO;
  664                 break;
  665         default:
  666                 return (EBADF);
  667         };
  668         sb->st_mode = mode;
  669         sb->st_nlink = vap->va_nlink;
  670         sb->st_uid = vap->va_uid;
  671         sb->st_gid = vap->va_gid;
  672         sb->st_rdev = vap->va_rdev;
  673         if (vap->va_size > OFF_MAX)
  674                 return (EOVERFLOW);
  675         sb->st_size = vap->va_size;
  676         sb->st_atimespec = vap->va_atime;
  677         sb->st_mtimespec = vap->va_mtime;
  678         sb->st_ctimespec = vap->va_ctime;
  679         sb->st_birthtimespec = vap->va_birthtime;
  680 
  681         /*
  682          * According to www.opengroup.org, the meaning of st_blksize is 
  683          *   "a filesystem-specific preferred I/O block size for this 
  684          *    object.  In some filesystem types, this may vary from file
  685          *    to file"
  686          * Default to PAGE_SIZE after much discussion.
  687          * XXX: min(PAGE_SIZE, vp->v_bufobj.bo_bsize) may be more correct.
  688          */
  689 
  690         sb->st_blksize = PAGE_SIZE;
  691         
  692         sb->st_flags = vap->va_flags;
  693         if (suser(td))
  694                 sb->st_gen = 0;
  695         else
  696                 sb->st_gen = vap->va_gen;
  697 
  698 #if (S_BLKSIZE == 512)
  699         /* Optimize this case */
  700         sb->st_blocks = vap->va_bytes >> 9;
  701 #else
  702         sb->st_blocks = vap->va_bytes / S_BLKSIZE;
  703 #endif
  704         return (0);
  705 }
  706 
  707 /*
  708  * File table vnode ioctl routine.
  709  */
  710 static int
  711 vn_ioctl(fp, com, data, active_cred, td)
  712         struct file *fp;
  713         u_long com;
  714         void *data;
  715         struct ucred *active_cred;
  716         struct thread *td;
  717 {
  718         struct vnode *vp = fp->f_vnode;
  719         struct vattr vattr;
  720         int vfslocked;
  721         int error;
  722 
  723         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  724         error = ENOTTY;
  725         switch (vp->v_type) {
  726         case VREG:
  727         case VDIR:
  728                 if (com == FIONREAD) {
  729                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  730                         error = VOP_GETATTR(vp, &vattr, active_cred, td);
  731                         VOP_UNLOCK(vp, 0, td);
  732                         if (!error)
  733                                 *(int *)data = vattr.va_size - fp->f_offset;
  734                 }
  735                 if (com == FIONBIO || com == FIOASYNC)  /* XXX */
  736                         error = 0;
  737                 else
  738                         error = VOP_IOCTL(vp, com, data, fp->f_flag,
  739                             active_cred, td);
  740                 break;
  741 
  742         default:
  743                 break;
  744         }
  745         VFS_UNLOCK_GIANT(vfslocked);
  746         return (error);
  747 }
  748 
  749 /*
  750  * File table vnode poll routine.
  751  */
  752 static int
  753 vn_poll(fp, events, active_cred, td)
  754         struct file *fp;
  755         int events;
  756         struct ucred *active_cred;
  757         struct thread *td;
  758 {
  759         struct vnode *vp;
  760         int error;
  761 
  762         mtx_lock(&Giant);
  763 
  764         vp = fp->f_vnode;
  765 #ifdef MAC
  766         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  767         error = mac_check_vnode_poll(active_cred, fp->f_cred, vp);
  768         VOP_UNLOCK(vp, 0, td);
  769         if (!error)
  770 #endif
  771 
  772         error = VOP_POLL(vp, events, fp->f_cred, td);
  773         mtx_unlock(&Giant);
  774         return (error);
  775 }
  776 
  777 /*
  778  * Check that the vnode is still valid, and if so
  779  * acquire requested lock.
  780  */
  781 int
  782 vn_lock(vp, flags, td)
  783         struct vnode *vp;
  784         int flags;
  785         struct thread *td;
  786 {
  787         int error;
  788 
  789         do {
  790                 if ((flags & LK_INTERLOCK) == 0)
  791                         VI_LOCK(vp);
  792                 if ((flags & LK_NOWAIT || (flags & LK_TYPE_MASK) == 0) &&
  793                     vp->v_iflag & VI_DOOMED) {
  794                         VI_UNLOCK(vp);
  795                         return (ENOENT);
  796                 }
  797                 /*
  798                  * Just polling to check validity.
  799                  */
  800                 if ((flags & LK_TYPE_MASK) == 0) {
  801                         VI_UNLOCK(vp);
  802                         return (0);
  803                 }
  804                 /*
  805                  * lockmgr drops interlock before it will return for
  806                  * any reason.  So force the code above to relock it.
  807                  */
  808                 error = VOP_LOCK(vp, flags | LK_INTERLOCK, td);
  809                 flags &= ~LK_INTERLOCK;
  810                 KASSERT((flags & LK_RETRY) == 0 || error == 0,
  811                     ("LK_RETRY set with incompatible flags %d\n", flags));
  812                 /*
  813                  * Callers specify LK_RETRY if they wish to get dead vnodes.
  814                  * If RETRY is not set, we return ENOENT instead.
  815                  */
  816                 if (error == 0 && vp->v_iflag & VI_DOOMED &&
  817                     (flags & LK_RETRY) == 0) {
  818                         VOP_UNLOCK(vp, 0, td);
  819                         error = ENOENT;
  820                         break;
  821                 }
  822         } while (flags & LK_RETRY && error != 0);
  823         return (error);
  824 }
  825 
  826 /*
  827  * File table vnode close routine.
  828  */
  829 static int
  830 vn_closefile(fp, td)
  831         struct file *fp;
  832         struct thread *td;
  833 {
  834         struct vnode *vp;
  835         struct flock lf;
  836         int vfslocked;
  837         int error;
  838 
  839         vp = fp->f_vnode;
  840 
  841         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  842         if (fp->f_type == DTYPE_VNODE && fp->f_flag & FHASLOCK) {
  843                 lf.l_whence = SEEK_SET;
  844                 lf.l_start = 0;
  845                 lf.l_len = 0;
  846                 lf.l_type = F_UNLCK;
  847                 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
  848         }
  849 
  850         fp->f_ops = &badfileops;
  851 
  852         error = vn_close(vp, fp->f_flag, fp->f_cred, td);
  853         VFS_UNLOCK_GIANT(vfslocked);
  854         return (error);
  855 }
  856 
  857 /*
  858  * Preparing to start a filesystem write operation. If the operation is
  859  * permitted, then we bump the count of operations in progress and
  860  * proceed. If a suspend request is in progress, we wait until the
  861  * suspension is over, and then proceed.
  862  */
  863 int
  864 vn_start_write(vp, mpp, flags)
  865         struct vnode *vp;
  866         struct mount **mpp;
  867         int flags;
  868 {
  869         struct mount *mp;
  870         int error;
  871 
  872         error = 0;
  873         /*
  874          * If a vnode is provided, get and return the mount point that
  875          * to which it will write.
  876          */
  877         if (vp != NULL) {
  878                 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
  879                         *mpp = NULL;
  880                         if (error != EOPNOTSUPP)
  881                                 return (error);
  882                         return (0);
  883                 }
  884         }
  885         if ((mp = *mpp) == NULL)
  886                 return (0);
  887         MNT_ILOCK(mp);
  888         /*
  889          * Check on status of suspension.
  890          */
  891         while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
  892                 if (flags & V_NOWAIT) {
  893                         error = EWOULDBLOCK;
  894                         goto unlock;
  895                 }
  896                 error = msleep(&mp->mnt_flag, MNT_MTX(mp), 
  897                     (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
  898                 if (error)
  899                         goto unlock;
  900         }
  901         if (flags & V_XSLEEP)
  902                 goto unlock;
  903         mp->mnt_writeopcount++;
  904 unlock:
  905         MNT_IUNLOCK(mp);
  906         return (error);
  907 }
  908 
  909 /*
  910  * Secondary suspension. Used by operations such as vop_inactive
  911  * routines that are needed by the higher level functions. These
  912  * are allowed to proceed until all the higher level functions have
  913  * completed (indicated by mnt_writeopcount dropping to zero). At that
  914  * time, these operations are halted until the suspension is over.
  915  */
  916 int
  917 vn_write_suspend_wait(vp, mp, flags)
  918         struct vnode *vp;
  919         struct mount *mp;
  920         int flags;
  921 {
  922         int error;
  923 
  924         if (vp != NULL) {
  925                 if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
  926                         if (error != EOPNOTSUPP)
  927                                 return (error);
  928                         return (0);
  929                 }
  930         }
  931         /*
  932          * If we are not suspended or have not yet reached suspended
  933          * mode, then let the operation proceed.
  934          */
  935         if (mp == NULL)
  936                 return (0);
  937         MNT_ILOCK(mp);
  938         if ((mp->mnt_kern_flag & MNTK_SUSPENDED) == 0) {
  939                 MNT_IUNLOCK(mp);
  940                 return (0);
  941         }
  942         if (flags & V_NOWAIT) {
  943                 MNT_IUNLOCK(mp);
  944                 return (EWOULDBLOCK);
  945         }
  946         /*
  947          * Wait for the suspension to finish.
  948          */
  949         return (msleep(&mp->mnt_flag, MNT_MTX(mp),
  950             (PUSER - 1) | (flags & PCATCH) | PDROP, "suspfs", 0));
  951 }
  952 
  953 /*
  954  * Secondary suspension. Used by operations such as vop_inactive
  955  * routines that are needed by the higher level functions. These
  956  * are allowed to proceed until all the higher level functions have
  957  * completed (indicated by mnt_writeopcount dropping to zero). At that
  958  * time, these operations are halted until the suspension is over.
  959  */
  960 int
  961 vn_start_secondary_write(vp, mpp, flags)
  962         struct vnode *vp;
  963         struct mount **mpp;
  964         int flags;
  965 {
  966         struct mount *mp;
  967         int error;
  968 
  969  retry:
  970         if (vp != NULL) {
  971                 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
  972                         *mpp = NULL;
  973                         if (error != EOPNOTSUPP)
  974                                 return (error);
  975                         return (0);
  976                 }
  977         }
  978         /*
  979          * If we are not suspended or have not yet reached suspended
  980          * mode, then let the operation proceed.
  981          */
  982         if ((mp = *mpp) == NULL)
  983                 return (0);
  984         MNT_ILOCK(mp);
  985         if ((mp->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND2)) == 0) {
  986                 mp->mnt_secondary_writes++;
  987                 mp->mnt_secondary_accwrites++;
  988                 MNT_IUNLOCK(mp);
  989                 return (0);
  990         }
  991         if (flags & V_NOWAIT) {
  992                 MNT_IUNLOCK(mp);
  993                 return (EWOULDBLOCK);
  994         }
  995         /*
  996          * Wait for the suspension to finish.
  997          */
  998         error = msleep(&mp->mnt_flag, MNT_MTX(mp),
  999                        (PUSER - 1) | (flags & PCATCH) | PDROP, "suspfs", 0);
 1000         if (error == 0)
 1001                 goto retry;
 1002         return (error);
 1003 }
 1004 
 1005 /*
 1006  * Filesystem write operation has completed. If we are suspending and this
 1007  * operation is the last one, notify the suspender that the suspension is
 1008  * now in effect.
 1009  */
 1010 void
 1011 vn_finished_write(mp)
 1012         struct mount *mp;
 1013 {
 1014         if (mp == NULL)
 1015                 return;
 1016         MNT_ILOCK(mp);
 1017         mp->mnt_writeopcount--;
 1018         if (mp->mnt_writeopcount < 0)
 1019                 panic("vn_finished_write: neg cnt");
 1020         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
 1021             mp->mnt_writeopcount <= 0)
 1022                 wakeup(&mp->mnt_writeopcount);
 1023         MNT_IUNLOCK(mp);
 1024 }
 1025 
 1026 
 1027 /*
 1028  * Filesystem secondary write operation has completed. If we are
 1029  * suspending and this operation is the last one, notify the suspender
 1030  * that the suspension is now in effect.
 1031  */
 1032 void
 1033 vn_finished_secondary_write(mp)
 1034         struct mount *mp;
 1035 {
 1036         if (mp == NULL)
 1037                 return;
 1038         MNT_ILOCK(mp);
 1039         mp->mnt_secondary_writes--;
 1040         if (mp->mnt_secondary_writes < 0)
 1041                 panic("vn_finished_secondary_write: neg cnt");
 1042         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
 1043             mp->mnt_secondary_writes <= 0)
 1044                 wakeup(&mp->mnt_secondary_writes);
 1045         MNT_IUNLOCK(mp);
 1046 }
 1047 
 1048 
 1049 
 1050 /*
 1051  * Request a filesystem to suspend write operations.
 1052  */
 1053 int
 1054 vfs_write_suspend(mp)
 1055         struct mount *mp;
 1056 {
 1057         struct thread *td = curthread;
 1058         int error;
 1059 
 1060         error = 0;
 1061         MNT_ILOCK(mp);
 1062         if (mp->mnt_kern_flag & MNTK_SUSPEND)
 1063                 goto unlock;
 1064         mp->mnt_kern_flag |= MNTK_SUSPEND;
 1065         if (mp->mnt_writeopcount > 0)
 1066                 (void) msleep(&mp->mnt_writeopcount, 
 1067                     MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0);
 1068         else
 1069                 MNT_IUNLOCK(mp);
 1070         if ((error = VFS_SYNC(mp, MNT_SUSPEND, td)) != 0) {
 1071                 vfs_write_resume(mp);
 1072                 return (error);
 1073         }
 1074         MNT_ILOCK(mp);
 1075 unlock:
 1076         MNT_IUNLOCK(mp);
 1077         return (error);
 1078 }
 1079 
 1080 /*
 1081  * Request a filesystem to resume write operations.
 1082  */
 1083 void
 1084 vfs_write_resume(mp)
 1085         struct mount *mp;
 1086 {
 1087 
 1088         MNT_ILOCK(mp);
 1089         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
 1090                 mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPEND2 |
 1091                                        MNTK_SUSPENDED);
 1092                 wakeup(&mp->mnt_writeopcount);
 1093                 wakeup(&mp->mnt_flag);
 1094         }
 1095         MNT_IUNLOCK(mp);
 1096 }
 1097 
 1098 /*
 1099  * Implement kqueues for files by translating it to vnode operation.
 1100  */
 1101 static int
 1102 vn_kqfilter(struct file *fp, struct knote *kn)
 1103 {
 1104         int error;
 1105 
 1106         mtx_lock(&Giant);
 1107         error = VOP_KQFILTER(fp->f_vnode, kn);
 1108         mtx_unlock(&Giant);
 1109 
 1110         return error;
 1111 }
 1112 
 1113 /*
 1114  * Simplified in-kernel wrapper calls for extended attribute access.
 1115  * Both calls pass in a NULL credential, authorizing as "kernel" access.
 1116  * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 1117  */
 1118 int
 1119 vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
 1120     const char *attrname, int *buflen, char *buf, struct thread *td)
 1121 {
 1122         struct uio      auio;
 1123         struct iovec    iov;
 1124         int     error;
 1125 
 1126         iov.iov_len = *buflen;
 1127         iov.iov_base = buf;
 1128 
 1129         auio.uio_iov = &iov;
 1130         auio.uio_iovcnt = 1;
 1131         auio.uio_rw = UIO_READ;
 1132         auio.uio_segflg = UIO_SYSSPACE;
 1133         auio.uio_td = td;
 1134         auio.uio_offset = 0;
 1135         auio.uio_resid = *buflen;
 1136 
 1137         if ((ioflg & IO_NODELOCKED) == 0)
 1138                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1139 
 1140         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1141 
 1142         /* authorize attribute retrieval as kernel */
 1143         error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
 1144             td);
 1145 
 1146         if ((ioflg & IO_NODELOCKED) == 0)
 1147                 VOP_UNLOCK(vp, 0, td);
 1148 
 1149         if (error == 0) {
 1150                 *buflen = *buflen - auio.uio_resid;
 1151         }
 1152 
 1153         return (error);
 1154 }
 1155 
 1156 /*
 1157  * XXX failure mode if partially written?
 1158  */
 1159 int
 1160 vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
 1161     const char *attrname, int buflen, char *buf, struct thread *td)
 1162 {
 1163         struct uio      auio;
 1164         struct iovec    iov;
 1165         struct mount    *mp;
 1166         int     error;
 1167 
 1168         iov.iov_len = buflen;
 1169         iov.iov_base = buf;
 1170 
 1171         auio.uio_iov = &iov;
 1172         auio.uio_iovcnt = 1;
 1173         auio.uio_rw = UIO_WRITE;
 1174         auio.uio_segflg = UIO_SYSSPACE;
 1175         auio.uio_td = td;
 1176         auio.uio_offset = 0;
 1177         auio.uio_resid = buflen;
 1178 
 1179         if ((ioflg & IO_NODELOCKED) == 0) {
 1180                 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 1181                         return (error);
 1182                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1183         }
 1184 
 1185         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1186 
 1187         /* authorize attribute setting as kernel */
 1188         error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);
 1189 
 1190         if ((ioflg & IO_NODELOCKED) == 0) {
 1191                 vn_finished_write(mp);
 1192                 VOP_UNLOCK(vp, 0, td);
 1193         }
 1194 
 1195         return (error);
 1196 }
 1197 
 1198 int
 1199 vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
 1200     const char *attrname, struct thread *td)
 1201 {
 1202         struct mount    *mp;
 1203         int     error;
 1204 
 1205         if ((ioflg & IO_NODELOCKED) == 0) {
 1206                 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 1207                         return (error);
 1208                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1209         }
 1210 
 1211         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1212 
 1213         /* authorize attribute removal as kernel */
 1214         error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td);
 1215         if (error == EOPNOTSUPP)
 1216                 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
 1217                     NULL, td);
 1218 
 1219         if ((ioflg & IO_NODELOCKED) == 0) {
 1220                 vn_finished_write(mp);
 1221                 VOP_UNLOCK(vp, 0, td);
 1222         }
 1223 
 1224         return (error);
 1225 }
Cache object: 25704d6cfda890692ce61a702b7faf80
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_vnops.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_vnops.c