vfs_vnops.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/6.4/sys/kern/vfs_vnops.c 176599 2008-02-26 18:19:49Z obrien $");
   39 
   40 #include "opt_mac.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/fcntl.h>
   45 #include <sys/file.h>
   46 #include <sys/kdb.h>
   47 #include <sys/stat.h>
   48 #include <sys/proc.h>
   49 #include <sys/limits.h>
   50 #include <sys/lock.h>
   51 #include <sys/mac.h>
   52 #include <sys/mount.h>
   53 #include <sys/mutex.h>
   54 #include <sys/namei.h>
   55 #include <sys/vnode.h>
   56 #include <sys/bio.h>
   57 #include <sys/buf.h>
   58 #include <sys/filio.h>
   59 #include <sys/sx.h>
   60 #include <sys/ttycom.h>
   61 #include <sys/conf.h>
   62 #include <sys/syslog.h>
   63 #include <sys/unistd.h>
   64 
   65 static fo_rdwr_t        vn_read;
   66 static fo_rdwr_t        vn_write;
   67 static fo_ioctl_t       vn_ioctl;
   68 static fo_poll_t        vn_poll;
   69 static fo_kqfilter_t    vn_kqfilter;
   70 static fo_stat_t        vn_statfile;
   71 static fo_close_t       vn_closefile;
   72 
   73 struct  fileops vnops = {
   74         .fo_read = vn_read,
   75         .fo_write = vn_write,
   76         .fo_ioctl = vn_ioctl,
   77         .fo_poll = vn_poll,
   78         .fo_kqfilter = vn_kqfilter,
   79         .fo_stat = vn_statfile,
   80         .fo_close = vn_closefile,
   81         .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
   82 };
   83 
   84 int
   85 vn_open(ndp, flagp, cmode, fdidx)
   86         struct nameidata *ndp;
   87         int *flagp, cmode, fdidx;
   88 {
   89         struct thread *td = ndp->ni_cnd.cn_thread;
   90 
   91         return (vn_open_cred(ndp, flagp, cmode, td->td_ucred, fdidx));
   92 }
   93 
   94 /*
   95  * Common code for vnode open operations.
   96  * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
   97  * 
   98  * Note that this does NOT free nameidata for the successful case,
   99  * due to the NDINIT being done elsewhere.
  100  */
  101 int
  102 vn_open_cred(ndp, flagp, cmode, cred, fdidx)
  103         struct nameidata *ndp;
  104         int *flagp, cmode;
  105         struct ucred *cred;
  106         int fdidx;
  107 {
  108         struct vnode *vp;
  109         struct mount *mp;
  110         struct thread *td = ndp->ni_cnd.cn_thread;
  111         struct vattr vat;
  112         struct vattr *vap = &vat;
  113         int mode, fmode, error;
  114         int vfslocked;
  115 
  116 restart:
  117         vfslocked = 0;
  118         fmode = *flagp;
  119         if (fmode & O_CREAT) {
  120                 ndp->ni_cnd.cn_nameiop = CREATE;
  121                 ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF |
  122                     MPSAFE | AUDITVNODE1;
  123                 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
  124                         ndp->ni_cnd.cn_flags |= FOLLOW;
  125                 bwillwrite();
  126                 if ((error = namei(ndp)) != 0)
  127                         return (error);
  128                 vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
  129                 ndp->ni_cnd.cn_flags &= ~MPSAFE;
  130                 if (ndp->ni_vp == NULL) {
  131                         VATTR_NULL(vap);
  132                         vap->va_type = VREG;
  133                         vap->va_mode = cmode;
  134                         if (fmode & O_EXCL)
  135                                 vap->va_vaflags |= VA_EXCLUSIVE;
  136                         if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
  137                                 NDFREE(ndp, NDF_ONLY_PNBUF);
  138                                 vput(ndp->ni_dvp);
  139                                 VFS_UNLOCK_GIANT(vfslocked);
  140                                 if ((error = vn_start_write(NULL, &mp,
  141                                     V_XSLEEP | PCATCH)) != 0)
  142                                         return (error);
  143                                 goto restart;
  144                         }
  145 #ifdef MAC
  146                         error = mac_check_vnode_create(cred, ndp->ni_dvp,
  147                             &ndp->ni_cnd, vap);
  148                         if (error == 0) {
  149 #endif
  150                                 VOP_LEASE(ndp->ni_dvp, td, cred, LEASE_WRITE);
  151                                 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
  152                                                    &ndp->ni_cnd, vap);
  153 #ifdef MAC
  154                         }
  155 #endif
  156                         vput(ndp->ni_dvp);
  157                         vn_finished_write(mp);
  158                         if (error) {
  159                                 VFS_UNLOCK_GIANT(vfslocked);
  160                                 NDFREE(ndp, NDF_ONLY_PNBUF);
  161                                 return (error);
  162                         }
  163                         fmode &= ~O_TRUNC;
  164                         vp = ndp->ni_vp;
  165                 } else {
  166                         if (ndp->ni_dvp == ndp->ni_vp)
  167                                 vrele(ndp->ni_dvp);
  168                         else
  169                                 vput(ndp->ni_dvp);
  170                         ndp->ni_dvp = NULL;
  171                         vp = ndp->ni_vp;
  172                         if (fmode & O_EXCL) {
  173                                 error = EEXIST;
  174                                 goto bad;
  175                         }
  176                         fmode &= ~O_CREAT;
  177                 }
  178         } else {
  179                 ndp->ni_cnd.cn_nameiop = LOOKUP;
  180                 ndp->ni_cnd.cn_flags = ISOPEN |
  181                     ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
  182                     LOCKLEAF | MPSAFE | AUDITVNODE1;
  183                 if ((error = namei(ndp)) != 0)
  184                         return (error);
  185                 ndp->ni_cnd.cn_flags &= ~MPSAFE;
  186                 vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
  187                 vp = ndp->ni_vp;
  188         }
  189         if (vp->v_type == VLNK) {
  190                 error = EMLINK;
  191                 goto bad;
  192         }
  193         if (vp->v_type == VSOCK) {
  194                 error = EOPNOTSUPP;
  195                 goto bad;
  196         }
  197         mode = 0;
  198         if (fmode & (FWRITE | O_TRUNC)) {
  199                 if (vp->v_type == VDIR) {
  200                         error = EISDIR;
  201                         goto bad;
  202                 }
  203                 mode |= VWRITE;
  204         }
  205         if (fmode & FREAD)
  206                 mode |= VREAD;
  207         if (fmode & O_APPEND)
  208                 mode |= VAPPEND;
  209 #ifdef MAC
  210         error = mac_check_vnode_open(cred, vp, mode);
  211         if (error)
  212                 goto bad;
  213 #endif
  214         if ((fmode & O_CREAT) == 0) {
  215                 if (mode & VWRITE) {
  216                         error = vn_writechk(vp);
  217                         if (error)
  218                                 goto bad;
  219                 }
  220                 if (mode) {
  221                         error = VOP_ACCESS(vp, mode, cred, td);
  222                         if (error)
  223                                 goto bad;
  224                 }
  225         }
  226         if ((error = VOP_OPEN(vp, fmode, cred, td, fdidx)) != 0)
  227                 goto bad;
  228 
  229         if (fmode & FWRITE)
  230                 vp->v_writecount++;
  231         *flagp = fmode;
  232         ASSERT_VOP_ELOCKED(vp, "vn_open_cred");
  233         if (fdidx == -1)
  234                 VFS_UNLOCK_GIANT(vfslocked);
  235         return (0);
  236 bad:
  237         NDFREE(ndp, NDF_ONLY_PNBUF);
  238         vput(vp);
  239         VFS_UNLOCK_GIANT(vfslocked);
  240         *flagp = fmode;
  241         ndp->ni_vp = NULL;
  242         return (error);
  243 }
  244 
  245 /*
  246  * Check for write permissions on the specified vnode.
  247  * Prototype text segments cannot be written.
  248  */
  249 int
  250 vn_writechk(vp)
  251         register struct vnode *vp;
  252 {
  253 
  254         ASSERT_VOP_LOCKED(vp, "vn_writechk");
  255         /*
  256          * If there's shared text associated with
  257          * the vnode, try to free it up once.  If
  258          * we fail, we can't allow writing.
  259          */
  260         if (vp->v_vflag & VV_TEXT)
  261                 return (ETXTBSY);
  262 
  263         return (0);
  264 }
  265 
  266 /*
  267  * Vnode close call
  268  */
  269 int
  270 vn_close(vp, flags, file_cred, td)
  271         register struct vnode *vp;
  272         int flags;
  273         struct ucred *file_cred;
  274         struct thread *td;
  275 {
  276         struct mount *mp;
  277         int error;
  278 
  279         VFS_ASSERT_GIANT(vp->v_mount);
  280 
  281         vn_start_write(vp, &mp, V_WAIT);
  282         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  283         if (flags & FWRITE)
  284                 vp->v_writecount--;
  285         error = VOP_CLOSE(vp, flags, file_cred, td);
  286         vput(vp);
  287         vn_finished_write(mp);
  288         return (error);
  289 }
  290 
  291 /*
  292  * Sequential heuristic - detect sequential operation
  293  */
  294 static __inline
  295 int
  296 sequential_heuristic(struct uio *uio, struct file *fp)
  297 {
  298 
  299         if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
  300             uio->uio_offset == fp->f_nextoff) {
  301                 /*
  302                  * XXX we assume that the filesystem block size is
  303                  * the default.  Not true, but still gives us a pretty
  304                  * good indicator of how sequential the read operations
  305                  * are.
  306                  */
  307                 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
  308                 if (fp->f_seqcount > IO_SEQMAX)
  309                         fp->f_seqcount = IO_SEQMAX;
  310                 return(fp->f_seqcount << IO_SEQSHIFT);
  311         }
  312 
  313         /*
  314          * Not sequential, quick draw-down of seqcount
  315          */
  316         if (fp->f_seqcount > 1)
  317                 fp->f_seqcount = 1;
  318         else
  319                 fp->f_seqcount = 0;
  320         return(0);
  321 }
  322 
  323 /*
  324  * Package up an I/O request on a vnode into a uio and do it.
  325  */
  326 int
  327 vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
  328     aresid, td)
  329         enum uio_rw rw;
  330         struct vnode *vp;
  331         caddr_t base;
  332         int len;
  333         off_t offset;
  334         enum uio_seg segflg;
  335         int ioflg;
  336         struct ucred *active_cred;
  337         struct ucred *file_cred;
  338         int *aresid;
  339         struct thread *td;
  340 {
  341         struct uio auio;
  342         struct iovec aiov;
  343         struct mount *mp;
  344         struct ucred *cred;
  345         int error;
  346 
  347         VFS_ASSERT_GIANT(vp->v_mount);
  348 
  349         if ((ioflg & IO_NODELOCKED) == 0) {
  350                 mp = NULL;
  351                 if (rw == UIO_WRITE) { 
  352                         if (vp->v_type != VCHR &&
  353                             (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
  354                             != 0)
  355                                 return (error);
  356                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  357                 } else {
  358                         /*
  359                          * XXX This should be LK_SHARED but I don't trust VFS
  360                          * enough to leave it like that until it has been
  361                          * reviewed further.
  362                          */
  363                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  364                 }
  365 
  366         }
  367         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
  368         auio.uio_iov = &aiov;
  369         auio.uio_iovcnt = 1;
  370         aiov.iov_base = base;
  371         aiov.iov_len = len;
  372         auio.uio_resid = len;
  373         auio.uio_offset = offset;
  374         auio.uio_segflg = segflg;
  375         auio.uio_rw = rw;
  376         auio.uio_td = td;
  377         error = 0;
  378 #ifdef MAC
  379         if ((ioflg & IO_NOMACCHECK) == 0) {
  380                 if (rw == UIO_READ)
  381                         error = mac_check_vnode_read(active_cred, file_cred,
  382                             vp);
  383                 else
  384                         error = mac_check_vnode_write(active_cred, file_cred,
  385                             vp);
  386         }
  387 #endif
  388         if (error == 0) {
  389                 if (file_cred)
  390                         cred = file_cred;
  391                 else
  392                         cred = active_cred;
  393                 if (rw == UIO_READ)
  394                         error = VOP_READ(vp, &auio, ioflg, cred);
  395                 else
  396                         error = VOP_WRITE(vp, &auio, ioflg, cred);
  397         }
  398         if (aresid)
  399                 *aresid = auio.uio_resid;
  400         else
  401                 if (auio.uio_resid && error == 0)
  402                         error = EIO;
  403         if ((ioflg & IO_NODELOCKED) == 0) {
  404                 if (rw == UIO_WRITE && vp->v_type != VCHR)
  405                         vn_finished_write(mp);
  406                 VOP_UNLOCK(vp, 0, td);
  407         }
  408         return (error);
  409 }
  410 
  411 /*
  412  * Package up an I/O request on a vnode into a uio and do it.  The I/O
  413  * request is split up into smaller chunks and we try to avoid saturating
  414  * the buffer cache while potentially holding a vnode locked, so we 
  415  * check bwillwrite() before calling vn_rdwr().  We also call uio_yield()
  416  * to give other processes a chance to lock the vnode (either other processes
  417  * core'ing the same binary, or unrelated processes scanning the directory).
  418  */
  419 int
  420 vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred,
  421     file_cred, aresid, td)
  422         enum uio_rw rw;
  423         struct vnode *vp;
  424         caddr_t base;
  425         size_t len;
  426         off_t offset;
  427         enum uio_seg segflg;
  428         int ioflg;
  429         struct ucred *active_cred;
  430         struct ucred *file_cred;
  431         size_t *aresid;
  432         struct thread *td;
  433 {
  434         int error = 0;
  435         int iaresid;
  436 
  437         VFS_ASSERT_GIANT(vp->v_mount);
  438 
  439         do {
  440                 int chunk;
  441 
  442                 /*
  443                  * Force `offset' to a multiple of MAXBSIZE except possibly
  444                  * for the first chunk, so that filesystems only need to
  445                  * write full blocks except possibly for the first and last
  446                  * chunks.
  447                  */
  448                 chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;
  449 
  450                 if (chunk > len)
  451                         chunk = len;
  452                 if (rw != UIO_READ && vp->v_type == VREG)
  453                         bwillwrite();
  454                 iaresid = 0;
  455                 error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
  456                     ioflg, active_cred, file_cred, &iaresid, td);
  457                 len -= chunk;   /* aresid calc already includes length */
  458                 if (error)
  459                         break;
  460                 offset += chunk;
  461                 base += chunk;
  462                 uio_yield();
  463         } while (len);
  464         if (aresid)
  465                 *aresid = len + iaresid;
  466         return (error);
  467 }
  468 
  469 /*
  470  * File table vnode read routine.
  471  */
  472 static int
  473 vn_read(fp, uio, active_cred, flags, td)
  474         struct file *fp;
  475         struct uio *uio;
  476         struct ucred *active_cred;
  477         struct thread *td;
  478         int flags;
  479 {
  480         struct vnode *vp;
  481         int error, ioflag;
  482         int vfslocked;
  483 
  484         KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
  485             uio->uio_td, td));
  486         vp = fp->f_vnode;
  487         ioflag = 0;
  488         if (fp->f_flag & FNONBLOCK)
  489                 ioflag |= IO_NDELAY;
  490         if (fp->f_flag & O_DIRECT)
  491                 ioflag |= IO_DIRECT;
  492         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  493         VOP_LEASE(vp, td, fp->f_cred, LEASE_READ);
  494         /*
  495          * According to McKusick the vn lock was protecting f_offset here.
  496          * It is now protected by the FOFFSET_LOCKED flag.
  497          */
  498         if ((flags & FOF_OFFSET) == 0) {
  499                 FILE_LOCK(fp);
  500                 while(fp->f_vnread_flags & FOFFSET_LOCKED) {
  501                         fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
  502                         msleep(&fp->f_vnread_flags,fp->f_mtxp,PUSER -1,"vnread offlock",0);
  503                 }
  504                 fp->f_vnread_flags |= FOFFSET_LOCKED;
  505                 FILE_UNLOCK(fp);
  506                 vn_lock(vp, LK_SHARED | LK_RETRY, td);
  507                 uio->uio_offset = fp->f_offset;
  508         } else
  509                 vn_lock(vp, LK_SHARED | LK_RETRY, td);
  510 
  511         ioflag |= sequential_heuristic(uio, fp);
  512 
  513 #ifdef MAC
  514         error = mac_check_vnode_read(active_cred, fp->f_cred, vp);
  515         if (error == 0)
  516 #endif
  517                 error = VOP_READ(vp, uio, ioflag, fp->f_cred);
  518         if ((flags & FOF_OFFSET) == 0) {
  519                 fp->f_offset = uio->uio_offset;
  520                 FILE_LOCK(fp);
  521                 if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING)
  522                         wakeup(&fp->f_vnread_flags);
  523                 fp->f_vnread_flags = 0;
  524                 FILE_UNLOCK(fp);
  525         }
  526         fp->f_nextoff = uio->uio_offset;
  527         VOP_UNLOCK(vp, 0, td);
  528         VFS_UNLOCK_GIANT(vfslocked);
  529         return (error);
  530 }
  531 
  532 /*
  533  * File table vnode write routine.
  534  */
  535 static int
  536 vn_write(fp, uio, active_cred, flags, td)
  537         struct file *fp;
  538         struct uio *uio;
  539         struct ucred *active_cred;
  540         struct thread *td;
  541         int flags;
  542 {
  543         struct vnode *vp;
  544         struct mount *mp;
  545         int error, ioflag;
  546         int vfslocked;
  547 
  548         KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
  549             uio->uio_td, td));
  550         vp = fp->f_vnode;
  551         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  552         if (vp->v_type == VREG)
  553                 bwillwrite();
  554         ioflag = IO_UNIT;
  555         if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
  556                 ioflag |= IO_APPEND;
  557         if (fp->f_flag & FNONBLOCK)
  558                 ioflag |= IO_NDELAY;
  559         if (fp->f_flag & O_DIRECT)
  560                 ioflag |= IO_DIRECT;
  561         if ((fp->f_flag & O_FSYNC) ||
  562             (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
  563                 ioflag |= IO_SYNC;
  564         mp = NULL;
  565         if (vp->v_type != VCHR &&
  566             (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
  567                 goto unlock;
  568         VOP_LEASE(vp, td, fp->f_cred, LEASE_WRITE);
  569         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  570         if ((flags & FOF_OFFSET) == 0)
  571                 uio->uio_offset = fp->f_offset;
  572         ioflag |= sequential_heuristic(uio, fp);
  573 #ifdef MAC
  574         error = mac_check_vnode_write(active_cred, fp->f_cred, vp);
  575         if (error == 0)
  576 #endif
  577                 error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
  578         if ((flags & FOF_OFFSET) == 0)
  579                 fp->f_offset = uio->uio_offset;
  580         fp->f_nextoff = uio->uio_offset;
  581         VOP_UNLOCK(vp, 0, td);
  582         if (vp->v_type != VCHR)
  583                 vn_finished_write(mp);
  584 unlock:
  585         VFS_UNLOCK_GIANT(vfslocked);
  586         return (error);
  587 }
  588 
  589 /*
  590  * File table vnode stat routine.
  591  */
  592 static int
  593 vn_statfile(fp, sb, active_cred, td)
  594         struct file *fp;
  595         struct stat *sb;
  596         struct ucred *active_cred;
  597         struct thread *td;
  598 {
  599         struct vnode *vp = fp->f_vnode;
  600         int vfslocked;
  601         int error;
  602 
  603         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  604         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  605         error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
  606         VOP_UNLOCK(vp, 0, td);
  607         VFS_UNLOCK_GIANT(vfslocked);
  608 
  609         return (error);
  610 }
  611 
  612 /*
  613  * Stat a vnode; implementation for the stat syscall
  614  */
  615 int
  616 vn_stat(vp, sb, active_cred, file_cred, td)
  617         struct vnode *vp;
  618         register struct stat *sb;
  619         struct ucred *active_cred;
  620         struct ucred *file_cred;
  621         struct thread *td;
  622 {
  623         struct vattr vattr;
  624         register struct vattr *vap;
  625         int error;
  626         u_short mode;
  627 
  628 #ifdef MAC
  629         error = mac_check_vnode_stat(active_cred, file_cred, vp);
  630         if (error)
  631                 return (error);
  632 #endif
  633 
  634         vap = &vattr;
  635         error = VOP_GETATTR(vp, vap, active_cred, td);
  636         if (error)
  637                 return (error);
  638 
  639         /*
  640          * Zero the spare stat fields
  641          */
  642         bzero(sb, sizeof *sb);
  643 
  644         /*
  645          * Copy from vattr table
  646          */
  647         if (vap->va_fsid != VNOVAL)
  648                 sb->st_dev = vap->va_fsid;
  649         else
  650                 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
  651         sb->st_ino = vap->va_fileid;
  652         mode = vap->va_mode;
  653         switch (vap->va_type) {
  654         case VREG:
  655                 mode |= S_IFREG;
  656                 break;
  657         case VDIR:
  658                 mode |= S_IFDIR;
  659                 break;
  660         case VBLK:
  661                 mode |= S_IFBLK;
  662                 break;
  663         case VCHR:
  664                 mode |= S_IFCHR;
  665                 break;
  666         case VLNK:
  667                 mode |= S_IFLNK;
  668                 /* This is a cosmetic change, symlinks do not have a mode. */
  669                 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
  670                         sb->st_mode &= ~ACCESSPERMS;    /* 0000 */
  671                 else
  672                         sb->st_mode |= ACCESSPERMS;     /* 0777 */
  673                 break;
  674         case VSOCK:
  675                 mode |= S_IFSOCK;
  676                 break;
  677         case VFIFO:
  678                 mode |= S_IFIFO;
  679                 break;
  680         default:
  681                 return (EBADF);
  682         };
  683         sb->st_mode = mode;
  684         sb->st_nlink = vap->va_nlink;
  685         sb->st_uid = vap->va_uid;
  686         sb->st_gid = vap->va_gid;
  687         sb->st_rdev = vap->va_rdev;
  688         if (vap->va_size > OFF_MAX)
  689                 return (EOVERFLOW);
  690         sb->st_size = vap->va_size;
  691         sb->st_atimespec = vap->va_atime;
  692         sb->st_mtimespec = vap->va_mtime;
  693         sb->st_ctimespec = vap->va_ctime;
  694         sb->st_birthtimespec = vap->va_birthtime;
  695 
  696         /*
  697          * According to www.opengroup.org, the meaning of st_blksize is 
  698          *   "a filesystem-specific preferred I/O block size for this 
  699          *    object.  In some filesystem types, this may vary from file
  700          *    to file"
  701          * Default to PAGE_SIZE after much discussion.
  702          * XXX: min(PAGE_SIZE, vp->v_bufobj.bo_bsize) may be more correct.
  703          */
  704 
  705         sb->st_blksize = PAGE_SIZE;
  706         
  707         sb->st_flags = vap->va_flags;
  708         if (suser(td))
  709                 sb->st_gen = 0;
  710         else
  711                 sb->st_gen = vap->va_gen;
  712 
  713 #if (S_BLKSIZE == 512)
  714         /* Optimize this case */
  715         sb->st_blocks = vap->va_bytes >> 9;
  716 #else
  717         sb->st_blocks = vap->va_bytes / S_BLKSIZE;
  718 #endif
  719         return (0);
  720 }
  721 
  722 /*
  723  * File table vnode ioctl routine.
  724  */
  725 static int
  726 vn_ioctl(fp, com, data, active_cred, td)
  727         struct file *fp;
  728         u_long com;
  729         void *data;
  730         struct ucred *active_cred;
  731         struct thread *td;
  732 {
  733         struct vnode *vp = fp->f_vnode;
  734         struct vattr vattr;
  735         int vfslocked;
  736         int error;
  737 
  738         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  739         error = ENOTTY;
  740         switch (vp->v_type) {
  741         case VREG:
  742         case VDIR:
  743                 if (com == FIONREAD) {
  744                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  745                         error = VOP_GETATTR(vp, &vattr, active_cred, td);
  746                         VOP_UNLOCK(vp, 0, td);
  747                         if (!error)
  748                                 *(int *)data = vattr.va_size - fp->f_offset;
  749                 }
  750                 if (com == FIONBIO || com == FIOASYNC)  /* XXX */
  751                         error = 0;
  752                 else
  753                         error = VOP_IOCTL(vp, com, data, fp->f_flag,
  754                             active_cred, td);
  755                 break;
  756 
  757         default:
  758                 break;
  759         }
  760         VFS_UNLOCK_GIANT(vfslocked);
  761         return (error);
  762 }
  763 
  764 /*
  765  * File table vnode poll routine.
  766  */
  767 static int
  768 vn_poll(fp, events, active_cred, td)
  769         struct file *fp;
  770         int events;
  771         struct ucred *active_cred;
  772         struct thread *td;
  773 {
  774         struct vnode *vp;
  775         int error;
  776 
  777         mtx_lock(&Giant);
  778 
  779         vp = fp->f_vnode;
  780 #ifdef MAC
  781         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  782         error = mac_check_vnode_poll(active_cred, fp->f_cred, vp);
  783         VOP_UNLOCK(vp, 0, td);
  784         if (!error)
  785 #endif
  786 
  787         error = VOP_POLL(vp, events, fp->f_cred, td);
  788         mtx_unlock(&Giant);
  789         return (error);
  790 }
  791 
  792 /*
  793  * Check that the vnode is still valid, and if so
  794  * acquire requested lock.
  795  */
  796 int
  797 vn_lock(vp, flags, td)
  798         struct vnode *vp;
  799         int flags;
  800         struct thread *td;
  801 {
  802         int error;
  803 
  804         do {
  805                 if ((flags & LK_INTERLOCK) == 0)
  806                         VI_LOCK(vp);
  807                 if ((flags & LK_NOWAIT || (flags & LK_TYPE_MASK) == 0) &&
  808                     vp->v_iflag & VI_DOOMED) {
  809                         VI_UNLOCK(vp);
  810                         return (ENOENT);
  811                 }
  812                 /*
  813                  * Just polling to check validity.
  814                  */
  815                 if ((flags & LK_TYPE_MASK) == 0) {
  816                         VI_UNLOCK(vp);
  817                         return (0);
  818                 }
  819                 /*
  820                  * lockmgr drops interlock before it will return for
  821                  * any reason.  So force the code above to relock it.
  822                  */
  823                 error = VOP_LOCK(vp, flags | LK_INTERLOCK, td);
  824                 flags &= ~LK_INTERLOCK;
  825                 KASSERT((flags & LK_RETRY) == 0 || error == 0,
  826                     ("LK_RETRY set with incompatible flags %d\n", flags));
  827                 /*
  828                  * Callers specify LK_RETRY if they wish to get dead vnodes.
  829                  * If RETRY is not set, we return ENOENT instead.
  830                  */
  831                 if (error == 0 && vp->v_iflag & VI_DOOMED &&
  832                     (flags & LK_RETRY) == 0) {
  833                         VOP_UNLOCK(vp, 0, td);
  834                         error = ENOENT;
  835                         break;
  836                 }
  837         } while (flags & LK_RETRY && error != 0);
  838         return (error);
  839 }
  840 
  841 /*
  842  * File table vnode close routine.
  843  */
  844 static int
  845 vn_closefile(fp, td)
  846         struct file *fp;
  847         struct thread *td;
  848 {
  849         struct vnode *vp;
  850         struct flock lf;
  851         int vfslocked;
  852         int error;
  853 
  854         vp = fp->f_vnode;
  855 
  856         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  857         if (fp->f_type == DTYPE_VNODE && fp->f_flag & FHASLOCK) {
  858                 lf.l_whence = SEEK_SET;
  859                 lf.l_start = 0;
  860                 lf.l_len = 0;
  861                 lf.l_type = F_UNLCK;
  862                 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
  863         }
  864 
  865         fp->f_ops = &badfileops;
  866 
  867         error = vn_close(vp, fp->f_flag, fp->f_cred, td);
  868         VFS_UNLOCK_GIANT(vfslocked);
  869         return (error);
  870 }
  871 
  872 /*
  873  * Preparing to start a filesystem write operation. If the operation is
  874  * permitted, then we bump the count of operations in progress and
  875  * proceed. If a suspend request is in progress, we wait until the
  876  * suspension is over, and then proceed.
  877  */
  878 int
  879 vn_start_write(vp, mpp, flags)
  880         struct vnode *vp;
  881         struct mount **mpp;
  882         int flags;
  883 {
  884         struct mount *mp;
  885         int error;
  886 
  887         error = 0;
  888         /*
  889          * If a vnode is provided, get and return the mount point that
  890          * to which it will write.
  891          */
  892         if (vp != NULL) {
  893                 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
  894                         *mpp = NULL;
  895                         if (error != EOPNOTSUPP)
  896                                 return (error);
  897                         return (0);
  898                 }
  899         }
  900         if ((mp = *mpp) == NULL)
  901                 return (0);
  902         MNT_ILOCK(mp);
  903         if (vp == NULL)
  904                 MNT_REF(mp);
  905         /*
  906          * Check on status of suspension.
  907          */
  908         while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
  909                 if (flags & V_NOWAIT) {
  910                         error = EWOULDBLOCK;
  911                         goto unlock;
  912                 }
  913                 error = msleep(&mp->mnt_flag, MNT_MTX(mp), 
  914                     (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
  915                 if (error)
  916                         goto unlock;
  917         }
  918         if (flags & V_XSLEEP)
  919                 goto unlock;
  920         mp->mnt_writeopcount++;
  921 unlock:
  922         MNT_REL(mp);
  923         MNT_IUNLOCK(mp);
  924         return (error);
  925 }
  926 
  927 /*
  928  * Secondary suspension. Used by operations such as vop_inactive
  929  * routines that are needed by the higher level functions. These
  930  * are allowed to proceed until all the higher level functions have
  931  * completed (indicated by mnt_writeopcount dropping to zero). At that
  932  * time, these operations are halted until the suspension is over.
  933  */
  934 int
  935 vn_write_suspend_wait(vp, mp, flags)
  936         struct vnode *vp;
  937         struct mount *mp;
  938         int flags;
  939 {
  940         int error;
  941 
  942         if (vp != NULL) {
  943                 if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
  944                         if (error != EOPNOTSUPP)
  945                                 return (error);
  946                         return (0);
  947                 }
  948         }
  949         /*
  950          * If we are not suspended or have not yet reached suspended
  951          * mode, then let the operation proceed.
  952          */
  953         if (mp == NULL)
  954                 return (0);
  955         MNT_ILOCK(mp);
  956         if (vp == NULL)
  957                 MNT_REF(mp);
  958         if ((mp->mnt_kern_flag & MNTK_SUSPENDED) == 0) {
  959                 MNT_REL(mp);
  960                 MNT_IUNLOCK(mp);
  961                 return (0);
  962         }
  963         if (flags & V_NOWAIT) {
  964                 MNT_REL(mp);
  965                 MNT_IUNLOCK(mp);
  966                 return (EWOULDBLOCK);
  967         }
  968         /*
  969          * Wait for the suspension to finish.
  970          */
  971         error = msleep(&mp->mnt_flag, MNT_MTX(mp),
  972             (PUSER - 1) | (flags & PCATCH) | PDROP, "suspfs", 0);
  973         vfs_rel(mp);
  974         return (error);
  975 }
  976 
  977 /*
  978  * Secondary suspension. Used by operations such as vop_inactive
  979  * routines that are needed by the higher level functions. These
  980  * are allowed to proceed until all the higher level functions have
  981  * completed (indicated by mnt_writeopcount dropping to zero). At that
  982  * time, these operations are halted until the suspension is over.
  983  */
  984 int
  985 vn_start_secondary_write(vp, mpp, flags)
  986         struct vnode *vp;
  987         struct mount **mpp;
  988         int flags;
  989 {
  990         struct mount *mp;
  991         int error;
  992 
  993  retry:
  994         if (vp != NULL) {
  995                 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
  996                         *mpp = NULL;
  997                         if (error != EOPNOTSUPP)
  998                                 return (error);
  999                         return (0);
 1000                 }
 1001         }
 1002         /*
 1003          * If we are not suspended or have not yet reached suspended
 1004          * mode, then let the operation proceed.
 1005          */
 1006         if ((mp = *mpp) == NULL)
 1007                 return (0);
 1008         MNT_ILOCK(mp);
 1009         if (vp == NULL)
 1010                 MNT_REF(mp);
 1011         if ((mp->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND2)) == 0) {
 1012                 mp->mnt_secondary_writes++;
 1013                 mp->mnt_secondary_accwrites++;
 1014                 MNT_REL(mp);
 1015                 MNT_IUNLOCK(mp);
 1016                 return (0);
 1017         }
 1018         if (flags & V_NOWAIT) {
 1019                 MNT_REL(mp);
 1020                 MNT_IUNLOCK(mp);
 1021                 return (EWOULDBLOCK);
 1022         }
 1023         /*
 1024          * Wait for the suspension to finish.
 1025          */
 1026         error = msleep(&mp->mnt_flag, MNT_MTX(mp),
 1027                        (PUSER - 1) | (flags & PCATCH) | PDROP, "suspfs", 0);
 1028         vfs_rel(mp);
 1029         if (error == 0)
 1030                 goto retry;
 1031         return (error);
 1032 }
 1033 
 1034 /*
 1035  * Filesystem write operation has completed. If we are suspending and this
 1036  * operation is the last one, notify the suspender that the suspension is
 1037  * now in effect.
 1038  */
 1039 void
 1040 vn_finished_write(mp)
 1041         struct mount *mp;
 1042 {
 1043         if (mp == NULL)
 1044                 return;
 1045         MNT_ILOCK(mp);
 1046         mp->mnt_writeopcount--;
 1047         if (mp->mnt_writeopcount < 0)
 1048                 panic("vn_finished_write: neg cnt");
 1049         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
 1050             mp->mnt_writeopcount <= 0)
 1051                 wakeup(&mp->mnt_writeopcount);
 1052         MNT_IUNLOCK(mp);
 1053 }
 1054 
 1055 
 1056 /*
 1057  * Filesystem secondary write operation has completed. If we are
 1058  * suspending and this operation is the last one, notify the suspender
 1059  * that the suspension is now in effect.
 1060  */
 1061 void
 1062 vn_finished_secondary_write(mp)
 1063         struct mount *mp;
 1064 {
 1065         if (mp == NULL)
 1066                 return;
 1067         MNT_ILOCK(mp);
 1068         mp->mnt_secondary_writes--;
 1069         if (mp->mnt_secondary_writes < 0)
 1070                 panic("vn_finished_secondary_write: neg cnt");
 1071         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
 1072             mp->mnt_secondary_writes <= 0)
 1073                 wakeup(&mp->mnt_secondary_writes);
 1074         MNT_IUNLOCK(mp);
 1075 }
 1076 
 1077 
 1078 
 1079 /*
 1080  * Request a filesystem to suspend write operations.
 1081  */
 1082 int
 1083 vfs_write_suspend(mp)
 1084         struct mount *mp;
 1085 {
 1086         struct thread *td = curthread;
 1087         int error;
 1088 
 1089         MNT_ILOCK(mp);
 1090         if (mp->mnt_kern_flag & MNTK_SUSPEND) {
 1091                 MNT_IUNLOCK(mp);
 1092                 return (0);
 1093         }
 1094         mp->mnt_kern_flag |= MNTK_SUSPEND;
 1095         if (mp->mnt_writeopcount > 0)
 1096                 (void) msleep(&mp->mnt_writeopcount, 
 1097                     MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0);
 1098         else
 1099                 MNT_IUNLOCK(mp);
 1100         if ((error = VFS_SYNC(mp, MNT_SUSPEND, td)) != 0)
 1101                 vfs_write_resume(mp);
 1102         return (error);
 1103 }
 1104 
 1105 /*
 1106  * Request a filesystem to resume write operations.
 1107  */
 1108 void
 1109 vfs_write_resume(mp)
 1110         struct mount *mp;
 1111 {
 1112 
 1113         MNT_ILOCK(mp);
 1114         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
 1115                 mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPEND2 |
 1116                                        MNTK_SUSPENDED);
 1117                 wakeup(&mp->mnt_writeopcount);
 1118                 wakeup(&mp->mnt_flag);
 1119         }
 1120         MNT_IUNLOCK(mp);
 1121 }
 1122 
 1123 /*
 1124  * Implement kqueues for files by translating it to vnode operation.
 1125  */
 1126 static int
 1127 vn_kqfilter(struct file *fp, struct knote *kn)
 1128 {
 1129         int error;
 1130 
 1131         mtx_lock(&Giant);
 1132         error = VOP_KQFILTER(fp->f_vnode, kn);
 1133         mtx_unlock(&Giant);
 1134 
 1135         return error;
 1136 }
 1137 
 1138 /*
 1139  * Simplified in-kernel wrapper calls for extended attribute access.
 1140  * Both calls pass in a NULL credential, authorizing as "kernel" access.
 1141  * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 1142  */
 1143 int
 1144 vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
 1145     const char *attrname, int *buflen, char *buf, struct thread *td)
 1146 {
 1147         struct uio      auio;
 1148         struct iovec    iov;
 1149         int     error;
 1150 
 1151         iov.iov_len = *buflen;
 1152         iov.iov_base = buf;
 1153 
 1154         auio.uio_iov = &iov;
 1155         auio.uio_iovcnt = 1;
 1156         auio.uio_rw = UIO_READ;
 1157         auio.uio_segflg = UIO_SYSSPACE;
 1158         auio.uio_td = td;
 1159         auio.uio_offset = 0;
 1160         auio.uio_resid = *buflen;
 1161 
 1162         if ((ioflg & IO_NODELOCKED) == 0)
 1163                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1164 
 1165         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1166 
 1167         /* authorize attribute retrieval as kernel */
 1168         error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
 1169             td);
 1170 
 1171         if ((ioflg & IO_NODELOCKED) == 0)
 1172                 VOP_UNLOCK(vp, 0, td);
 1173 
 1174         if (error == 0) {
 1175                 *buflen = *buflen - auio.uio_resid;
 1176         }
 1177 
 1178         return (error);
 1179 }
 1180 
 1181 /*
 1182  * XXX failure mode if partially written?
 1183  */
 1184 int
 1185 vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
 1186     const char *attrname, int buflen, char *buf, struct thread *td)
 1187 {
 1188         struct uio      auio;
 1189         struct iovec    iov;
 1190         struct mount    *mp;
 1191         int     error;
 1192 
 1193         iov.iov_len = buflen;
 1194         iov.iov_base = buf;
 1195 
 1196         auio.uio_iov = &iov;
 1197         auio.uio_iovcnt = 1;
 1198         auio.uio_rw = UIO_WRITE;
 1199         auio.uio_segflg = UIO_SYSSPACE;
 1200         auio.uio_td = td;
 1201         auio.uio_offset = 0;
 1202         auio.uio_resid = buflen;
 1203 
 1204         if ((ioflg & IO_NODELOCKED) == 0) {
 1205                 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 1206                         return (error);
 1207                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1208         }
 1209 
 1210         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1211 
 1212         /* authorize attribute setting as kernel */
 1213         error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);
 1214 
 1215         if ((ioflg & IO_NODELOCKED) == 0) {
 1216                 vn_finished_write(mp);
 1217                 VOP_UNLOCK(vp, 0, td);
 1218         }
 1219 
 1220         return (error);
 1221 }
 1222 
 1223 int
 1224 vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
 1225     const char *attrname, struct thread *td)
 1226 {
 1227         struct mount    *mp;
 1228         int     error;
 1229 
 1230         if ((ioflg & IO_NODELOCKED) == 0) {
 1231                 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 1232                         return (error);
 1233                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1234         }
 1235 
 1236         ASSERT_VOP_LOCKED(vp, "IO_NODELOCKED with no vp lock held");
 1237 
 1238         /* authorize attribute removal as kernel */
 1239         error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td);
 1240         if (error == EOPNOTSUPP)
 1241                 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
 1242                     NULL, td);
 1243 
 1244         if ((ioflg & IO_NODELOCKED) == 0) {
 1245                 vn_finished_write(mp);
 1246                 VOP_UNLOCK(vp, 0, td);
 1247         }
 1248 
 1249         return (error);
 1250 }
Cache object: 8c8bfa6e1e9184ea7469cf320eab6315
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_vnops.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_vnops.c