vfs_vnops.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*
    2  * Copyright (c) 1982, 1986, 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the University of
   21  *      California, Berkeley and its contributors.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
   39  * $FreeBSD: releng/5.1/sys/kern/vfs_vnops.c 114216 2003-04-29 13:36:06Z kan $
   40  */
   41 
   42 #include "opt_mac.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/fcntl.h>
   47 #include <sys/file.h>
   48 #include <sys/stat.h>
   49 #include <sys/proc.h>
   50 #include <sys/limits.h>
   51 #include <sys/lock.h>
   52 #include <sys/mac.h>
   53 #include <sys/mount.h>
   54 #include <sys/mutex.h>
   55 #include <sys/namei.h>
   56 #include <sys/vnode.h>
   57 #include <sys/bio.h>
   58 #include <sys/buf.h>
   59 #include <sys/filio.h>
   60 #include <sys/sx.h>
   61 #include <sys/ttycom.h>
   62 #include <sys/conf.h>
   63 #include <sys/syslog.h>
   64 
   65 static fo_rdwr_t        vn_read;
   66 static fo_rdwr_t        vn_write;
   67 static fo_ioctl_t       vn_ioctl;
   68 static fo_poll_t        vn_poll;
   69 static fo_kqfilter_t    vn_kqfilter;
   70 static fo_stat_t        vn_statfile;
   71 static fo_close_t       vn_closefile;
   72 
   73 struct  fileops vnops = {
   74         vn_read, vn_write, vn_ioctl, vn_poll, vn_kqfilter,
   75         vn_statfile, vn_closefile, DFLAG_PASSABLE
   76 };
   77 
   78 int
   79 vn_open(ndp, flagp, cmode)
   80         register struct nameidata *ndp;
   81         int *flagp, cmode;
   82 {
   83         struct thread *td = ndp->ni_cnd.cn_thread;
   84 
   85         return (vn_open_cred(ndp, flagp, cmode, td->td_ucred));
   86 }
   87 
   88 /*
   89  * Common code for vnode open operations.
   90  * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
   91  * 
   92  * Note that this does NOT free nameidata for the successful case,
   93  * due to the NDINIT being done elsewhere.
   94  */
   95 int
   96 vn_open_cred(ndp, flagp, cmode, cred)
   97         register struct nameidata *ndp;
   98         int *flagp, cmode;
   99         struct ucred *cred;
  100 {
  101         struct vnode *vp;
  102         struct mount *mp;
  103         struct thread *td = ndp->ni_cnd.cn_thread;
  104         struct vattr vat;
  105         struct vattr *vap = &vat;
  106         int mode, fmode, error;
  107 #ifdef LOOKUP_SHARED
  108         int exclusive;  /* The current intended lock state */
  109 
  110         exclusive = 0;
  111 #endif
  112 
  113 restart:
  114         fmode = *flagp;
  115         if (fmode & O_CREAT) {
  116                 ndp->ni_cnd.cn_nameiop = CREATE;
  117                 ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
  118                 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
  119                         ndp->ni_cnd.cn_flags |= FOLLOW;
  120                 bwillwrite();
  121                 if ((error = namei(ndp)) != 0)
  122                         return (error);
  123                 if (ndp->ni_vp == NULL) {
  124                         VATTR_NULL(vap);
  125                         vap->va_type = VREG;
  126                         vap->va_mode = cmode;
  127                         if (fmode & O_EXCL)
  128                                 vap->va_vaflags |= VA_EXCLUSIVE;
  129                         if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
  130                                 NDFREE(ndp, NDF_ONLY_PNBUF);
  131                                 vput(ndp->ni_dvp);
  132                                 if ((error = vn_start_write(NULL, &mp,
  133                                     V_XSLEEP | PCATCH)) != 0)
  134                                         return (error);
  135                                 goto restart;
  136                         }
  137 #ifdef MAC
  138                         error = mac_check_vnode_create(cred, ndp->ni_dvp,
  139                             &ndp->ni_cnd, vap);
  140                         if (error == 0) {
  141 #endif
  142                                 VOP_LEASE(ndp->ni_dvp, td, cred, LEASE_WRITE);
  143                                 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
  144                                                    &ndp->ni_cnd, vap);
  145 #ifdef MAC
  146                         }
  147 #endif
  148                         vput(ndp->ni_dvp);
  149                         vn_finished_write(mp);
  150                         if (error) {
  151                                 NDFREE(ndp, NDF_ONLY_PNBUF);
  152                                 return (error);
  153                         }
  154                         ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create");
  155                         ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
  156                         fmode &= ~O_TRUNC;
  157                         vp = ndp->ni_vp;
  158 #ifdef LOOKUP_SHARED
  159                         exclusive = 1;
  160 #endif
  161                 } else {
  162                         if (ndp->ni_dvp == ndp->ni_vp)
  163                                 vrele(ndp->ni_dvp);
  164                         else
  165                                 vput(ndp->ni_dvp);
  166                         ndp->ni_dvp = NULL;
  167                         vp = ndp->ni_vp;
  168                         if (fmode & O_EXCL) {
  169                                 error = EEXIST;
  170                                 goto bad;
  171                         }
  172                         fmode &= ~O_CREAT;
  173                 }
  174         } else {
  175                 ndp->ni_cnd.cn_nameiop = LOOKUP;
  176 #ifdef LOOKUP_SHARED
  177                 ndp->ni_cnd.cn_flags =
  178                     ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
  179                     LOCKSHARED | LOCKLEAF;
  180 #else
  181                 ndp->ni_cnd.cn_flags =
  182                     ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
  183 #endif
  184                 if ((error = namei(ndp)) != 0)
  185                         return (error);
  186                 vp = ndp->ni_vp;
  187         }
  188         if (vp->v_type == VLNK) {
  189                 error = EMLINK;
  190                 goto bad;
  191         }
  192         if (vp->v_type == VSOCK) {
  193                 error = EOPNOTSUPP;
  194                 goto bad;
  195         }
  196         mode = 0;
  197         if (fmode & (FWRITE | O_TRUNC)) {
  198                 if (vp->v_type == VDIR) {
  199                         error = EISDIR;
  200                         goto bad;
  201                 }
  202                 mode |= VWRITE;
  203         }
  204         if (fmode & FREAD)
  205                 mode |= VREAD;
  206         if (fmode & O_APPEND)
  207                 mode |= VAPPEND;
  208 #ifdef MAC
  209         error = mac_check_vnode_open(cred, vp, mode);
  210         if (error)
  211                 goto bad;
  212 #endif
  213         if ((fmode & O_CREAT) == 0) {
  214                 if (mode & VWRITE) {
  215                         error = vn_writechk(vp);
  216                         if (error)
  217                                 goto bad;
  218                 }
  219                 if (mode) {
  220                         error = VOP_ACCESS(vp, mode, cred, td);
  221                         if (error)
  222                                 goto bad;
  223                 }
  224         }
  225         if ((error = VOP_GETATTR(vp, vap, cred, td)) == 0) {
  226                 vp->v_cachedfs = vap->va_fsid;
  227                 vp->v_cachedid = vap->va_fileid;
  228         }
  229         if ((error = VOP_OPEN(vp, fmode, cred, td)) != 0)
  230                 goto bad;
  231         /*
  232          * Make sure that a VM object is created for VMIO support.
  233          */
  234         if (vn_canvmio(vp) == TRUE) {
  235 #ifdef LOOKUP_SHARED
  236                 int flock;
  237 
  238                 if (!exclusive && VOP_GETVOBJECT(vp, NULL) != 0)
  239                         VOP_LOCK(vp, LK_UPGRADE, td);
  240                 /*
  241                  * In cases where the object is marked as dead object_create
  242                  * will unlock and relock exclusive.  It is safe to call in
  243                  * here with a shared lock because we only examine fields that
  244                  * the shared lock guarantees will be stable.  In the UPGRADE
  245                  * case it is not likely that anyone has used this vnode yet
  246                  * so there will be no contention.  The logic after this call
  247                  * restores the requested locking state.
  248                  */
  249 #endif
  250                 if ((error = vfs_object_create(vp, td, cred)) != 0) {
  251                         VOP_UNLOCK(vp, 0, td);
  252                         VOP_CLOSE(vp, fmode, cred, td);
  253                         NDFREE(ndp, NDF_ONLY_PNBUF);
  254                         vrele(vp);
  255                         *flagp = fmode;
  256                         return (error);
  257                 }
  258 #ifdef LOOKUP_SHARED
  259                 flock = VOP_ISLOCKED(vp, td);
  260                 if (!exclusive && flock == LK_EXCLUSIVE)
  261                         VOP_LOCK(vp, LK_DOWNGRADE, td);
  262 #endif
  263         }
  264 
  265         if (fmode & FWRITE)
  266                 vp->v_writecount++;
  267         *flagp = fmode;
  268         return (0);
  269 bad:
  270         NDFREE(ndp, NDF_ONLY_PNBUF);
  271         vput(vp);
  272         *flagp = fmode;
  273         ndp->ni_vp = NULL;
  274         return (error);
  275 }
  276 
  277 /*
  278  * Check for write permissions on the specified vnode.
  279  * Prototype text segments cannot be written.
  280  */
  281 int
  282 vn_writechk(vp)
  283         register struct vnode *vp;
  284 {
  285 
  286         ASSERT_VOP_LOCKED(vp, "vn_writechk");
  287         /*
  288          * If there's shared text associated with
  289          * the vnode, try to free it up once.  If
  290          * we fail, we can't allow writing.
  291          */
  292         if (vp->v_vflag & VV_TEXT)
  293                 return (ETXTBSY);
  294 
  295         return (0);
  296 }
  297 
  298 /*
  299  * Vnode close call
  300  */
  301 int
  302 vn_close(vp, flags, file_cred, td)
  303         register struct vnode *vp;
  304         int flags;
  305         struct ucred *file_cred;
  306         struct thread *td;
  307 {
  308         int error;
  309 
  310         if (flags & FWRITE)
  311                 vp->v_writecount--;
  312         error = VOP_CLOSE(vp, flags, file_cred, td);
  313         /*
  314          * XXX - In certain instances VOP_CLOSE has to do the vrele
  315          * itself. If the vrele has been done, it will return EAGAIN
  316          * to indicate that the vrele should not be done again. When
  317          * this happens, we just return success. The correct thing to
  318          * do would be to have all VOP_CLOSE instances do the vrele.
  319          */
  320         if (error == EAGAIN)
  321                 return (0);
  322         vrele(vp);
  323         return (error);
  324 }
  325 
  326 /*
  327  * Sequential heuristic - detect sequential operation
  328  */
  329 static __inline
  330 int
  331 sequential_heuristic(struct uio *uio, struct file *fp)
  332 {
  333 
  334         if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
  335             uio->uio_offset == fp->f_nextoff) {
  336                 /*
  337                  * XXX we assume that the filesystem block size is
  338                  * the default.  Not true, but still gives us a pretty
  339                  * good indicator of how sequential the read operations
  340                  * are.
  341                  */
  342                 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
  343                 if (fp->f_seqcount > IO_SEQMAX)
  344                         fp->f_seqcount = IO_SEQMAX;
  345                 return(fp->f_seqcount << IO_SEQSHIFT);
  346         }
  347 
  348         /*
  349          * Not sequential, quick draw-down of seqcount
  350          */
  351         if (fp->f_seqcount > 1)
  352                 fp->f_seqcount = 1;
  353         else
  354                 fp->f_seqcount = 0;
  355         return(0);
  356 }
  357 
  358 /*
  359  * Package up an I/O request on a vnode into a uio and do it.
  360  */
  361 int
  362 vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
  363     aresid, td)
  364         enum uio_rw rw;
  365         struct vnode *vp;
  366         caddr_t base;
  367         int len;
  368         off_t offset;
  369         enum uio_seg segflg;
  370         int ioflg;
  371         struct ucred *active_cred;
  372         struct ucred *file_cred;
  373         int *aresid;
  374         struct thread *td;
  375 {
  376         struct uio auio;
  377         struct iovec aiov;
  378         struct mount *mp;
  379         struct ucred *cred;
  380         int error;
  381 
  382         if ((ioflg & IO_NODELOCKED) == 0) {
  383                 mp = NULL;
  384                 if (rw == UIO_WRITE) { 
  385                         if (vp->v_type != VCHR &&
  386                             (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
  387                             != 0)
  388                                 return (error);
  389                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  390                 } else {
  391                         /*
  392                          * XXX This should be LK_SHARED but I don't trust VFS
  393                          * enough to leave it like that until it has been
  394                          * reviewed further.
  395                          */
  396                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  397                 }
  398 
  399         }
  400         auio.uio_iov = &aiov;
  401         auio.uio_iovcnt = 1;
  402         aiov.iov_base = base;
  403         aiov.iov_len = len;
  404         auio.uio_resid = len;
  405         auio.uio_offset = offset;
  406         auio.uio_segflg = segflg;
  407         auio.uio_rw = rw;
  408         auio.uio_td = td;
  409         error = 0;
  410 #ifdef MAC
  411         if ((ioflg & IO_NOMACCHECK) == 0) {
  412                 if (rw == UIO_READ)
  413                         error = mac_check_vnode_read(active_cred, file_cred,
  414                             vp);
  415                 else
  416                         error = mac_check_vnode_write(active_cred, file_cred,
  417                             vp);
  418         }
  419 #endif
  420         if (error == 0) {
  421                 if (file_cred)
  422                         cred = file_cred;
  423                 else
  424                         cred = active_cred;
  425                 if (rw == UIO_READ)
  426                         error = VOP_READ(vp, &auio, ioflg, cred);
  427                 else
  428                         error = VOP_WRITE(vp, &auio, ioflg, cred);
  429         }
  430         if (aresid)
  431                 *aresid = auio.uio_resid;
  432         else
  433                 if (auio.uio_resid && error == 0)
  434                         error = EIO;
  435         if ((ioflg & IO_NODELOCKED) == 0) {
  436                 if (rw == UIO_WRITE)
  437                         vn_finished_write(mp);
  438                 VOP_UNLOCK(vp, 0, td);
  439         }
  440         return (error);
  441 }
  442 
  443 /*
  444  * Package up an I/O request on a vnode into a uio and do it.  The I/O
  445  * request is split up into smaller chunks and we try to avoid saturating
  446  * the buffer cache while potentially holding a vnode locked, so we 
  447  * check bwillwrite() before calling vn_rdwr().  We also call uio_yield()
  448  * to give other processes a chance to lock the vnode (either other processes
  449  * core'ing the same binary, or unrelated processes scanning the directory).
  450  */
  451 int
  452 vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred,
  453     file_cred, aresid, td)
  454         enum uio_rw rw;
  455         struct vnode *vp;
  456         caddr_t base;
  457         int len;
  458         off_t offset;
  459         enum uio_seg segflg;
  460         int ioflg;
  461         struct ucred *active_cred;
  462         struct ucred *file_cred;
  463         int *aresid;
  464         struct thread *td;
  465 {
  466         int error = 0;
  467 
  468         do {
  469                 int chunk = (len > MAXBSIZE) ? MAXBSIZE : len;
  470 
  471                 if (rw != UIO_READ && vp->v_type == VREG)
  472                         bwillwrite();
  473                 error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
  474                     ioflg, active_cred, file_cred, aresid, td);
  475                 len -= chunk;   /* aresid calc already includes length */
  476                 if (error)
  477                         break;
  478                 offset += chunk;
  479                 base += chunk;
  480                 uio_yield();
  481         } while (len);
  482         if (aresid)
  483                 *aresid += len;
  484         return (error);
  485 }
  486 
  487 /*
  488  * File table vnode read routine.
  489  */
  490 static int
  491 vn_read(fp, uio, active_cred, flags, td)
  492         struct file *fp;
  493         struct uio *uio;
  494         struct ucred *active_cred;
  495         struct thread *td;
  496         int flags;
  497 {
  498         struct vnode *vp;
  499         int error, ioflag;
  500 
  501         mtx_lock(&Giant);
  502         KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
  503             uio->uio_td, td));
  504         vp = fp->f_data;
  505         ioflag = 0;
  506         if (fp->f_flag & FNONBLOCK)
  507                 ioflag |= IO_NDELAY;
  508         if (fp->f_flag & O_DIRECT)
  509                 ioflag |= IO_DIRECT;
  510         VOP_LEASE(vp, td, fp->f_cred, LEASE_READ);
  511         /*
  512          * According to McKusick the vn lock is protecting f_offset here.
  513          * Once this field has it's own lock we can acquire this shared.
  514          */
  515         if ((flags & FOF_OFFSET) == 0) {
  516                 vn_lock(vp, LK_EXCLUSIVE | LK_NOPAUSE | LK_RETRY, td);
  517                 uio->uio_offset = fp->f_offset;
  518         } else
  519                 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
  520 
  521         ioflag |= sequential_heuristic(uio, fp);
  522 
  523 #ifdef MAC
  524         error = mac_check_vnode_read(active_cred, fp->f_cred, vp);
  525         if (error == 0)
  526 #endif
  527                 error = VOP_READ(vp, uio, ioflag, fp->f_cred);
  528         if ((flags & FOF_OFFSET) == 0)
  529                 fp->f_offset = uio->uio_offset;
  530         fp->f_nextoff = uio->uio_offset;
  531         VOP_UNLOCK(vp, 0, td);
  532         mtx_unlock(&Giant);
  533         return (error);
  534 }
  535 
  536 /*
  537  * File table vnode write routine.
  538  */
  539 static int
  540 vn_write(fp, uio, active_cred, flags, td)
  541         struct file *fp;
  542         struct uio *uio;
  543         struct ucred *active_cred;
  544         struct thread *td;
  545         int flags;
  546 {
  547         struct vnode *vp;
  548         struct mount *mp;
  549         int error, ioflag;
  550 
  551         mtx_lock(&Giant);
  552         KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
  553             uio->uio_td, td));
  554         vp = fp->f_data;
  555         if (vp->v_type == VREG)
  556                 bwillwrite();
  557         ioflag = IO_UNIT;
  558         if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
  559                 ioflag |= IO_APPEND;
  560         if (fp->f_flag & FNONBLOCK)
  561                 ioflag |= IO_NDELAY;
  562         if (fp->f_flag & O_DIRECT)
  563                 ioflag |= IO_DIRECT;
  564         if ((fp->f_flag & O_FSYNC) ||
  565             (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
  566                 ioflag |= IO_SYNC;
  567         mp = NULL;
  568         if (vp->v_type != VCHR &&
  569             (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
  570                 mtx_unlock(&Giant);
  571                 return (error);
  572         }
  573         VOP_LEASE(vp, td, fp->f_cred, LEASE_WRITE);
  574         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  575         if ((flags & FOF_OFFSET) == 0)
  576                 uio->uio_offset = fp->f_offset;
  577         ioflag |= sequential_heuristic(uio, fp);
  578 #ifdef MAC
  579         error = mac_check_vnode_write(active_cred, fp->f_cred, vp);
  580         if (error == 0)
  581 #endif
  582                 error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
  583         if ((flags & FOF_OFFSET) == 0)
  584                 fp->f_offset = uio->uio_offset;
  585         fp->f_nextoff = uio->uio_offset;
  586         VOP_UNLOCK(vp, 0, td);
  587         vn_finished_write(mp);
  588         mtx_unlock(&Giant);
  589         return (error);
  590 }
  591 
  592 /*
  593  * File table vnode stat routine.
  594  */
  595 static int
  596 vn_statfile(fp, sb, active_cred, td)
  597         struct file *fp;
  598         struct stat *sb;
  599         struct ucred *active_cred;
  600         struct thread *td;
  601 {
  602         struct vnode *vp = fp->f_data;
  603         int error;
  604 
  605         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  606         error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
  607         VOP_UNLOCK(vp, 0, td);
  608 
  609         return (error);
  610 }
  611 
  612 /*
  613  * Stat a vnode; implementation for the stat syscall
  614  */
  615 int
  616 vn_stat(vp, sb, active_cred, file_cred, td)
  617         struct vnode *vp;
  618         register struct stat *sb;
  619         struct ucred *active_cred;
  620         struct ucred *file_cred;
  621         struct thread *td;
  622 {
  623         struct vattr vattr;
  624         register struct vattr *vap;
  625         int error;
  626         u_short mode;
  627 
  628 #ifdef MAC
  629         error = mac_check_vnode_stat(active_cred, file_cred, vp);
  630         if (error)
  631                 return (error);
  632 #endif
  633 
  634         vap = &vattr;
  635         error = VOP_GETATTR(vp, vap, active_cred, td);
  636         if (error)
  637                 return (error);
  638 
  639         vp->v_cachedfs = vap->va_fsid;
  640         vp->v_cachedid = vap->va_fileid;
  641 
  642         /*
  643          * Zero the spare stat fields
  644          */
  645         bzero(sb, sizeof *sb);
  646 
  647         /*
  648          * Copy from vattr table
  649          */
  650         if (vap->va_fsid != VNOVAL)
  651                 sb->st_dev = vap->va_fsid;
  652         else
  653                 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
  654         sb->st_ino = vap->va_fileid;
  655         mode = vap->va_mode;
  656         switch (vap->va_type) {
  657         case VREG:
  658                 mode |= S_IFREG;
  659                 break;
  660         case VDIR:
  661                 mode |= S_IFDIR;
  662                 break;
  663         case VBLK:
  664                 mode |= S_IFBLK;
  665                 break;
  666         case VCHR:
  667                 mode |= S_IFCHR;
  668                 break;
  669         case VLNK:
  670                 mode |= S_IFLNK;
  671                 /* This is a cosmetic change, symlinks do not have a mode. */
  672                 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
  673                         sb->st_mode &= ~ACCESSPERMS;    /* 0000 */
  674                 else
  675                         sb->st_mode |= ACCESSPERMS;     /* 0777 */
  676                 break;
  677         case VSOCK:
  678                 mode |= S_IFSOCK;
  679                 break;
  680         case VFIFO:
  681                 mode |= S_IFIFO;
  682                 break;
  683         default:
  684                 return (EBADF);
  685         };
  686         sb->st_mode = mode;
  687         sb->st_nlink = vap->va_nlink;
  688         sb->st_uid = vap->va_uid;
  689         sb->st_gid = vap->va_gid;
  690         sb->st_rdev = vap->va_rdev;
  691         if (vap->va_size > OFF_MAX)
  692                 return (EOVERFLOW);
  693         sb->st_size = vap->va_size;
  694         sb->st_atimespec = vap->va_atime;
  695         sb->st_mtimespec = vap->va_mtime;
  696         sb->st_ctimespec = vap->va_ctime;
  697         sb->st_birthtimespec = vap->va_birthtime;
  698 
  699         /*
  700          * According to www.opengroup.org, the meaning of st_blksize is 
  701          *   "a filesystem-specific preferred I/O block size for this 
  702          *    object.  In some filesystem types, this may vary from file
  703          *    to file"
  704          * Default to PAGE_SIZE after much discussion.
  705          */
  706 
  707         if (vap->va_type == VREG) {
  708                 sb->st_blksize = vap->va_blocksize;
  709         } else if (vn_isdisk(vp, NULL)) {
  710                 sb->st_blksize = vp->v_rdev->si_bsize_best;
  711                 if (sb->st_blksize < vp->v_rdev->si_bsize_phys)
  712                         sb->st_blksize = vp->v_rdev->si_bsize_phys;
  713                 if (sb->st_blksize < BLKDEV_IOSIZE)
  714                         sb->st_blksize = BLKDEV_IOSIZE;
  715         } else {
  716                 sb->st_blksize = PAGE_SIZE;
  717         }
  718         
  719         sb->st_flags = vap->va_flags;
  720         if (suser(td))
  721                 sb->st_gen = 0;
  722         else
  723                 sb->st_gen = vap->va_gen;
  724 
  725 #if (S_BLKSIZE == 512)
  726         /* Optimize this case */
  727         sb->st_blocks = vap->va_bytes >> 9;
  728 #else
  729         sb->st_blocks = vap->va_bytes / S_BLKSIZE;
  730 #endif
  731         return (0);
  732 }
  733 
  734 /*
  735  * File table vnode ioctl routine.
  736  */
  737 static int
  738 vn_ioctl(fp, com, data, active_cred, td)
  739         struct file *fp;
  740         u_long com;
  741         void *data;
  742         struct ucred *active_cred;
  743         struct thread *td;
  744 {
  745         struct vnode *vp = fp->f_data;
  746         struct vnode *vpold;
  747         struct vattr vattr;
  748         int error;
  749 
  750         switch (vp->v_type) {
  751 
  752         case VREG:
  753         case VDIR:
  754                 if (com == FIONREAD) {
  755                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  756                         error = VOP_GETATTR(vp, &vattr, active_cred, td);
  757                         VOP_UNLOCK(vp, 0, td);
  758                         if (error)
  759                                 return (error);
  760                         *(int *)data = vattr.va_size - fp->f_offset;
  761                         return (0);
  762                 }
  763                 if (com == FIONBIO || com == FIOASYNC)  /* XXX */
  764                         return (0);                     /* XXX */
  765                 /* FALLTHROUGH */
  766 
  767         default:
  768 #if 0
  769                 return (ENOTTY);
  770 #endif
  771         case VFIFO:
  772         case VCHR:
  773         case VBLK:
  774                 if (com == FIODTYPE) {
  775                         if (vp->v_type != VCHR && vp->v_type != VBLK)
  776                                 return (ENOTTY);
  777                         *(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK;
  778                         return (0);
  779                 }
  780                 error = VOP_IOCTL(vp, com, data, fp->f_flag, active_cred, td);
  781                 if (error == ENOIOCTL) {
  782 #ifdef DIAGNOSTIC
  783                         Debugger("ENOIOCTL leaked through");
  784 #endif
  785                         error = ENOTTY;
  786                 }
  787                 if (error == 0 && com == TIOCSCTTY) {
  788 
  789                         /* Do nothing if reassigning same control tty */
  790                         sx_slock(&proctree_lock);
  791                         if (td->td_proc->p_session->s_ttyvp == vp) {
  792                                 sx_sunlock(&proctree_lock);
  793                                 return (0);
  794                         }
  795 
  796                         vpold = td->td_proc->p_session->s_ttyvp;
  797                         VREF(vp);
  798                         SESS_LOCK(td->td_proc->p_session);
  799                         td->td_proc->p_session->s_ttyvp = vp;
  800                         SESS_UNLOCK(td->td_proc->p_session);
  801 
  802                         sx_sunlock(&proctree_lock);
  803 
  804                         /* Get rid of reference to old control tty */
  805                         if (vpold)
  806                                 vrele(vpold);
  807                 }
  808                 return (error);
  809         }
  810 }
  811 
  812 /*
  813  * File table vnode poll routine.
  814  */
  815 static int
  816 vn_poll(fp, events, active_cred, td)
  817         struct file *fp;
  818         int events;
  819         struct ucred *active_cred;
  820         struct thread *td;
  821 {
  822         struct vnode *vp;
  823 #ifdef MAC
  824         int error;
  825 #endif
  826 
  827         vp = fp->f_data;
  828 #ifdef MAC
  829         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  830         error = mac_check_vnode_poll(active_cred, fp->f_cred, vp);
  831         VOP_UNLOCK(vp, 0, td);
  832         if (error)
  833                 return (error);
  834 #endif
  835 
  836         return (VOP_POLL(vp, events, fp->f_cred, td));
  837 }
  838 
  839 /*
  840  * Check that the vnode is still valid, and if so
  841  * acquire requested lock.
  842  */
  843 int
  844 #ifndef DEBUG_LOCKS
  845 vn_lock(vp, flags, td)
  846 #else
  847 debug_vn_lock(vp, flags, td, filename, line)
  848 #endif
  849         struct vnode *vp;
  850         int flags;
  851         struct thread *td;
  852 #ifdef  DEBUG_LOCKS
  853         const char *filename;
  854         int line;
  855 #endif
  856 {
  857         int error;
  858 
  859         do {
  860                 if ((flags & LK_INTERLOCK) == 0)
  861                         VI_LOCK(vp);
  862                 if ((vp->v_iflag & VI_XLOCK) && vp->v_vxproc != curthread) {
  863                         vp->v_iflag |= VI_XWANT;
  864                         msleep(vp, VI_MTX(vp), PINOD, "vn_lock", 0);
  865                         error = ENOENT;
  866                         if ((flags & LK_RETRY) == 0) {
  867                                 VI_UNLOCK(vp);
  868                                 return (error);
  869                         }
  870                 } 
  871 #ifdef  DEBUG_LOCKS
  872                 vp->filename = filename;
  873                 vp->line = line;
  874 #endif
  875                 /*
  876                  * lockmgr drops interlock before it will return for
  877                  * any reason.  So force the code above to relock it.
  878                  */
  879                 error = VOP_LOCK(vp, flags | LK_NOPAUSE | LK_INTERLOCK, td);
  880                 flags &= ~LK_INTERLOCK;
  881         } while (flags & LK_RETRY && error != 0);
  882         return (error);
  883 }
  884 
  885 /*
  886  * File table vnode close routine.
  887  */
  888 static int
  889 vn_closefile(fp, td)
  890         struct file *fp;
  891         struct thread *td;
  892 {
  893 
  894         fp->f_ops = &badfileops;
  895         return (vn_close(fp->f_data, fp->f_flag, fp->f_cred, td));
  896 }
  897 
  898 /*
  899  * Preparing to start a filesystem write operation. If the operation is
  900  * permitted, then we bump the count of operations in progress and
  901  * proceed. If a suspend request is in progress, we wait until the
  902  * suspension is over, and then proceed.
  903  */
  904 int
  905 vn_start_write(vp, mpp, flags)
  906         struct vnode *vp;
  907         struct mount **mpp;
  908         int flags;
  909 {
  910         struct mount *mp;
  911         int error;
  912 
  913         /*
  914          * If a vnode is provided, get and return the mount point that
  915          * to which it will write.
  916          */
  917         if (vp != NULL) {
  918                 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
  919                         *mpp = NULL;
  920                         if (error != EOPNOTSUPP)
  921                                 return (error);
  922                         return (0);
  923                 }
  924         }
  925         if ((mp = *mpp) == NULL)
  926                 return (0);
  927         /*
  928          * Check on status of suspension.
  929          */
  930         while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
  931                 if (flags & V_NOWAIT)
  932                         return (EWOULDBLOCK);
  933                 error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
  934                     "suspfs", 0);
  935                 if (error)
  936                         return (error);
  937         }
  938         if (flags & V_XSLEEP)
  939                 return (0);
  940         mp->mnt_writeopcount++;
  941         return (0);
  942 }
  943 
  944 /*
  945  * Secondary suspension. Used by operations such as vop_inactive
  946  * routines that are needed by the higher level functions. These
  947  * are allowed to proceed until all the higher level functions have
  948  * completed (indicated by mnt_writeopcount dropping to zero). At that
  949  * time, these operations are halted until the suspension is over.
  950  */
  951 int
  952 vn_write_suspend_wait(vp, mp, flags)
  953         struct vnode *vp;
  954         struct mount *mp;
  955         int flags;
  956 {
  957         int error;
  958 
  959         if (vp != NULL) {
  960                 if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
  961                         if (error != EOPNOTSUPP)
  962                                 return (error);
  963                         return (0);
  964                 }
  965         }
  966         /*
  967          * If we are not suspended or have not yet reached suspended
  968          * mode, then let the operation proceed.
  969          */
  970         if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0)
  971                 return (0);
  972         if (flags & V_NOWAIT)
  973                 return (EWOULDBLOCK);
  974         /*
  975          * Wait for the suspension to finish.
  976          */
  977         return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
  978             "suspfs", 0));
  979 }
  980 
  981 /*
  982  * Filesystem write operation has completed. If we are suspending and this
  983  * operation is the last one, notify the suspender that the suspension is
  984  * now in effect.
  985  */
  986 void
  987 vn_finished_write(mp)
  988         struct mount *mp;
  989 {
  990 
  991         if (mp == NULL)
  992                 return;
  993         mp->mnt_writeopcount--;
  994         if (mp->mnt_writeopcount < 0)
  995                 panic("vn_finished_write: neg cnt");
  996         if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
  997             mp->mnt_writeopcount <= 0)
  998                 wakeup(&mp->mnt_writeopcount);
  999 }
 1000 
 1001 /*
 1002  * Request a filesystem to suspend write operations.
 1003  */
 1004 int
 1005 vfs_write_suspend(mp)
 1006         struct mount *mp;
 1007 {
 1008         struct thread *td = curthread;
 1009         int error;
 1010 
 1011         if (mp->mnt_kern_flag & MNTK_SUSPEND)
 1012                 return (0);
 1013         mp->mnt_kern_flag |= MNTK_SUSPEND;
 1014         if (mp->mnt_writeopcount > 0)
 1015                 (void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0);
 1016         if ((error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) != 0) {
 1017                 vfs_write_resume(mp);
 1018                 return (error);
 1019         }
 1020         mp->mnt_kern_flag |= MNTK_SUSPENDED;
 1021         return (0);
 1022 }
 1023 
 1024 /*
 1025  * Request a filesystem to resume write operations.
 1026  */
 1027 void
 1028 vfs_write_resume(mp)
 1029         struct mount *mp;
 1030 {
 1031 
 1032         if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0)
 1033                 return;
 1034         mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED);
 1035         wakeup(&mp->mnt_writeopcount);
 1036         wakeup(&mp->mnt_flag);
 1037 }
 1038 
 1039 /*
 1040  * Implement kqueues for files by translating it to vnode operation.
 1041  */
 1042 static int
 1043 vn_kqfilter(struct file *fp, struct knote *kn)
 1044 {
 1045 
 1046         return (VOP_KQFILTER(fp->f_data, kn));
 1047 }
 1048 
 1049 /*
 1050  * Simplified in-kernel wrapper calls for extended attribute access.
 1051  * Both calls pass in a NULL credential, authorizing as "kernel" access.
 1052  * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 1053  */
 1054 int
 1055 vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
 1056     const char *attrname, int *buflen, char *buf, struct thread *td)
 1057 {
 1058         struct uio      auio;
 1059         struct iovec    iov;
 1060         int     error;
 1061 
 1062         iov.iov_len = *buflen;
 1063         iov.iov_base = buf;
 1064 
 1065         auio.uio_iov = &iov;
 1066         auio.uio_iovcnt = 1;
 1067         auio.uio_rw = UIO_READ;
 1068         auio.uio_segflg = UIO_SYSSPACE;
 1069         auio.uio_td = td;
 1070         auio.uio_offset = 0;
 1071         auio.uio_resid = *buflen;
 1072 
 1073         if ((ioflg & IO_NODELOCKED) == 0)
 1074                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1075 
 1076         /* authorize attribute retrieval as kernel */
 1077         error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
 1078             td);
 1079 
 1080         if ((ioflg & IO_NODELOCKED) == 0)
 1081                 VOP_UNLOCK(vp, 0, td);
 1082 
 1083         if (error == 0) {
 1084                 *buflen = *buflen - auio.uio_resid;
 1085         }
 1086 
 1087         return (error);
 1088 }
 1089 
 1090 /*
 1091  * XXX failure mode if partially written?
 1092  */
 1093 int
 1094 vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
 1095     const char *attrname, int buflen, char *buf, struct thread *td)
 1096 {
 1097         struct uio      auio;
 1098         struct iovec    iov;
 1099         struct mount    *mp;
 1100         int     error;
 1101 
 1102         iov.iov_len = buflen;
 1103         iov.iov_base = buf;
 1104 
 1105         auio.uio_iov = &iov;
 1106         auio.uio_iovcnt = 1;
 1107         auio.uio_rw = UIO_WRITE;
 1108         auio.uio_segflg = UIO_SYSSPACE;
 1109         auio.uio_td = td;
 1110         auio.uio_offset = 0;
 1111         auio.uio_resid = buflen;
 1112 
 1113         if ((ioflg & IO_NODELOCKED) == 0) {
 1114                 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 1115                         return (error);
 1116                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1117         }
 1118 
 1119         /* authorize attribute setting as kernel */
 1120         error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);
 1121 
 1122         if ((ioflg & IO_NODELOCKED) == 0) {
 1123                 vn_finished_write(mp);
 1124                 VOP_UNLOCK(vp, 0, td);
 1125         }
 1126 
 1127         return (error);
 1128 }
 1129 
 1130 int
 1131 vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
 1132     const char *attrname, struct thread *td)
 1133 {
 1134         struct mount    *mp;
 1135         int     error;
 1136 
 1137         if ((ioflg & IO_NODELOCKED) == 0) {
 1138                 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
 1139                         return (error);
 1140                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1141         }
 1142 
 1143         /* authorize attribute removal as kernel */
 1144         error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, NULL, td);
 1145 
 1146         if ((ioflg & IO_NODELOCKED) == 0) {
 1147                 vn_finished_write(mp);
 1148                 VOP_UNLOCK(vp, 0, td);
 1149         }
 1150 
 1151         return (error);
 1152 }
Cache object: 307f016ab4b0014061d745e56c77a8d9
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_vnops.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_vnops.c