FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_vnops.c
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *      The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/5.2/sys/kern/vfs_vnops.c 120743 2003-10-04 14:35:22Z jeff $");

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/filio.h>
#include <sys/sx.h>
#include <sys/ttycom.h>
#include <sys/conf.h>
#include <sys/syslog.h>

static fo_rdwr_t        vn_read;
static fo_rdwr_t        vn_write;
static fo_ioctl_t       vn_ioctl;
static fo_poll_t        vn_poll;
static fo_kqfilter_t    vn_kqfilter;
static fo_stat_t        vn_statfile;
static fo_close_t       vn_closefile;

struct fileops vnops = {
        .fo_read = vn_read,
        .fo_write = vn_write,
        .fo_ioctl = vn_ioctl,
        .fo_poll = vn_poll,
        .fo_kqfilter = vn_kqfilter,
        .fo_stat = vn_statfile,
        .fo_close = vn_closefile,
        .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
};

int
vn_open(ndp, flagp, cmode, fdidx)
        struct nameidata *ndp;
        int *flagp, cmode, fdidx;
{
        struct thread *td = ndp->ni_cnd.cn_thread;

        return (vn_open_cred(ndp, flagp, cmode, td->td_ucred, fdidx));
}

/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
 *
 * Note that this does NOT free nameidata for the successful case,
 * due to the NDINIT being done elsewhere.
 */
int
vn_open_cred(ndp, flagp, cmode, cred, fdidx)
        struct nameidata *ndp;
        int *flagp, cmode;
        struct ucred *cred;
        int fdidx;
{
        struct vnode *vp;
        struct mount *mp;
        struct thread *td = ndp->ni_cnd.cn_thread;
        struct vattr vat;
        struct vattr *vap = &vat;
        int mode, fmode, error;
#ifdef LOOKUP_SHARED
        int exclusive;          /* The current intended lock state */

        exclusive = 0;
#endif

restart:
        fmode = *flagp;
        if (fmode & O_CREAT) {
                ndp->ni_cnd.cn_nameiop = CREATE;
                ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
                if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
                        ndp->ni_cnd.cn_flags |= FOLLOW;
                bwillwrite();
                if ((error = namei(ndp)) != 0)
                        return (error);
                if (ndp->ni_vp == NULL) {
                        VATTR_NULL(vap);
                        vap->va_type = VREG;
                        vap->va_mode = cmode;
                        if (fmode & O_EXCL)
                                vap->va_vaflags |= VA_EXCLUSIVE;
                        if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
                                NDFREE(ndp, NDF_ONLY_PNBUF);
                                vput(ndp->ni_dvp);
                                if ((error = vn_start_write(NULL, &mp,
                                    V_XSLEEP | PCATCH)) != 0)
                                        return (error);
                                goto restart;
                        }
#ifdef MAC
                        error = mac_check_vnode_create(cred, ndp->ni_dvp,
                            &ndp->ni_cnd, vap);
                        if (error == 0) {
#endif
                                VOP_LEASE(ndp->ni_dvp, td, cred, LEASE_WRITE);
                                error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
                                    &ndp->ni_cnd, vap);
#ifdef MAC
                        }
#endif
                        vput(ndp->ni_dvp);
                        vn_finished_write(mp);
                        if (error) {
                                NDFREE(ndp, NDF_ONLY_PNBUF);
                                return (error);
                        }
                        ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create");
                        ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
                        fmode &= ~O_TRUNC;
                        vp = ndp->ni_vp;
#ifdef LOOKUP_SHARED
                        exclusive = 1;
#endif
                } else {
                        if (ndp->ni_dvp == ndp->ni_vp)
                                vrele(ndp->ni_dvp);
                        else
                                vput(ndp->ni_dvp);
                        ndp->ni_dvp = NULL;
                        vp = ndp->ni_vp;
                        if (fmode & O_EXCL) {
                                error = EEXIST;
                                goto bad;
                        }
                        fmode &= ~O_CREAT;
                }
        } else {
                ndp->ni_cnd.cn_nameiop = LOOKUP;
#ifdef LOOKUP_SHARED
                ndp->ni_cnd.cn_flags =
                    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
                    LOCKSHARED | LOCKLEAF;
#else
                ndp->ni_cnd.cn_flags =
                    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
#endif
                if ((error = namei(ndp)) != 0)
                        return (error);
                vp = ndp->ni_vp;
        }
        if (vp->v_type == VLNK) {
                error = EMLINK;
                goto bad;
        }
        if (vp->v_type == VSOCK) {
                error = EOPNOTSUPP;
                goto bad;
        }
        mode = 0;
        if (fmode & (FWRITE | O_TRUNC)) {
                if (vp->v_type == VDIR) {
                        error = EISDIR;
                        goto bad;
                }
                mode |= VWRITE;
        }
        if (fmode & FREAD)
                mode |= VREAD;
        if (fmode & O_APPEND)
                mode |= VAPPEND;
#ifdef MAC
        error = mac_check_vnode_open(cred, vp, mode);
        if (error)
                goto bad;
#endif
        if ((fmode & O_CREAT) == 0) {
                if (mode & VWRITE) {
                        error = vn_writechk(vp);
                        if (error)
                                goto bad;
                }
                if (mode) {
                        error = VOP_ACCESS(vp, mode, cred, td);
                        if (error)
                                goto bad;
                }
        }
        if ((error = VOP_GETATTR(vp, vap, cred, td)) == 0) {
                vp->v_cachedfs = vap->va_fsid;
                vp->v_cachedid = vap->va_fileid;
        }
        if ((error = VOP_OPEN(vp, fmode, cred, td, fdidx)) != 0)
                goto bad;
        /*
         * Make sure that a VM object is created for VMIO support.
         */
        if (vn_canvmio(vp) == TRUE) {
#ifdef LOOKUP_SHARED
                int flock;

                if (!exclusive && VOP_GETVOBJECT(vp, NULL) != 0)
                        VOP_LOCK(vp, LK_UPGRADE, td);
                /*
                 * In cases where the object is marked as dead object_create
                 * will unlock and relock exclusive. It is safe to call in
                 * here with a shared lock because we only examine fields that
                 * the shared lock guarantees will be stable. In the UPGRADE
                 * case it is not likely that anyone has used this vnode yet
                 * so there will be no contention. The logic after this call
                 * restores the requested locking state.
                 */
#endif
                if ((error = vfs_object_create(vp, td, cred)) != 0) {
                        VOP_UNLOCK(vp, 0, td);
                        VOP_CLOSE(vp, fmode, cred, td);
                        NDFREE(ndp, NDF_ONLY_PNBUF);
                        vrele(vp);
                        *flagp = fmode;
                        return (error);
                }
#ifdef LOOKUP_SHARED
                flock = VOP_ISLOCKED(vp, td);
                if (!exclusive && flock == LK_EXCLUSIVE)
                        VOP_LOCK(vp, LK_DOWNGRADE, td);
#endif
        }

        if (fmode & FWRITE)
                vp->v_writecount++;
        *flagp = fmode;
        ASSERT_VOP_LOCKED(vp, "vn_open_cred");
        return (0);
bad:
        NDFREE(ndp, NDF_ONLY_PNBUF);
        vput(vp);
        *flagp = fmode;
        ndp->ni_vp = NULL;
        return (error);
}
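
/*
 * Illustrative sketch (not part of the original file): a minimal in-kernel
 * open/close cycle built on vn_open(). The function name and the -1 fdidx
 * (meaning "no file descriptor slot") follow what other in-kernel callers
 * of this era appear to do, and are assumptions, not verbatim source.
 */
#if 0
static int
example_kernel_open(struct thread *td, const char *path)
{
        struct nameidata nd;
        int flags, error;

        flags = FREAD;
        NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, td);
        if ((error = vn_open(&nd, &flags, 0, -1)) != 0)
                return (error);
        /* vn_open does not free the pathname buffer on success. */
        NDFREE(&nd, NDF_ONLY_PNBUF);
        /* nd.ni_vp is returned locked; use it, then unlock and close. */
        VOP_UNLOCK(nd.ni_vp, 0, td);
        return (vn_close(nd.ni_vp, FREAD, td->td_ucred, td));
}
#endif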

/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(vp)
        register struct vnode *vp;
{

        ASSERT_VOP_LOCKED(vp, "vn_writechk");
        /*
         * If there's shared text associated with
         * the vnode, try to free it up once. If
         * we fail, we can't allow writing.
         */
        if (vp->v_vflag & VV_TEXT)
                return (ETXTBSY);

        return (0);
}

/*
 * Vnode close call
 */
int
vn_close(vp, flags, file_cred, td)
        register struct vnode *vp;
        int flags;
        struct ucred *file_cred;
        struct thread *td;
{
        int error;

        if (flags & FWRITE)
                vp->v_writecount--;
        error = VOP_CLOSE(vp, flags, file_cred, td);
        /*
         * XXX - In certain instances VOP_CLOSE has to do the vrele
         * itself. If the vrele has been done, it will return EAGAIN
         * to indicate that the vrele should not be done again. When
         * this happens, we just return success. The correct thing to
         * do would be to have all VOP_CLOSE instances do the vrele.
         */
        if (error == EAGAIN)
                return (0);
        vrele(vp);
        return (error);
}

/*
 * Sequential heuristic - detect sequential operation
 */
static __inline
int
sequential_heuristic(struct uio *uio, struct file *fp)
{

        if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
            uio->uio_offset == fp->f_nextoff) {
                /*
                 * XXX we assume that the filesystem block size is
                 * the default. Not true, but still gives us a pretty
                 * good indicator of how sequential the read operations
                 * are.
                 */
                fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
                if (fp->f_seqcount > IO_SEQMAX)
                        fp->f_seqcount = IO_SEQMAX;
                return (fp->f_seqcount << IO_SEQSHIFT);
        }

        /*
         * Not sequential, quick draw-down of seqcount
         */
        if (fp->f_seqcount > 1)
                fp->f_seqcount = 1;
        else
                fp->f_seqcount = 0;
        return (0);
}
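
/*
 * Illustrative sketch (not part of the original file): how a filesystem
 * read routine can recover the hint that sequential_heuristic() packed
 * into the high bits of ioflag. The helper name is hypothetical; ffs
 * performs a comparable shift to scale its read-ahead.
 */
#if 0
static int
example_seq_hint(int ioflag)
{

        /* The hint was clamped to IO_SEQMAX, so the shift recovers it. */
        return (ioflag >> IO_SEQSHIFT);
}
#endif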

/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
    aresid, td)
        enum uio_rw rw;
        struct vnode *vp;
        caddr_t base;
        int len;
        off_t offset;
        enum uio_seg segflg;
        int ioflg;
        struct ucred *active_cred;
        struct ucred *file_cred;
        int *aresid;
        struct thread *td;
{
        struct uio auio;
        struct iovec aiov;
        struct mount *mp;
        struct ucred *cred;
        int error;

        if ((ioflg & IO_NODELOCKED) == 0) {
                mp = NULL;
                if (rw == UIO_WRITE) {
                        if (vp->v_type != VCHR &&
                            (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
                            != 0)
                                return (error);
                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
                } else {
                        /*
                         * XXX This should be LK_SHARED but I don't trust VFS
                         * enough to leave it like that until it has been
                         * reviewed further.
                         */
                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
                }

        }
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        aiov.iov_base = base;
        aiov.iov_len = len;
        auio.uio_resid = len;
        auio.uio_offset = offset;
        auio.uio_segflg = segflg;
        auio.uio_rw = rw;
        auio.uio_td = td;
        error = 0;
#ifdef MAC
        if ((ioflg & IO_NOMACCHECK) == 0) {
                if (rw == UIO_READ)
                        error = mac_check_vnode_read(active_cred, file_cred,
                            vp);
                else
                        error = mac_check_vnode_write(active_cred, file_cred,
                            vp);
        }
#endif
        if (error == 0) {
                if (file_cred)
                        cred = file_cred;
                else
                        cred = active_cred;
                if (rw == UIO_READ)
                        error = VOP_READ(vp, &auio, ioflg, cred);
                else
                        error = VOP_WRITE(vp, &auio, ioflg, cred);
        }
        if (aresid)
                *aresid = auio.uio_resid;
        else
                if (auio.uio_resid && error == 0)
                        error = EIO;
        if ((ioflg & IO_NODELOCKED) == 0) {
                if (rw == UIO_WRITE)
                        vn_finished_write(mp);
                VOP_UNLOCK(vp, 0, td);
        }
        return (error);
}
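
/*
 * Illustrative sketch (not part of the original file): reading the first
 * bytes of a file into a kernel buffer with vn_rdwr(). IO_NODELOCKED is
 * not set, so vn_rdwr() takes and drops the vnode lock itself. The name
 * and buffer size are hypothetical.
 */
#if 0
static int
example_read_header(struct vnode *vp, struct thread *td)
{
        char buf[64];
        int resid, error;

        error = vn_rdwr(UIO_READ, vp, buf, sizeof(buf), (off_t)0,
            UIO_SYSSPACE, 0, td->td_ucred, NOCRED, &resid, td);
        /* On success, sizeof(buf) - resid bytes of buf are valid. */
        return (error);
}
#endif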

/*
 * Package up an I/O request on a vnode into a uio and do it. The I/O
 * request is split up into smaller chunks and we try to avoid saturating
 * the buffer cache while potentially holding a vnode locked, so we
 * check bwillwrite() before calling vn_rdwr(). We also call uio_yield()
 * to give other processes a chance to lock the vnode (either other processes
 * core'ing the same binary, or unrelated processes scanning the directory).
 */
int
vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred,
    file_cred, aresid, td)
        enum uio_rw rw;
        struct vnode *vp;
        caddr_t base;
        int len;
        off_t offset;
        enum uio_seg segflg;
        int ioflg;
        struct ucred *active_cred;
        struct ucred *file_cred;
        int *aresid;
        struct thread *td;
{
        int error = 0;

        do {
                int chunk = (len > MAXBSIZE) ? MAXBSIZE : len;

                if (rw != UIO_READ && vp->v_type == VREG)
                        bwillwrite();
                error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
                    ioflg, active_cred, file_cred, aresid, td);
                len -= chunk;   /* aresid calc already includes length */
                if (error)
                        break;
                offset += chunk;
                base += chunk;
                uio_yield();
        } while (len);
        if (aresid)
                *aresid += len;
        return (error);
}
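
/*
 * Illustrative sketch (not part of the original file): pushing a large
 * kernel buffer out in MAXBSIZE chunks, roughly the way a coredump
 * writer would use this helper. The function name is hypothetical.
 */
#if 0
static int
example_write_big(struct vnode *vp, caddr_t buf, int len, struct thread *td)
{

        return (vn_rdwr_inchunks(UIO_WRITE, vp, buf, len, (off_t)0,
            UIO_SYSSPACE, IO_UNIT, td->td_ucred, NOCRED, NULL, td));
}
#endif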

/*
 * File table vnode read routine.
 */
static int
vn_read(fp, uio, active_cred, flags, td)
        struct file *fp;
        struct uio *uio;
        struct ucred *active_cred;
        struct thread *td;
        int flags;
{
        struct vnode *vp;
        int error, ioflag;

        mtx_lock(&Giant);
        KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
            uio->uio_td, td));
        vp = fp->f_vnode;
        ioflag = 0;
        if (fp->f_flag & FNONBLOCK)
                ioflag |= IO_NDELAY;
        if (fp->f_flag & O_DIRECT)
                ioflag |= IO_DIRECT;
        VOP_LEASE(vp, td, fp->f_cred, LEASE_READ);
        /*
         * According to McKusick the vn lock is protecting f_offset here.
         * Once this field has its own lock we can acquire this shared.
         */
        if ((flags & FOF_OFFSET) == 0) {
                vn_lock(vp, LK_EXCLUSIVE | LK_NOPAUSE | LK_RETRY, td);
                uio->uio_offset = fp->f_offset;
        } else
                vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);

        ioflag |= sequential_heuristic(uio, fp);

#ifdef MAC
        error = mac_check_vnode_read(active_cred, fp->f_cred, vp);
        if (error == 0)
#endif
                error = VOP_READ(vp, uio, ioflag, fp->f_cred);
        if ((flags & FOF_OFFSET) == 0)
                fp->f_offset = uio->uio_offset;
        fp->f_nextoff = uio->uio_offset;
        VOP_UNLOCK(vp, 0, td);
        mtx_unlock(&Giant);
        return (error);
}

/*
 * File table vnode write routine.
 */
static int
vn_write(fp, uio, active_cred, flags, td)
        struct file *fp;
        struct uio *uio;
        struct ucred *active_cred;
        struct thread *td;
        int flags;
{
        struct vnode *vp;
        struct mount *mp;
        int error, ioflag;

        mtx_lock(&Giant);
        KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
            uio->uio_td, td));
        vp = fp->f_vnode;
        if (vp->v_type == VREG)
                bwillwrite();
        ioflag = IO_UNIT;
        if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
                ioflag |= IO_APPEND;
        if (fp->f_flag & FNONBLOCK)
                ioflag |= IO_NDELAY;
        if (fp->f_flag & O_DIRECT)
                ioflag |= IO_DIRECT;
        if ((fp->f_flag & O_FSYNC) ||
            (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
                ioflag |= IO_SYNC;
        mp = NULL;
        if (vp->v_type != VCHR &&
            (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
                mtx_unlock(&Giant);
                return (error);
        }
        VOP_LEASE(vp, td, fp->f_cred, LEASE_WRITE);
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        if ((flags & FOF_OFFSET) == 0)
                uio->uio_offset = fp->f_offset;
        ioflag |= sequential_heuristic(uio, fp);
#ifdef MAC
        error = mac_check_vnode_write(active_cred, fp->f_cred, vp);
        if (error == 0)
#endif
                error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
        if ((flags & FOF_OFFSET) == 0)
                fp->f_offset = uio->uio_offset;
        fp->f_nextoff = uio->uio_offset;
        VOP_UNLOCK(vp, 0, td);
        vn_finished_write(mp);
        mtx_unlock(&Giant);
        return (error);
}

/*
 * File table vnode stat routine.
 */
static int
vn_statfile(fp, sb, active_cred, td)
        struct file *fp;
        struct stat *sb;
        struct ucred *active_cred;
        struct thread *td;
{
        struct vnode *vp = fp->f_vnode;
        int error;

        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
        VOP_UNLOCK(vp, 0, td);

        return (error);
}

/*
 * Stat a vnode; implementation for the stat syscall
 */
int
vn_stat(vp, sb, active_cred, file_cred, td)
        struct vnode *vp;
        register struct stat *sb;
        struct ucred *active_cred;
        struct ucred *file_cred;
        struct thread *td;
{
        struct vattr vattr;
        register struct vattr *vap;
        int error;
        u_short mode;

#ifdef MAC
        error = mac_check_vnode_stat(active_cred, file_cred, vp);
        if (error)
                return (error);
#endif

        vap = &vattr;
        error = VOP_GETATTR(vp, vap, active_cred, td);
        if (error)
                return (error);

        vp->v_cachedfs = vap->va_fsid;
        vp->v_cachedid = vap->va_fileid;

        /*
         * Zero the spare stat fields
         */
        bzero(sb, sizeof *sb);

        /*
         * Copy from vattr table
         */
        if (vap->va_fsid != VNOVAL)
                sb->st_dev = vap->va_fsid;
        else
                sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
        sb->st_ino = vap->va_fileid;
        mode = vap->va_mode;
        switch (vap->va_type) {
        case VREG:
                mode |= S_IFREG;
                break;
        case VDIR:
                mode |= S_IFDIR;
                break;
        case VBLK:
                mode |= S_IFBLK;
                break;
        case VCHR:
                mode |= S_IFCHR;
                break;
        case VLNK:
                mode |= S_IFLNK;
                /* This is a cosmetic change, symlinks do not have a mode. */
                if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
                        sb->st_mode &= ~ACCESSPERMS;    /* 0000 */
                else
                        sb->st_mode |= ACCESSPERMS;     /* 0777 */
                break;
        case VSOCK:
                mode |= S_IFSOCK;
                break;
        case VFIFO:
                mode |= S_IFIFO;
                break;
        default:
                return (EBADF);
        }
        sb->st_mode = mode;
        sb->st_nlink = vap->va_nlink;
        sb->st_uid = vap->va_uid;
        sb->st_gid = vap->va_gid;
        sb->st_rdev = vap->va_rdev;
        if (vap->va_size > OFF_MAX)
                return (EOVERFLOW);
        sb->st_size = vap->va_size;
        sb->st_atimespec = vap->va_atime;
        sb->st_mtimespec = vap->va_mtime;
        sb->st_ctimespec = vap->va_ctime;
        sb->st_birthtimespec = vap->va_birthtime;

        /*
         * According to www.opengroup.org, the meaning of st_blksize is
         * "a filesystem-specific preferred I/O block size for this
         * object. In some filesystem types, this may vary from file
         * to file"
         * Default to PAGE_SIZE after much discussion.
         */

        if (vap->va_type == VREG) {
                sb->st_blksize = vap->va_blocksize;
        } else if (vn_isdisk(vp, NULL)) {
                sb->st_blksize = vp->v_rdev->si_bsize_best;
                if (sb->st_blksize < vp->v_rdev->si_bsize_phys)
                        sb->st_blksize = vp->v_rdev->si_bsize_phys;
                if (sb->st_blksize < BLKDEV_IOSIZE)
                        sb->st_blksize = BLKDEV_IOSIZE;
        } else {
                sb->st_blksize = PAGE_SIZE;
        }

        sb->st_flags = vap->va_flags;
        if (suser(td))
                sb->st_gen = 0;
        else
                sb->st_gen = vap->va_gen;

#if (S_BLKSIZE == 512)
        /* Optimize this case */
        sb->st_blocks = vap->va_bytes >> 9;
#else
        sb->st_blocks = vap->va_bytes / S_BLKSIZE;
#endif
        return (0);
}

/*
 * File table vnode ioctl routine.
 */
static int
vn_ioctl(fp, com, data, active_cred, td)
        struct file *fp;
        u_long com;
        void *data;
        struct ucred *active_cred;
        struct thread *td;
{
        struct vnode *vp = fp->f_vnode;
        struct vnode *vpold;
        struct vattr vattr;
        int error;

        switch (vp->v_type) {

        case VREG:
        case VDIR:
                if (com == FIONREAD) {
                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
                        error = VOP_GETATTR(vp, &vattr, active_cred, td);
                        VOP_UNLOCK(vp, 0, td);
                        if (error)
                                return (error);
                        *(int *)data = vattr.va_size - fp->f_offset;
                        return (0);
                }
                if (com == FIONBIO || com == FIOASYNC)  /* XXX */
                        return (0);                     /* XXX */
                /* FALLTHROUGH */

        default:
#if 0
                return (ENOTTY);
#endif
        case VFIFO:
        case VCHR:
        case VBLK:
                if (com == FIODTYPE) {
                        if (vp->v_type != VCHR && vp->v_type != VBLK)
                                return (ENOTTY);
                        *(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK;
                        return (0);
                }
                error = VOP_IOCTL(vp, com, data, fp->f_flag, active_cred, td);
                if (error == ENOIOCTL) {
#ifdef DIAGNOSTIC
                        Debugger("ENOIOCTL leaked through");
#endif
                        error = ENOTTY;
                }
                if (error == 0 && com == TIOCSCTTY) {

                        /* Do nothing if reassigning same control tty */
                        sx_slock(&proctree_lock);
                        if (td->td_proc->p_session->s_ttyvp == vp) {
                                sx_sunlock(&proctree_lock);
                                return (0);
                        }

                        vpold = td->td_proc->p_session->s_ttyvp;
                        VREF(vp);
                        SESS_LOCK(td->td_proc->p_session);
                        td->td_proc->p_session->s_ttyvp = vp;
                        SESS_UNLOCK(td->td_proc->p_session);

                        sx_sunlock(&proctree_lock);

                        /* Get rid of reference to old control tty */
                        if (vpold)
                                vrele(vpold);
                }
                return (error);
        }
}

/*
 * File table vnode poll routine.
 */
static int
vn_poll(fp, events, active_cred, td)
        struct file *fp;
        int events;
        struct ucred *active_cred;
        struct thread *td;
{
        struct vnode *vp;
#ifdef MAC
        int error;
#endif

        vp = fp->f_vnode;
#ifdef MAC
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        error = mac_check_vnode_poll(active_cred, fp->f_cred, vp);
        VOP_UNLOCK(vp, 0, td);
        if (error)
                return (error);
#endif

        return (VOP_POLL(vp, events, fp->f_cred, td));
}

/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
#ifndef DEBUG_LOCKS
vn_lock(vp, flags, td)
#else
debug_vn_lock(vp, flags, td, filename, line)
#endif
        struct vnode *vp;
        int flags;
        struct thread *td;
#ifdef DEBUG_LOCKS
        const char *filename;
        int line;
#endif
{
        int error;

        do {
                if ((flags & LK_INTERLOCK) == 0)
                        VI_LOCK(vp);
                if ((vp->v_iflag & VI_XLOCK) && vp->v_vxproc != curthread) {
                        if ((flags & LK_NOWAIT) != 0) {
                                VI_UNLOCK(vp);
                                return (ENOENT);
                        }
                        vp->v_iflag |= VI_XWANT;
                        msleep(vp, VI_MTX(vp), PINOD, "vn_lock", 0);
                        if ((flags & LK_RETRY) == 0) {
                                VI_UNLOCK(vp);
                                return (ENOENT);
                        }
                }
#ifdef DEBUG_LOCKS
                vp->filename = filename;
                vp->line = line;
#endif
                /*
                 * lockmgr drops interlock before it will return for
                 * any reason. So force the code above to relock it.
                 */
                error = VOP_LOCK(vp, flags | LK_NOPAUSE | LK_INTERLOCK, td);
                flags &= ~LK_INTERLOCK;
        } while (flags & LK_RETRY && error != 0);
        return (error);
}
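
/*
 * Illustrative sketch (not part of the original file): the usual caller
 * pattern around vn_lock() when the vnode may be reclaimed concurrently.
 * Without LK_RETRY, vn_lock() returns ENOENT for a doomed vnode and the
 * caller must back out rather than touch it.
 */
#if 0
static int
example_locked_op(struct vnode *vp, struct thread *td)
{
        int error;

        if ((error = vn_lock(vp, LK_EXCLUSIVE, td)) != 0)
                return (error);         /* e.g. ENOENT: vnode went away */
        /* ... operate on the locked vnode here ... */
        VOP_UNLOCK(vp, 0, td);
        return (0);
}
#endif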

/*
 * File table vnode close routine.
 */
static int
vn_closefile(fp, td)
        struct file *fp;
        struct thread *td;
{

        fp->f_ops = &badfileops;
        return (vn_close(fp->f_vnode, fp->f_flag, fp->f_cred, td));
}

/*
 * Preparing to start a filesystem write operation. If the operation is
 * permitted, then we bump the count of operations in progress and
 * proceed. If a suspend request is in progress, we wait until the
 * suspension is over, and then proceed.
 */
int
vn_start_write(vp, mpp, flags)
        struct vnode *vp;
        struct mount **mpp;
        int flags;
{
        struct mount *mp;
        int error;

        /*
         * If a vnode is provided, get and return the mount point to
         * which it will write.
         */
        if (vp != NULL) {
                if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
                        *mpp = NULL;
                        if (error != EOPNOTSUPP)
                                return (error);
                        return (0);
                }
        }
        if ((mp = *mpp) == NULL)
                return (0);
        /*
         * Check on status of suspension.
         */
        while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
                if (flags & V_NOWAIT)
                        return (EWOULDBLOCK);
                error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
                    "suspfs", 0);
                if (error)
                        return (error);
        }
        if (flags & V_XSLEEP)
                return (0);
        mp->mnt_writeopcount++;
        return (0);
}
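
/*
 * Illustrative sketch (not part of the original file): the canonical
 * bracket callers place around a modifying VOP so that a filesystem
 * suspension can drain in-flight writers; compare vn_rdwr() and
 * vn_write() above. The function name is hypothetical.
 */
#if 0
static int
example_write_bracket(struct vnode *vp, struct thread *td)
{
        struct mount *mp;
        int error;

        if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
                return (error);
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        /* ... issue VOP_WRITE() or another modifying VOP here ... */
        VOP_UNLOCK(vp, 0, td);
        vn_finished_write(mp);
        return (0);
}
#endif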

/*
 * Secondary suspension. Used by operations such as vop_inactive
 * routines that are needed by the higher level functions. These
 * are allowed to proceed until all the higher level functions have
 * completed (indicated by mnt_writeopcount dropping to zero). At that
 * time, these operations are halted until the suspension is over.
 */
int
vn_write_suspend_wait(vp, mp, flags)
        struct vnode *vp;
        struct mount *mp;
        int flags;
{
        int error;

        if (vp != NULL) {
                if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
                        if (error != EOPNOTSUPP)
                                return (error);
                        return (0);
                }
        }
        /*
         * If we are not suspended or have not yet reached suspended
         * mode, then let the operation proceed.
         */
        if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0)
                return (0);
        if (flags & V_NOWAIT)
                return (EWOULDBLOCK);
        /*
         * Wait for the suspension to finish.
         */
        return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
            "suspfs", 0));
}

/*
 * Filesystem write operation has completed. If we are suspending and this
 * operation is the last one, notify the suspender that the suspension is
 * now in effect.
 */
void
vn_finished_write(mp)
        struct mount *mp;
{

        if (mp == NULL)
                return;
        mp->mnt_writeopcount--;
        if (mp->mnt_writeopcount < 0)
                panic("vn_finished_write: neg cnt");
        if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
            mp->mnt_writeopcount <= 0)
                wakeup(&mp->mnt_writeopcount);
}

/*
 * Request a filesystem to suspend write operations.
 */
int
vfs_write_suspend(mp)
        struct mount *mp;
{
        struct thread *td = curthread;
        int error;

        if (mp->mnt_kern_flag & MNTK_SUSPEND)
                return (0);
        mp->mnt_kern_flag |= MNTK_SUSPEND;
        if (mp->mnt_writeopcount > 0)
                (void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0);
        if ((error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) != 0) {
                vfs_write_resume(mp);
                return (error);
        }
        mp->mnt_kern_flag |= MNTK_SUSPENDED;
        return (0);
}

/*
 * Request a filesystem to resume write operations.
 */
void
vfs_write_resume(mp)
        struct mount *mp;
{

        if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0)
                return;
        mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED);
        wakeup(&mp->mnt_writeopcount);
        wakeup(&mp->mnt_flag);
}
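
/*
 * Illustrative sketch (not part of the original file): quiescing a
 * filesystem around a consistency-critical operation, in the style of
 * the snapshot code that drives this pair. Details of the critical
 * section are elided.
 */
#if 0
static int
example_quiesce(struct mount *mp)
{
        int error;

        if ((error = vfs_write_suspend(mp)) != 0)
                return (error);
        /* ... no writes are in progress on mp at this point ... */
        vfs_write_resume(mp);
        return (0);
}
#endif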

/*
 * Implement kqueues for files by translating them to a vnode operation.
 */
static int
vn_kqfilter(struct file *fp, struct knote *kn)
{

        return (VOP_KQFILTER(fp->f_vnode, kn));
}

/*
 * Simplified in-kernel wrapper calls for extended attribute access.
 * Both calls pass in a NULL credential, authorizing as "kernel" access.
 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 */
int
vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, int *buflen, char *buf, struct thread *td)
{
        struct uio auio;
        struct iovec iov;
        int error;

        iov.iov_len = *buflen;
        iov.iov_base = buf;

        auio.uio_iov = &iov;
        auio.uio_iovcnt = 1;
        auio.uio_rw = UIO_READ;
        auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_td = td;
        auio.uio_offset = 0;
        auio.uio_resid = *buflen;

        if ((ioflg & IO_NODELOCKED) == 0)
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);

        /* authorize attribute retrieval as kernel */
        error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
            td);

        if ((ioflg & IO_NODELOCKED) == 0)
                VOP_UNLOCK(vp, 0, td);

        if (error == 0) {
                *buflen = *buflen - auio.uio_resid;
        }

        return (error);
}
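
/*
 * Illustrative sketch (not part of the original file): fetching a
 * system-namespace extended attribute into a stack buffer. The
 * attribute name "example" is hypothetical.
 */
#if 0
static int
example_get_attr(struct vnode *vp, struct thread *td)
{
        char buf[128];
        int buflen, error;

        buflen = sizeof(buf);
        error = vn_extattr_get(vp, 0, EXTATTR_NAMESPACE_SYSTEM,
            "example", &buflen, buf, td);
        /* On success, buflen now holds the number of bytes returned. */
        return (error);
}
#endif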

/*
 * XXX failure mode if partially written?
 */
int
vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, int buflen, char *buf, struct thread *td)
{
        struct uio auio;
        struct iovec iov;
        struct mount *mp;
        int error;

        iov.iov_len = buflen;
        iov.iov_base = buf;

        auio.uio_iov = &iov;
        auio.uio_iovcnt = 1;
        auio.uio_rw = UIO_WRITE;
        auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_td = td;
        auio.uio_offset = 0;
        auio.uio_resid = buflen;

        if ((ioflg & IO_NODELOCKED) == 0) {
                if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
                        return (error);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        }

        /* authorize attribute setting as kernel */
        error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);

        if ((ioflg & IO_NODELOCKED) == 0) {
                vn_finished_write(mp);
                VOP_UNLOCK(vp, 0, td);
        }

        return (error);
}

int
vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, struct thread *td)
{
        struct mount *mp;
        int error;

        if ((ioflg & IO_NODELOCKED) == 0) {
                if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
                        return (error);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        }

        /* authorize attribute removal as kernel */
        error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL, td);
        if (error == EOPNOTSUPP)
                error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
                    NULL, td);

        if ((ioflg & IO_NODELOCKED) == 0) {
                vn_finished_write(mp);
                VOP_UNLOCK(vp, 0, td);
        }

        return (error);
}