/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
 * $FreeBSD: releng/5.0/sys/kern/vfs_vnops.c 105902 2002-10-25 00:20:37Z mckusick $
 */

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/filio.h>
#include <sys/sx.h>
#include <sys/ttycom.h>
#include <sys/conf.h>
#include <sys/syslog.h>

#include <machine/limits.h>

static int vn_closefile(struct file *fp, struct thread *td);
static int vn_ioctl(struct file *fp, u_long com, void *data,
    struct ucred *active_cred, struct thread *td);
static int vn_read(struct file *fp, struct uio *uio,
    struct ucred *active_cred, int flags, struct thread *td);
static int vn_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td);
static int vn_kqfilter(struct file *fp, struct knote *kn);
static int vn_statfile(struct file *fp, struct stat *sb,
    struct ucred *active_cred, struct thread *td);
static int vn_write(struct file *fp, struct uio *uio,
    struct ucred *active_cred, int flags, struct thread *td);

struct fileops vnops = {
        vn_read, vn_write, vn_ioctl, vn_poll, vn_kqfilter,
        vn_statfile, vn_closefile
};

int
vn_open(ndp, flagp, cmode)
        register struct nameidata *ndp;
        int *flagp, cmode;
{
        struct thread *td = ndp->ni_cnd.cn_thread;

        return (vn_open_cred(ndp, flagp, cmode, td->td_ucred));
}
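
/*
 * Example (editor's sketch, not part of the original file): callers such
 * as the open(2) path pair vn_open() with NDINIT(), roughly as follows;
 * the flag and mode values here are illustrative only.
 *
 *      struct nameidata nd;
 *      int flags = FREAD | FWRITE;
 *      int error;
 *
 *      NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, td);
 *      error = vn_open(&nd, &flags, 0644);
 *      if (error == 0) {
 *              NDFREE(&nd, NDF_ONLY_PNBUF);
 *              ... nd.ni_vp is now referenced and locked ...
 *      }
 */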

/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
 *
 * Note that this does NOT free nameidata for the successful case,
 * due to the NDINIT being done elsewhere.
 */
int
vn_open_cred(ndp, flagp, cmode, cred)
        register struct nameidata *ndp;
        int *flagp, cmode;
        struct ucred *cred;
{
        struct vnode *vp;
        struct mount *mp;
        struct thread *td = ndp->ni_cnd.cn_thread;
        struct vattr vat;
        struct vattr *vap = &vat;
        int mode, fmode, error;
#ifdef LOOKUP_SHARED
        int exclusive;          /* The current intended lock state */

        exclusive = 0;
#endif

restart:
        fmode = *flagp;
        if (fmode & O_CREAT) {
                ndp->ni_cnd.cn_nameiop = CREATE;
                ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
                if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
                        ndp->ni_cnd.cn_flags |= FOLLOW;
                bwillwrite();
                if ((error = namei(ndp)) != 0)
                        return (error);
                if (ndp->ni_vp == NULL) {
                        VATTR_NULL(vap);
                        vap->va_type = VREG;
                        vap->va_mode = cmode;
                        if (fmode & O_EXCL)
                                vap->va_vaflags |= VA_EXCLUSIVE;
                        if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
                                NDFREE(ndp, NDF_ONLY_PNBUF);
                                vput(ndp->ni_dvp);
                                if ((error = vn_start_write(NULL, &mp,
                                    V_XSLEEP | PCATCH)) != 0)
                                        return (error);
                                goto restart;
                        }
#ifdef MAC
                        error = mac_check_vnode_create(cred, ndp->ni_dvp,
                            &ndp->ni_cnd, vap);
                        if (error == 0) {
#endif
                                VOP_LEASE(ndp->ni_dvp, td, cred, LEASE_WRITE);
                                error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
                                    &ndp->ni_cnd, vap);
#ifdef MAC
                        }
#endif
                        vput(ndp->ni_dvp);
                        vn_finished_write(mp);
                        if (error) {
                                NDFREE(ndp, NDF_ONLY_PNBUF);
                                return (error);
                        }
                        ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create");
                        ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
                        fmode &= ~O_TRUNC;
                        vp = ndp->ni_vp;
#ifdef LOOKUP_SHARED
                        exclusive = 1;
#endif
                } else {
                        if (ndp->ni_dvp == ndp->ni_vp)
                                vrele(ndp->ni_dvp);
                        else
                                vput(ndp->ni_dvp);
                        ndp->ni_dvp = NULL;
                        vp = ndp->ni_vp;
                        if (fmode & O_EXCL) {
                                error = EEXIST;
                                goto bad;
                        }
                        fmode &= ~O_CREAT;
                }
        } else {
                ndp->ni_cnd.cn_nameiop = LOOKUP;
#ifdef LOOKUP_SHARED
                ndp->ni_cnd.cn_flags =
                    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
                    LOCKSHARED | LOCKLEAF;
#else
                ndp->ni_cnd.cn_flags =
                    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
#endif
                if ((error = namei(ndp)) != 0)
                        return (error);
                vp = ndp->ni_vp;
        }
        if (vp->v_type == VLNK) {
                error = EMLINK;
                goto bad;
        }
        if (vp->v_type == VSOCK) {
                error = EOPNOTSUPP;
                goto bad;
        }
        mode = 0;
        if (fmode & (FWRITE | O_TRUNC)) {
                if (vp->v_type == VDIR) {
                        error = EISDIR;
                        goto bad;
                }
                mode |= VWRITE;
        }
        if (fmode & FREAD)
                mode |= VREAD;
        if (fmode & O_APPEND)
                mode |= VAPPEND;
#ifdef MAC
        error = mac_check_vnode_open(cred, vp, mode);
        if (error)
                goto bad;
#endif
        if ((fmode & O_CREAT) == 0) {
                if (mode & VWRITE) {
                        error = vn_writechk(vp);
                        if (error)
                                goto bad;
                }
                if (mode) {
                        error = VOP_ACCESS(vp, mode, cred, td);
                        if (error)
                                goto bad;
                }
        }
        if ((error = VOP_GETATTR(vp, vap, cred, td)) == 0) {
                vp->v_cachedfs = vap->va_fsid;
                vp->v_cachedid = vap->va_fileid;
        }
        if ((error = VOP_OPEN(vp, fmode, cred, td)) != 0)
                goto bad;
        /*
         * Make sure that a VM object is created for VMIO support.
         */
        if (vn_canvmio(vp) == TRUE) {
#ifdef LOOKUP_SHARED
                int flock;

                if (!exclusive && VOP_GETVOBJECT(vp, NULL) != 0)
                        VOP_LOCK(vp, LK_UPGRADE, td);
                /*
                 * In cases where the object is marked as dead, object_create
                 * will unlock and relock exclusive.  It is safe to call in
                 * here with a shared lock because we only examine fields that
                 * the shared lock guarantees will be stable.  In the UPGRADE
                 * case it is not likely that anyone has used this vnode yet,
                 * so there will be no contention.  The logic after this call
                 * restores the requested locking state.
                 */
#endif
                if ((error = vfs_object_create(vp, td, cred)) != 0) {
                        VOP_UNLOCK(vp, 0, td);
                        VOP_CLOSE(vp, fmode, cred, td);
                        NDFREE(ndp, NDF_ONLY_PNBUF);
                        vrele(vp);
                        *flagp = fmode;
                        return (error);
                }
#ifdef LOOKUP_SHARED
                flock = VOP_ISLOCKED(vp, td);
                if (!exclusive && flock == LK_EXCLUSIVE)
                        VOP_LOCK(vp, LK_DOWNGRADE, td);
#endif
        }

        if (fmode & FWRITE)
                vp->v_writecount++;
        *flagp = fmode;
        return (0);
bad:
        NDFREE(ndp, NDF_ONLY_PNBUF);
        vput(vp);
        *flagp = fmode;
        return (error);
}

/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(vp)
        register struct vnode *vp;
{

        ASSERT_VOP_LOCKED(vp, "vn_writechk");
        /*
         * If there's shared text associated with
         * the vnode, try to free it up once.  If
         * we fail, we can't allow writing.
         */
        if (vp->v_vflag & VV_TEXT)
                return (ETXTBSY);

        return (0);
}

/*
 * Vnode close call
 */
int
vn_close(vp, flags, file_cred, td)
        register struct vnode *vp;
        int flags;
        struct ucred *file_cred;
        struct thread *td;
{
        int error;

        if (flags & FWRITE)
                vp->v_writecount--;
        error = VOP_CLOSE(vp, flags, file_cred, td);
        /*
         * XXX - In certain instances VOP_CLOSE has to do the vrele
         * itself.  If the vrele has been done, it will return EAGAIN
         * to indicate that the vrele should not be done again.  When
         * this happens, we just return success.  The correct thing to
         * do would be to have all VOP_CLOSE instances do the vrele.
         */
        if (error == EAGAIN)
                return (0);
        vrele(vp);
        return (error);
}

/*
 * Sequential heuristic - detect sequential operation
 */
static __inline
int
sequential_heuristic(struct uio *uio, struct file *fp)
{

        if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
            uio->uio_offset == fp->f_nextoff) {
                /*
                 * XXX we assume that the filesystem block size is
                 * the default.  Not true, but still gives us a pretty
                 * good indicator of how sequential the read operations
                 * are.
                 */
                fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
                if (fp->f_seqcount >= 127)
                        fp->f_seqcount = 127;
                return (fp->f_seqcount << 16);
        }

        /*
         * Not sequential, quick draw-down of seqcount
         */
        if (fp->f_seqcount > 1)
                fp->f_seqcount = 1;
        else
                fp->f_seqcount = 0;
        return (0);
}
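
/*
 * Editor's note (an inference from how the return value is used, not
 * stated in this file): the hint is OR'ed into the ioflag passed to
 * VOP_READ/VOP_WRITE, so it rides in the high 16 bits and a filesystem
 * can recover it with something like
 *
 *      seqcount = ioflag >> 16;
 *
 * to scale its read-ahead and clustering decisions.
 */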

/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, active_cred, file_cred,
    aresid, td)
        enum uio_rw rw;
        struct vnode *vp;
        caddr_t base;
        int len;
        off_t offset;
        enum uio_seg segflg;
        int ioflg;
        struct ucred *active_cred;
        struct ucred *file_cred;
        int *aresid;
        struct thread *td;
{
        struct uio auio;
        struct iovec aiov;
        struct mount *mp;
        struct ucred *cred;
        int error;

        if ((ioflg & IO_NODELOCKED) == 0) {
                mp = NULL;
                if (rw == UIO_WRITE) {
                        if (vp->v_type != VCHR &&
                            (error = vn_start_write(vp, &mp, V_WAIT | PCATCH))
                            != 0)
                                return (error);
                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
                } else {
                        /*
                         * XXX This should be LK_SHARED but I don't trust VFS
                         * enough to leave it like that until it has been
                         * reviewed further.
                         */
                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
                }

        }
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        aiov.iov_base = base;
        aiov.iov_len = len;
        auio.uio_resid = len;
        auio.uio_offset = offset;
        auio.uio_segflg = segflg;
        auio.uio_rw = rw;
        auio.uio_td = td;
        error = 0;
#ifdef MAC
        if ((ioflg & IO_NOMACCHECK) == 0) {
                if (rw == UIO_READ)
                        error = mac_check_vnode_read(active_cred, file_cred,
                            vp);
                else
                        error = mac_check_vnode_write(active_cred, file_cred,
                            vp);
        }
#endif
        if (error == 0) {
                if (file_cred)
                        cred = file_cred;
                else
                        cred = active_cred;
                if (rw == UIO_READ)
                        error = VOP_READ(vp, &auio, ioflg, cred);
                else
                        error = VOP_WRITE(vp, &auio, ioflg, cred);
        }
        if (aresid)
                *aresid = auio.uio_resid;
        else
                if (auio.uio_resid && error == 0)
                        error = EIO;
        if ((ioflg & IO_NODELOCKED) == 0) {
                if (rw == UIO_WRITE)
                        vn_finished_write(mp);
                VOP_UNLOCK(vp, 0, td);
        }
        return (error);
}
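
/*
 * Example (editor's sketch): reading the first 512 bytes of a file into
 * a kernel buffer.  The vnode only needs to be referenced; locking and
 * write suspension are handled internally unless IO_NODELOCKED is set.
 * Passing NOCRED (a NULL ucred) as file_cred makes the VOP use
 * active_cred.
 *
 *      char buf[512];
 *      int resid, error;
 *
 *      error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, sizeof(buf), (off_t)0,
 *          UIO_SYSSPACE, 0, td->td_ucred, NOCRED, &resid, td);
 */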

/*
 * Package up an I/O request on a vnode into a uio and do it.  The I/O
 * request is split up into smaller chunks and we try to avoid saturating
 * the buffer cache while potentially holding a vnode locked, so we
 * check bwillwrite() before calling vn_rdwr().  We also call uio_yield()
 * to give other processes a chance to lock the vnode (either other processes
 * core'ing the same binary, or unrelated processes scanning the directory).
 */
int
vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, active_cred,
    file_cred, aresid, td)
        enum uio_rw rw;
        struct vnode *vp;
        caddr_t base;
        int len;
        off_t offset;
        enum uio_seg segflg;
        int ioflg;
        struct ucred *active_cred;
        struct ucred *file_cred;
        int *aresid;
        struct thread *td;
{
        int error = 0;

        do {
                int chunk = (len > MAXBSIZE) ? MAXBSIZE : len;

                if (rw != UIO_READ && vp->v_type == VREG)
                        bwillwrite();
                error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
                    ioflg, active_cred, file_cred, aresid, td);
                len -= chunk;   /* aresid calc already includes length */
                if (error)
                        break;
                offset += chunk;
                base += chunk;
                uio_yield();
        } while (len);
        if (aresid)
                *aresid += len;
        return (error);
}

/*
 * File table vnode read routine.
 */
static int
vn_read(fp, uio, active_cred, flags, td)
        struct file *fp;
        struct uio *uio;
        struct ucred *active_cred;
        struct thread *td;
        int flags;
{
        struct vnode *vp;
        int error, ioflag;

        mtx_lock(&Giant);
        KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
            uio->uio_td, td));
        vp = (struct vnode *)fp->f_data;
        ioflag = 0;
        if (fp->f_flag & FNONBLOCK)
                ioflag |= IO_NDELAY;
        if (fp->f_flag & O_DIRECT)
                ioflag |= IO_DIRECT;
        VOP_LEASE(vp, td, fp->f_cred, LEASE_READ);
        /*
         * According to McKusick the vn lock is protecting f_offset here.
         * Once this field has its own lock we can acquire it shared.
         */
        vn_lock(vp, LK_EXCLUSIVE | LK_NOPAUSE | LK_RETRY, td);
        if ((flags & FOF_OFFSET) == 0)
                uio->uio_offset = fp->f_offset;

        ioflag |= sequential_heuristic(uio, fp);

#ifdef MAC
        error = mac_check_vnode_read(active_cred, fp->f_cred, vp);
        if (error == 0)
#endif
                error = VOP_READ(vp, uio, ioflag, fp->f_cred);
        if ((flags & FOF_OFFSET) == 0)
                fp->f_offset = uio->uio_offset;
        fp->f_nextoff = uio->uio_offset;
        VOP_UNLOCK(vp, 0, td);
        mtx_unlock(&Giant);
        return (error);
}

/*
 * File table vnode write routine.
 */
static int
vn_write(fp, uio, active_cred, flags, td)
        struct file *fp;
        struct uio *uio;
        struct ucred *active_cred;
        struct thread *td;
        int flags;
{
        struct vnode *vp;
        struct mount *mp;
        int error, ioflag;

        mtx_lock(&Giant);
        KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
            uio->uio_td, td));
        vp = (struct vnode *)fp->f_data;
        if (vp->v_type == VREG)
                bwillwrite();
        ioflag = IO_UNIT;
        if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
                ioflag |= IO_APPEND;
        if (fp->f_flag & FNONBLOCK)
                ioflag |= IO_NDELAY;
        if (fp->f_flag & O_DIRECT)
                ioflag |= IO_DIRECT;
        if ((fp->f_flag & O_FSYNC) ||
            (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
                ioflag |= IO_SYNC;
        mp = NULL;
        if (vp->v_type != VCHR &&
            (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
                mtx_unlock(&Giant);
                return (error);
        }
        VOP_LEASE(vp, td, fp->f_cred, LEASE_WRITE);
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        if ((flags & FOF_OFFSET) == 0)
                uio->uio_offset = fp->f_offset;
        ioflag |= sequential_heuristic(uio, fp);
#ifdef MAC
        error = mac_check_vnode_write(active_cred, fp->f_cred, vp);
        if (error == 0)
#endif
                error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
        if ((flags & FOF_OFFSET) == 0)
                fp->f_offset = uio->uio_offset;
        fp->f_nextoff = uio->uio_offset;
        VOP_UNLOCK(vp, 0, td);
        vn_finished_write(mp);
        mtx_unlock(&Giant);
        return (error);
}

/*
 * File table vnode stat routine.
 */
static int
vn_statfile(fp, sb, active_cred, td)
        struct file *fp;
        struct stat *sb;
        struct ucred *active_cred;
        struct thread *td;
{
        struct vnode *vp = (struct vnode *)fp->f_data;
        int error;

        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
        VOP_UNLOCK(vp, 0, td);

        return (error);
}

/*
 * Stat a vnode; implementation for the stat syscall
 */
int
vn_stat(vp, sb, active_cred, file_cred, td)
        struct vnode *vp;
        register struct stat *sb;
        struct ucred *active_cred;
        struct ucred *file_cred;
        struct thread *td;
{
        struct vattr vattr;
        register struct vattr *vap;
        int error;
        u_short mode;

#ifdef MAC
        error = mac_check_vnode_stat(active_cred, file_cred, vp);
        if (error)
                return (error);
#endif

        vap = &vattr;
        error = VOP_GETATTR(vp, vap, active_cred, td);
        if (error)
                return (error);

        vp->v_cachedfs = vap->va_fsid;
        vp->v_cachedid = vap->va_fileid;

        /*
         * Zero the spare stat fields
         */
        bzero(sb, sizeof *sb);

        /*
         * Copy from vattr table
         */
        if (vap->va_fsid != VNOVAL)
                sb->st_dev = vap->va_fsid;
        else
                sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
        sb->st_ino = vap->va_fileid;
        mode = vap->va_mode;
        switch (vap->va_type) {
        case VREG:
                mode |= S_IFREG;
                break;
        case VDIR:
                mode |= S_IFDIR;
                break;
        case VBLK:
                mode |= S_IFBLK;
                break;
        case VCHR:
                mode |= S_IFCHR;
                break;
        case VLNK:
                mode |= S_IFLNK;
                /* This is a cosmetic change, symlinks do not have a mode. */
                if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
                        mode &= ~ACCESSPERMS;   /* 0000 */
                else
                        mode |= ACCESSPERMS;    /* 0777 */
                break;
        case VSOCK:
                mode |= S_IFSOCK;
                break;
        case VFIFO:
                mode |= S_IFIFO;
                break;
        default:
                return (EBADF);
        }
        sb->st_mode = mode;
        sb->st_nlink = vap->va_nlink;
        sb->st_uid = vap->va_uid;
        sb->st_gid = vap->va_gid;
        sb->st_rdev = vap->va_rdev;
        if (vap->va_size > OFF_MAX)
                return (EOVERFLOW);
        sb->st_size = vap->va_size;
        sb->st_atimespec = vap->va_atime;
        sb->st_mtimespec = vap->va_mtime;
        sb->st_ctimespec = vap->va_ctime;
        sb->st_birthtimespec = vap->va_birthtime;

        /*
         * According to www.opengroup.org, the meaning of st_blksize is
         * "a filesystem-specific preferred I/O block size for this
         * object.  In some filesystem types, this may vary from file
         * to file"
         * Default to PAGE_SIZE after much discussion.
         */

        if (vap->va_type == VREG) {
                sb->st_blksize = vap->va_blocksize;
        } else if (vn_isdisk(vp, NULL)) {
                sb->st_blksize = vp->v_rdev->si_bsize_best;
                if (sb->st_blksize < vp->v_rdev->si_bsize_phys)
                        sb->st_blksize = vp->v_rdev->si_bsize_phys;
                if (sb->st_blksize < BLKDEV_IOSIZE)
                        sb->st_blksize = BLKDEV_IOSIZE;
        } else {
                sb->st_blksize = PAGE_SIZE;
        }

        sb->st_flags = vap->va_flags;
        if (suser(td))
                sb->st_gen = 0;
        else
                sb->st_gen = vap->va_gen;

#if (S_BLKSIZE == 512)
        /* Optimize this case */
        sb->st_blocks = vap->va_bytes >> 9;
#else
        sb->st_blocks = vap->va_bytes / S_BLKSIZE;
#endif
        return (0);
}

/*
 * File table vnode ioctl routine.
 */
static int
vn_ioctl(fp, com, data, active_cred, td)
        struct file *fp;
        u_long com;
        void *data;
        struct ucred *active_cred;
        struct thread *td;
{
        register struct vnode *vp = ((struct vnode *)fp->f_data);
        struct vnode *vpold;
        struct vattr vattr;
        int error;

        switch (vp->v_type) {

        case VREG:
        case VDIR:
                if (com == FIONREAD) {
                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
                        error = VOP_GETATTR(vp, &vattr, active_cred, td);
                        VOP_UNLOCK(vp, 0, td);
                        if (error)
                                return (error);
                        *(int *)data = vattr.va_size - fp->f_offset;
                        return (0);
                }
                if (com == FIONBIO || com == FIOASYNC)  /* XXX */
                        return (0);                     /* XXX */
                /* FALLTHROUGH */

        default:
#if 0
                return (ENOTTY);
#endif
        case VFIFO:
        case VCHR:
        case VBLK:
                if (com == FIODTYPE) {
                        if (vp->v_type != VCHR && vp->v_type != VBLK)
                                return (ENOTTY);
                        *(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK;
                        return (0);
                }
                error = VOP_IOCTL(vp, com, data, fp->f_flag, active_cred, td);
                if (error == ENOIOCTL) {
#ifdef DIAGNOSTIC
                        Debugger("ENOIOCTL leaked through");
#endif
                        error = ENOTTY;
                }
                if (error == 0 && com == TIOCSCTTY) {

                        /* Do nothing if reassigning same control tty */
                        sx_slock(&proctree_lock);
                        if (td->td_proc->p_session->s_ttyvp == vp) {
                                sx_sunlock(&proctree_lock);
                                return (0);
                        }

                        vpold = td->td_proc->p_session->s_ttyvp;
                        VREF(vp);
                        SESS_LOCK(td->td_proc->p_session);
                        td->td_proc->p_session->s_ttyvp = vp;
                        SESS_UNLOCK(td->td_proc->p_session);

                        sx_sunlock(&proctree_lock);

                        /* Get rid of reference to old control tty */
                        if (vpold)
                                vrele(vpold);
                }
                return (error);
        }
}

/*
 * File table vnode poll routine.
 */
static int
vn_poll(fp, events, active_cred, td)
        struct file *fp;
        int events;
        struct ucred *active_cred;
        struct thread *td;
{
        struct vnode *vp;
#ifdef MAC
        int error;
#endif

        vp = (struct vnode *)fp->f_data;
#ifdef MAC
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        error = mac_check_vnode_poll(active_cred, fp->f_cred, vp);
        VOP_UNLOCK(vp, 0, td);
        if (error)
                return (error);
#endif

        return (VOP_POLL(vp, events, fp->f_cred, td));
}

/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
#ifndef DEBUG_LOCKS
vn_lock(vp, flags, td)
#else
debug_vn_lock(vp, flags, td, filename, line)
#endif
        struct vnode *vp;
        int flags;
        struct thread *td;
#ifdef DEBUG_LOCKS
        const char *filename;
        int line;
#endif
{
        int error;

        do {
                if ((flags & LK_INTERLOCK) == 0)
                        VI_LOCK(vp);
                if ((vp->v_iflag & VI_XLOCK) && vp->v_vxproc != curthread) {
                        vp->v_iflag |= VI_XWANT;
                        msleep(vp, VI_MTX(vp), PINOD, "vn_lock", 0);
                        error = ENOENT;
                        if ((flags & LK_RETRY) == 0) {
                                VI_UNLOCK(vp);
                                return (error);
                        }
                }
#ifdef DEBUG_LOCKS
                vp->filename = filename;
                vp->line = line;
#endif
                /*
                 * lockmgr drops interlock before it will return for
                 * any reason.  So force the code above to relock it.
                 */
                error = VOP_LOCK(vp, flags | LK_NOPAUSE | LK_INTERLOCK, td);
                flags &= ~LK_INTERLOCK;
        } while (flags & LK_RETRY && error != 0);
        return (error);
}

/*
 * File table vnode close routine.
 */
static int
vn_closefile(fp, td)
        struct file *fp;
        struct thread *td;
{

        fp->f_ops = &badfileops;
        return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
            fp->f_cred, td));
}

/*
 * Prepare to start a filesystem write operation.  If the operation is
 * permitted, then we bump the count of operations in progress and
 * proceed.  If a suspend request is in progress, we wait until the
 * suspension is over, and then proceed.
 */
int
vn_start_write(vp, mpp, flags)
        struct vnode *vp;
        struct mount **mpp;
        int flags;
{
        struct mount *mp;
        int error;

        /*
         * If a vnode is provided, get and return the mount point to
         * which it will write.
         */
        if (vp != NULL) {
                if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
                        *mpp = NULL;
                        if (error != EOPNOTSUPP)
                                return (error);
                        return (0);
                }
        }
        if ((mp = *mpp) == NULL)
                return (0);
        /*
         * Check on status of suspension.
         */
        while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
                if (flags & V_NOWAIT)
                        return (EWOULDBLOCK);
                error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
                    "suspfs", 0);
                if (error)
                        return (error);
        }
        if (flags & V_XSLEEP)
                return (0);
        mp->mnt_writeopcount++;
        return (0);
}
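
/*
 * Editor's note (sketch of the pattern used elsewhere in this file,
 * e.g. vn_write): every write path brackets the operation with
 * vn_start_write()/vn_finished_write() so a pending suspension can
 * drain in-progress writes:
 *
 *      if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 *              return (error);
 *      vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 *      error = VOP_WRITE(vp, &auio, ioflag, cred);
 *      VOP_UNLOCK(vp, 0, td);
 *      vn_finished_write(mp);
 */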

/*
 * Secondary suspension.  Used by operations such as vop_inactive
 * routines that are needed by the higher level functions.  These
 * are allowed to proceed until all the higher level functions have
 * completed (indicated by mnt_writeopcount dropping to zero).  At that
 * time, these operations are halted until the suspension is over.
 */
int
vn_write_suspend_wait(vp, mp, flags)
        struct vnode *vp;
        struct mount *mp;
        int flags;
{
        int error;

        if (vp != NULL) {
                if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
                        if (error != EOPNOTSUPP)
                                return (error);
                        return (0);
                }
        }
        /*
         * If we are not suspended or have not yet reached suspended
         * mode, then let the operation proceed.
         */
        if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0)
                return (0);
        if (flags & V_NOWAIT)
                return (EWOULDBLOCK);
        /*
         * Wait for the suspension to finish.
         */
        return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
            "suspfs", 0));
}

/*
 * Filesystem write operation has completed.  If we are suspending and this
 * operation is the last one, notify the suspender that the suspension is
 * now in effect.
 */
void
vn_finished_write(mp)
        struct mount *mp;
{

        if (mp == NULL)
                return;
        mp->mnt_writeopcount--;
        if (mp->mnt_writeopcount < 0)
                panic("vn_finished_write: neg cnt");
        if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
            mp->mnt_writeopcount <= 0)
                wakeup(&mp->mnt_writeopcount);
}

/*
 * Request a filesystem to suspend write operations.
 */
int
vfs_write_suspend(mp)
        struct mount *mp;
{
        struct thread *td = curthread;
        int error;

        if (mp->mnt_kern_flag & MNTK_SUSPEND)
                return (0);
        mp->mnt_kern_flag |= MNTK_SUSPEND;
        if (mp->mnt_writeopcount > 0)
                (void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0);
        if ((error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) != 0) {
                vfs_write_resume(mp);
                return (error);
        }
        mp->mnt_kern_flag |= MNTK_SUSPENDED;
        return (0);
}
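
/*
 * Editor's note (usage sketch; naming the FFS snapshot code as the
 * consumer is an assumption, not stated here): a caller that needs the
 * filesystem synced and quiescent brackets its work with:
 *
 *      if ((error = vfs_write_suspend(mp)) != 0)
 *              return (error);
 *      ... no new writes start, and the filesystem has been synced ...
 *      vfs_write_resume(mp);
 */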

/*
 * Request a filesystem to resume write operations.
 */
void
vfs_write_resume(mp)
        struct mount *mp;
{

        if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0)
                return;
        mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED);
        wakeup(&mp->mnt_writeopcount);
        wakeup(&mp->mnt_flag);
}

/*
 * Implement kqueues for files by translating them to a vnode operation.
 */
static int
vn_kqfilter(struct file *fp, struct knote *kn)
{

        return (VOP_KQFILTER(((struct vnode *)fp->f_data), kn));
}

/*
 * Simplified in-kernel wrapper calls for extended attribute access.
 * These calls pass in a NULL credential, authorizing as "kernel" access.
 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 */
int
vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, int *buflen, char *buf, struct thread *td)
{
        struct uio auio;
        struct iovec iov;
        int error;

        iov.iov_len = *buflen;
        iov.iov_base = buf;

        auio.uio_iov = &iov;
        auio.uio_iovcnt = 1;
        auio.uio_rw = UIO_READ;
        auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_td = td;
        auio.uio_offset = 0;
        auio.uio_resid = *buflen;

        if ((ioflg & IO_NODELOCKED) == 0)
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);

        /* authorize attribute retrieval as kernel */
        error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
            td);

        if ((ioflg & IO_NODELOCKED) == 0)
                VOP_UNLOCK(vp, 0, td);

        if (error == 0) {
                *buflen = *buflen - auio.uio_resid;
        }

        return (error);
}
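
/*
 * Example (editor's sketch; the attribute name is illustrative): fetch a
 * system-namespace attribute into a fixed buffer while already holding
 * the vnode lock (hence IO_NODELOCKED).  On success, buflen is updated
 * to the number of bytes actually read.
 *
 *      char buf[64];
 *      int buflen = sizeof(buf);
 *
 *      error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
 *          "posix1e.acl_access", &buflen, buf, td);
 */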

/*
 * XXX failure mode if partially written?
 */
int
vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, int buflen, char *buf, struct thread *td)
{
        struct uio auio;
        struct iovec iov;
        struct mount *mp;
        int error;

        iov.iov_len = buflen;
        iov.iov_base = buf;

        auio.uio_iov = &iov;
        auio.uio_iovcnt = 1;
        auio.uio_rw = UIO_WRITE;
        auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_td = td;
        auio.uio_offset = 0;
        auio.uio_resid = buflen;

        if ((ioflg & IO_NODELOCKED) == 0) {
                if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
                        return (error);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        }

        /* authorize attribute setting as kernel */
        error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);

        if ((ioflg & IO_NODELOCKED) == 0) {
                vn_finished_write(mp);
                VOP_UNLOCK(vp, 0, td);
        }

        return (error);
}

int
vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, struct thread *td)
{
        struct mount *mp;
        int error;

        if ((ioflg & IO_NODELOCKED) == 0) {
                if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
                        return (error);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        }

        /* authorize attribute removal as kernel */
        error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, NULL, td);

        if ((ioflg & IO_NODELOCKED) == 0) {
                vn_finished_write(mp);
                VOP_UNLOCK(vp, 0, td);
        }

        return (error);
}