The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_default.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * This code is derived from software contributed
    8  * to Berkeley by John Heidemann of the UCLA Ficus project.
    9  *
   10  * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD$");
   39 
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/bio.h>
   43 #include <sys/buf.h>
   44 #include <sys/conf.h>
   45 #include <sys/event.h>
   46 #include <sys/filio.h>
   47 #include <sys/kernel.h>
   48 #include <sys/limits.h>
   49 #include <sys/lock.h>
   50 #include <sys/lockf.h>
   51 #include <sys/malloc.h>
   52 #include <sys/mount.h>
   53 #include <sys/namei.h>
   54 #include <sys/rwlock.h>
   55 #include <sys/fcntl.h>
   56 #include <sys/unistd.h>
   57 #include <sys/vnode.h>
   58 #include <sys/dirent.h>
   59 #include <sys/poll.h>
   60 #include <sys/stat.h>
   61 #include <security/audit/audit.h>
   62 #include <sys/priv.h>
   63 
   64 #include <security/mac/mac_framework.h>
   65 
   66 #include <vm/vm.h>
   67 #include <vm/vm_object.h>
   68 #include <vm/vm_extern.h>
   69 #include <vm/pmap.h>
   70 #include <vm/vm_map.h>
   71 #include <vm/vm_page.h>
   72 #include <vm/vm_pager.h>
   73 #include <vm/vnode_pager.h>
   74 
   75 static int      vop_nolookup(struct vop_lookup_args *);
   76 static int      vop_norename(struct vop_rename_args *);
   77 static int      vop_nostrategy(struct vop_strategy_args *);
   78 static int      get_next_dirent(struct vnode *vp, struct dirent **dpp,
   79                                 char *dirbuf, int dirbuflen, off_t *off,
   80                                 char **cpos, int *len, int *eofflag,
   81                                 struct thread *td);
   82 static int      dirent_exists(struct vnode *vp, const char *dirname,
   83                               struct thread *td);
   84 
   85 #define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)
   86 
   87 static int vop_stdis_text(struct vop_is_text_args *ap);
   88 static int vop_stdunset_text(struct vop_unset_text_args *ap);
   89 static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
   90 static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
   91 static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
   92 static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
   93 static int vop_stdread_pgcache(struct vop_read_pgcache_args *ap);
   94 static int vop_stdstat(struct vop_stat_args *ap);
   95 static int vop_stdvput_pair(struct vop_vput_pair_args *ap);
   96 
   97 /*
   98  * This vnode table stores what we want to do if the filesystem doesn't
   99  * implement a particular VOP.
  100  *
  101  * If there is no specific entry here, we will return EOPNOTSUPP.
  102  *
  103  * Note that every filesystem has to implement either vop_access
  104  * or vop_accessx; failing to do so will result in immediate crash
  105  * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
  106  * which calls vop_stdaccess() etc.
  107  */
  108 
  109 struct vop_vector default_vnodeops = {
  110         .vop_default =          NULL,
  111         .vop_bypass =           VOP_EOPNOTSUPP,
  112 
  113         .vop_access =           vop_stdaccess,
  114         .vop_accessx =          vop_stdaccessx,
  115         .vop_advise =           vop_stdadvise,
  116         .vop_advlock =          vop_stdadvlock,
  117         .vop_advlockasync =     vop_stdadvlockasync,
  118         .vop_advlockpurge =     vop_stdadvlockpurge,
  119         .vop_allocate =         vop_stdallocate,
  120         .vop_deallocate =       vop_stddeallocate,
  121         .vop_bmap =             vop_stdbmap,
  122         .vop_close =            VOP_NULL,
  123         .vop_fsync =            VOP_NULL,
  124         .vop_stat =             vop_stdstat,
  125         .vop_fdatasync =        vop_stdfdatasync,
  126         .vop_getpages =         vop_stdgetpages,
  127         .vop_getpages_async =   vop_stdgetpages_async,
  128         .vop_getwritemount =    vop_stdgetwritemount,
  129         .vop_inactive =         VOP_NULL,
  130         .vop_need_inactive =    vop_stdneed_inactive,
  131         .vop_ioctl =            vop_stdioctl,
  132         .vop_kqfilter =         vop_stdkqfilter,
  133         .vop_islocked =         vop_stdislocked,
  134         .vop_lock1 =            vop_stdlock,
  135         .vop_lookup =           vop_nolookup,
  136         .vop_open =             VOP_NULL,
  137         .vop_pathconf =         VOP_EINVAL,
  138         .vop_poll =             vop_nopoll,
  139         .vop_putpages =         vop_stdputpages,
  140         .vop_readlink =         VOP_EINVAL,
  141         .vop_read_pgcache =     vop_stdread_pgcache,
  142         .vop_rename =           vop_norename,
  143         .vop_revoke =           VOP_PANIC,
  144         .vop_strategy =         vop_nostrategy,
  145         .vop_unlock =           vop_stdunlock,
  146         .vop_vptocnp =          vop_stdvptocnp,
  147         .vop_vptofh =           vop_stdvptofh,
  148         .vop_unp_bind =         vop_stdunp_bind,
  149         .vop_unp_connect =      vop_stdunp_connect,
  150         .vop_unp_detach =       vop_stdunp_detach,
  151         .vop_is_text =          vop_stdis_text,
  152         .vop_set_text =         vop_stdset_text,
  153         .vop_unset_text =       vop_stdunset_text,
  154         .vop_add_writecount =   vop_stdadd_writecount,
  155         .vop_copy_file_range =  vop_stdcopy_file_range,
  156         .vop_vput_pair =        vop_stdvput_pair,
  157 };
  158 VFS_VOP_VECTOR_REGISTER(default_vnodeops);
  159 
  160 /*
  161  * Series of placeholder functions for various error returns for
  162  * VOPs.
  163  */
  164 
  165 int
  166 vop_eopnotsupp(struct vop_generic_args *ap)
  167 {
  168         /*
  169         printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
  170         */
  171 
  172         return (EOPNOTSUPP);
  173 }
  174 
  175 int
  176 vop_ebadf(struct vop_generic_args *ap)
  177 {
  178 
  179         return (EBADF);
  180 }
  181 
  182 int
  183 vop_enotty(struct vop_generic_args *ap)
  184 {
  185 
  186         return (ENOTTY);
  187 }
  188 
  189 int
  190 vop_einval(struct vop_generic_args *ap)
  191 {
  192 
  193         return (EINVAL);
  194 }
  195 
  196 int
  197 vop_enoent(struct vop_generic_args *ap)
  198 {
  199 
  200         return (ENOENT);
  201 }
  202 
  203 int
  204 vop_eagain(struct vop_generic_args *ap)
  205 {
  206 
  207         return (EAGAIN);
  208 }
  209 
  210 int
  211 vop_null(struct vop_generic_args *ap)
  212 {
  213 
  214         return (0);
  215 }
  216 
  217 /*
  218  * Helper function to panic on some bad VOPs in some filesystems.
  219  */
  220 int
  221 vop_panic(struct vop_generic_args *ap)
  222 {
  223 
  224         panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
  225 }
  226 
/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * Where an operation is documented, its documentation is in the
 * VOP_<SOMETHING>(9) manpage (name in all uppercase).
 */
  235 
  236 /*
  237  * Default vop for filesystems that do not support name lookup
  238  */
  239 static int
  240 vop_nolookup(ap)
  241         struct vop_lookup_args /* {
  242                 struct vnode *a_dvp;
  243                 struct vnode **a_vpp;
  244                 struct componentname *a_cnp;
  245         } */ *ap;
  246 {
  247 
  248         *ap->a_vpp = NULL;
  249         return (ENOTDIR);
  250 }
  251 
  252 /*
  253  * vop_norename:
  254  *
  255  * Handle unlock and reference counting for arguments of vop_rename
  256  * for filesystems that do not implement rename operation.
  257  */
  258 static int
  259 vop_norename(struct vop_rename_args *ap)
  260 {
  261 
  262         vop_rename_fail(ap);
  263         return (EOPNOTSUPP);
  264 }
  265 
  266 /*
  267  *      vop_nostrategy:
  268  *
  269  *      Strategy routine for VFS devices that have none.
  270  *
  271  *      BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
  272  *      routine.  Typically this is done for a BIO_READ strategy call.
  273  *      Typically B_INVAL is assumed to already be clear prior to a write
  274  *      and should not be cleared manually unless you just made the buffer
  275  *      invalid.  BIO_ERROR should be cleared either way.
  276  */
  277 
  278 static int
  279 vop_nostrategy (struct vop_strategy_args *ap)
  280 {
  281         printf("No strategy for buffer at %p\n", ap->a_bp);
  282         vn_printf(ap->a_vp, "vnode ");
  283         ap->a_bp->b_ioflags |= BIO_ERROR;
  284         ap->a_bp->b_error = EOPNOTSUPP;
  285         bufdone(ap->a_bp);
  286         return (EOPNOTSUPP);
  287 }
  288 
  289 static int
  290 get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
  291                 int dirbuflen, off_t *off, char **cpos, int *len,
  292                 int *eofflag, struct thread *td)
  293 {
  294         int error, reclen;
  295         struct uio uio;
  296         struct iovec iov;
  297         struct dirent *dp;
  298 
  299         KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
  300         KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
  301 
  302         if (*len == 0) {
  303                 iov.iov_base = dirbuf;
  304                 iov.iov_len = dirbuflen;
  305 
  306                 uio.uio_iov = &iov;
  307                 uio.uio_iovcnt = 1;
  308                 uio.uio_offset = *off;
  309                 uio.uio_resid = dirbuflen;
  310                 uio.uio_segflg = UIO_SYSSPACE;
  311                 uio.uio_rw = UIO_READ;
  312                 uio.uio_td = td;
  313 
  314                 *eofflag = 0;
  315 
  316 #ifdef MAC
  317                 error = mac_vnode_check_readdir(td->td_ucred, vp);
  318                 if (error == 0)
  319 #endif
  320                         error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
  321                                 NULL, NULL);
  322                 if (error)
  323                         return (error);
  324 
  325                 *off = uio.uio_offset;
  326 
  327                 *cpos = dirbuf;
  328                 *len = (dirbuflen - uio.uio_resid);
  329 
  330                 if (*len == 0)
  331                         return (ENOENT);
  332         }
  333 
  334         dp = (struct dirent *)(*cpos);
  335         reclen = dp->d_reclen;
  336         *dpp = dp;
  337 
  338         /* check for malformed directory.. */
  339         if (reclen < DIRENT_MINSIZE)
  340                 return (EINVAL);
  341 
  342         *cpos += reclen;
  343         *len -= reclen;
  344 
  345         return (0);
  346 }
  347 
  348 /*
  349  * Check if a named file exists in a given directory vnode.
  350  */
  351 static int
  352 dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
  353 {
  354         char *dirbuf, *cpos;
  355         int error, eofflag, dirbuflen, len, found;
  356         off_t off;
  357         struct dirent *dp;
  358         struct vattr va;
  359 
  360         KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
  361         KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
  362 
  363         found = 0;
  364 
  365         error = VOP_GETATTR(vp, &va, td->td_ucred);
  366         if (error)
  367                 return (found);
  368 
  369         dirbuflen = DEV_BSIZE;
  370         if (dirbuflen < va.va_blocksize)
  371                 dirbuflen = va.va_blocksize;
  372         dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
  373 
  374         off = 0;
  375         len = 0;
  376         do {
  377                 error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
  378                                         &cpos, &len, &eofflag, td);
  379                 if (error)
  380                         goto out;
  381 
  382                 if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
  383                     strcmp(dp->d_name, dirname) == 0) {
  384                         found = 1;
  385                         goto out;
  386                 }
  387         } while (len > 0 || !eofflag);
  388 
  389 out:
  390         free(dirbuf, M_TEMP);
  391         return (found);
  392 }
  393 
  394 int
  395 vop_stdaccess(struct vop_access_args *ap)
  396 {
  397 
  398         KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
  399             VAPPEND)) == 0, ("invalid bit in accmode"));
  400 
  401         return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
  402 }
  403 
  404 int
  405 vop_stdaccessx(struct vop_accessx_args *ap)
  406 {
  407         int error;
  408         accmode_t accmode = ap->a_accmode;
  409 
  410         error = vfs_unixify_accmode(&accmode);
  411         if (error != 0)
  412                 return (error);
  413 
  414         if (accmode == 0)
  415                 return (0);
  416 
  417         return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
  418 }
  419 
  420 /*
  421  * Advisory record locking support
  422  */
  423 int
  424 vop_stdadvlock(struct vop_advlock_args *ap)
  425 {
  426         struct vnode *vp;
  427         struct mount *mp;
  428         struct vattr vattr;
  429         int error;
  430 
  431         vp = ap->a_vp;
  432 
  433         /*
  434          * Provide atomicity of open(O_CREAT | O_EXCL | O_EXLOCK) for
  435          * local filesystems.  See vn_open_cred() for reciprocal part.
  436          */
  437         mp = vp->v_mount;
  438         if (mp != NULL && (mp->mnt_flag & MNT_LOCAL) != 0 &&
  439             ap->a_op == F_SETLK && (ap->a_flags & F_FIRSTOPEN) == 0) {
  440                 VI_LOCK(vp);
  441                 while ((vp->v_iflag & VI_FOPENING) != 0)
  442                         msleep(vp, VI_MTX(vp), PLOCK, "lockfo", 0);
  443                 VI_UNLOCK(vp);
  444         }
  445 
  446         if (ap->a_fl->l_whence == SEEK_END) {
  447                 /*
  448                  * The NFSv4 server must avoid doing a vn_lock() here, since it
  449                  * can deadlock the nfsd threads, due to a LOR.  Fortunately
  450                  * the NFSv4 server always uses SEEK_SET and this code is
  451                  * only required for the SEEK_END case.
  452                  */
  453                 vn_lock(vp, LK_SHARED | LK_RETRY);
  454                 error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
  455                 VOP_UNLOCK(vp);
  456                 if (error)
  457                         return (error);
  458         } else
  459                 vattr.va_size = 0;
  460 
  461         return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
  462 }
  463 
  464 int
  465 vop_stdadvlockasync(struct vop_advlockasync_args *ap)
  466 {
  467         struct vnode *vp;
  468         struct vattr vattr;
  469         int error;
  470 
  471         vp = ap->a_vp;
  472         if (ap->a_fl->l_whence == SEEK_END) {
  473                 /* The size argument is only needed for SEEK_END. */
  474                 vn_lock(vp, LK_SHARED | LK_RETRY);
  475                 error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
  476                 VOP_UNLOCK(vp);
  477                 if (error)
  478                         return (error);
  479         } else
  480                 vattr.va_size = 0;
  481 
  482         return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
  483 }
  484 
  485 int
  486 vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
  487 {
  488         struct vnode *vp;
  489 
  490         vp = ap->a_vp;
  491         lf_purgelocks(vp, &vp->v_lockf);
  492         return (0);
  493 }
  494 
  495 /*
  496  * vop_stdpathconf:
  497  *
  498  * Standard implementation of POSIX pathconf, to get information about limits
  499  * for a filesystem.
  500  * Override per filesystem for the case where the filesystem has smaller
  501  * limits.
  502  */
  503 int
  504 vop_stdpathconf(ap)
  505         struct vop_pathconf_args /* {
  506         struct vnode *a_vp;
  507         int a_name;
  508         int *a_retval;
  509         } */ *ap;
  510 {
  511 
  512         switch (ap->a_name) {
  513                 case _PC_ASYNC_IO:
  514                         *ap->a_retval = _POSIX_ASYNCHRONOUS_IO;
  515                         return (0);
  516                 case _PC_PATH_MAX:
  517                         *ap->a_retval = PATH_MAX;
  518                         return (0);
  519                 case _PC_ACL_EXTENDED:
  520                 case _PC_ACL_NFS4:
  521                 case _PC_CAP_PRESENT:
  522                 case _PC_DEALLOC_PRESENT:
  523                 case _PC_INF_PRESENT:
  524                 case _PC_MAC_PRESENT:
  525                         *ap->a_retval = 0;
  526                         return (0);
  527                 default:
  528                         return (EINVAL);
  529         }
  530         /* NOTREACHED */
  531 }
  532 
  533 /*
  534  * Standard lock, unlock and islocked functions.
  535  */
  536 int
  537 vop_stdlock(ap)
  538         struct vop_lock1_args /* {
  539                 struct vnode *a_vp;
  540                 int a_flags;
  541                 char *file;
  542                 int line;
  543         } */ *ap;
  544 {
  545         struct vnode *vp = ap->a_vp;
  546         struct mtx *ilk;
  547 
  548         ilk = VI_MTX(vp);
  549         return (lockmgr_lock_flags(vp->v_vnlock, ap->a_flags,
  550             &ilk->lock_object, ap->a_file, ap->a_line));
  551 }
  552 
  553 /* See above. */
  554 int
  555 vop_stdunlock(ap)
  556         struct vop_unlock_args /* {
  557                 struct vnode *a_vp;
  558         } */ *ap;
  559 {
  560         struct vnode *vp = ap->a_vp;
  561 
  562         return (lockmgr_unlock(vp->v_vnlock));
  563 }
  564 
  565 /* See above. */
  566 int
  567 vop_stdislocked(ap)
  568         struct vop_islocked_args /* {
  569                 struct vnode *a_vp;
  570         } */ *ap;
  571 {
  572 
  573         return (lockstatus(ap->a_vp->v_vnlock));
  574 }
  575 
  576 /*
  577  * Variants of the above set.
  578  *
  579  * Differences are:
  580  * - shared locking disablement is not supported
  581  * - v_vnlock pointer is not honored
  582  */
  583 int
  584 vop_lock(ap)
  585         struct vop_lock1_args /* {
  586                 struct vnode *a_vp;
  587                 int a_flags;
  588                 char *file;
  589                 int line;
  590         } */ *ap;
  591 {
  592         struct vnode *vp = ap->a_vp;
  593         int flags = ap->a_flags;
  594         struct mtx *ilk;
  595 
  596         MPASS(vp->v_vnlock == &vp->v_lock);
  597 
  598         if (__predict_false((flags & ~(LK_TYPE_MASK | LK_NODDLKTREAT | LK_RETRY)) != 0))
  599                 goto other;
  600 
  601         switch (flags & LK_TYPE_MASK) {
  602         case LK_SHARED:
  603                 return (lockmgr_slock(&vp->v_lock, flags, ap->a_file, ap->a_line));
  604         case LK_EXCLUSIVE:
  605                 return (lockmgr_xlock(&vp->v_lock, flags, ap->a_file, ap->a_line));
  606         }
  607 other:
  608         ilk = VI_MTX(vp);
  609         return (lockmgr_lock_flags(&vp->v_lock, flags,
  610             &ilk->lock_object, ap->a_file, ap->a_line));
  611 }
  612 
  613 int
  614 vop_unlock(ap)
  615         struct vop_unlock_args /* {
  616                 struct vnode *a_vp;
  617         } */ *ap;
  618 {
  619         struct vnode *vp = ap->a_vp;
  620 
  621         MPASS(vp->v_vnlock == &vp->v_lock);
  622 
  623         return (lockmgr_unlock(&vp->v_lock));
  624 }
  625 
  626 int
  627 vop_islocked(ap)
  628         struct vop_islocked_args /* {
  629                 struct vnode *a_vp;
  630         } */ *ap;
  631 {
  632         struct vnode *vp = ap->a_vp;
  633 
  634         MPASS(vp->v_vnlock == &vp->v_lock);
  635 
  636         return (lockstatus(&vp->v_lock));
  637 }
  638 
  639 /*
  640  * Return true for select/poll.
  641  */
  642 int
  643 vop_nopoll(ap)
  644         struct vop_poll_args /* {
  645                 struct vnode *a_vp;
  646                 int  a_events;
  647                 struct ucred *a_cred;
  648                 struct thread *a_td;
  649         } */ *ap;
  650 {
  651 
  652         if (ap->a_events & ~POLLSTANDARD)
  653                 return (POLLNVAL);
  654         return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
  655 }
  656 
  657 /*
  658  * Implement poll for local filesystems that support it.
  659  */
  660 int
  661 vop_stdpoll(ap)
  662         struct vop_poll_args /* {
  663                 struct vnode *a_vp;
  664                 int  a_events;
  665                 struct ucred *a_cred;
  666                 struct thread *a_td;
  667         } */ *ap;
  668 {
  669         if (ap->a_events & ~POLLSTANDARD)
  670                 return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
  671         return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
  672 }
  673 
  674 /*
  675  * Return our mount point, as we will take charge of the writes.
  676  */
  677 int
  678 vop_stdgetwritemount(ap)
  679         struct vop_getwritemount_args /* {
  680                 struct vnode *a_vp;
  681                 struct mount **a_mpp;
  682         } */ *ap;
  683 {
  684         struct mount *mp;
  685         struct vnode *vp;
  686 
  687         /*
  688          * Note that having a reference does not prevent forced unmount from
  689          * setting ->v_mount to NULL after the lock gets released. This is of
  690          * no consequence for typical consumers (most notably vn_start_write)
  691          * since in this case the vnode is VIRF_DOOMED. Unmount might have
  692          * progressed far enough that its completion is only delayed by the
  693          * reference obtained here. The consumer only needs to concern itself
  694          * with releasing it.
  695          */
  696         vp = ap->a_vp;
  697         mp = vfs_ref_from_vp(vp);
  698         *(ap->a_mpp) = mp;
  699         return (0);
  700 }
  701 
  702 /*
  703  * If the file system doesn't implement VOP_BMAP, then return sensible defaults:
  704  * - Return the vnode's bufobj instead of any underlying device's bufobj
  705  * - Calculate the physical block number as if there were equal size
  706  *   consecutive blocks, but
  707  * - Report no contiguous runs of blocks.
  708  */
  709 int
  710 vop_stdbmap(ap)
  711         struct vop_bmap_args /* {
  712                 struct vnode *a_vp;
  713                 daddr_t  a_bn;
  714                 struct bufobj **a_bop;
  715                 daddr_t *a_bnp;
  716                 int *a_runp;
  717                 int *a_runb;
  718         } */ *ap;
  719 {
  720 
  721         if (ap->a_bop != NULL)
  722                 *ap->a_bop = &ap->a_vp->v_bufobj;
  723         if (ap->a_bnp != NULL)
  724                 *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
  725         if (ap->a_runp != NULL)
  726                 *ap->a_runp = 0;
  727         if (ap->a_runb != NULL)
  728                 *ap->a_runb = 0;
  729         return (0);
  730 }
  731 
  732 int
  733 vop_stdfsync(ap)
  734         struct vop_fsync_args /* {
  735                 struct vnode *a_vp;
  736                 int a_waitfor;
  737                 struct thread *a_td;
  738         } */ *ap;
  739 {
  740 
  741         return (vn_fsync_buf(ap->a_vp, ap->a_waitfor));
  742 }
  743 
  744 static int
  745 vop_stdfdatasync(struct vop_fdatasync_args *ap)
  746 {
  747 
  748         return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td));
  749 }
  750 
  751 int
  752 vop_stdfdatasync_buf(struct vop_fdatasync_args *ap)
  753 {
  754 
  755         return (vn_fsync_buf(ap->a_vp, MNT_WAIT));
  756 }
  757 
  758 /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
  759 int
  760 vop_stdgetpages(ap)
  761         struct vop_getpages_args /* {
  762                 struct vnode *a_vp;
  763                 vm_page_t *a_m;
  764                 int a_count;
  765                 int *a_rbehind;
  766                 int *a_rahead;
  767         } */ *ap;
  768 {
  769 
  770         return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
  771             ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL);
  772 }
  773 
  774 static int
  775 vop_stdgetpages_async(struct vop_getpages_async_args *ap)
  776 {
  777         int error;
  778 
  779         error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
  780             ap->a_rahead);
  781         if (ap->a_iodone != NULL)
  782                 ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);
  783         return (error);
  784 }
  785 
  786 int
  787 vop_stdkqfilter(struct vop_kqfilter_args *ap)
  788 {
  789         return vfs_kqfilter(ap);
  790 }
  791 
  792 /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */
  793 int
  794 vop_stdputpages(ap)
  795         struct vop_putpages_args /* {
  796                 struct vnode *a_vp;
  797                 vm_page_t *a_m;
  798                 int a_count;
  799                 int a_sync;
  800                 int *a_rtvals;
  801         } */ *ap;
  802 {
  803 
  804         return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
  805              ap->a_sync, ap->a_rtvals);
  806 }
  807 
  808 int
  809 vop_stdvptofh(struct vop_vptofh_args *ap)
  810 {
  811         return (EOPNOTSUPP);
  812 }
  813 
  814 int
  815 vop_stdvptocnp(struct vop_vptocnp_args *ap)
  816 {
  817         struct vnode *vp = ap->a_vp;
  818         struct vnode **dvp = ap->a_vpp;
  819         struct ucred *cred;
  820         char *buf = ap->a_buf;
  821         size_t *buflen = ap->a_buflen;
  822         char *dirbuf, *cpos;
  823         int i, error, eofflag, dirbuflen, flags, locked, len, covered;
  824         off_t off;
  825         ino_t fileno;
  826         struct vattr va;
  827         struct nameidata nd;
  828         struct thread *td;
  829         struct dirent *dp;
  830         struct vnode *mvp;
  831 
  832         i = *buflen;
  833         error = 0;
  834         covered = 0;
  835         td = curthread;
  836         cred = td->td_ucred;
  837 
  838         if (vp->v_type != VDIR)
  839                 return (ENOENT);
  840 
  841         error = VOP_GETATTR(vp, &va, cred);
  842         if (error)
  843                 return (error);
  844 
  845         VREF(vp);
  846         locked = VOP_ISLOCKED(vp);
  847         VOP_UNLOCK(vp);
  848         NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
  849             "..", vp);
  850         flags = FREAD;
  851         error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
  852         if (error) {
  853                 vn_lock(vp, locked | LK_RETRY);
  854                 return (error);
  855         }
  856         NDFREE_PNBUF(&nd);
  857 
  858         mvp = *dvp = nd.ni_vp;
  859 
  860         if (vp->v_mount != (*dvp)->v_mount &&
  861             ((*dvp)->v_vflag & VV_ROOT) &&
  862             ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
  863                 *dvp = (*dvp)->v_mount->mnt_vnodecovered;
  864                 VREF(mvp);
  865                 VOP_UNLOCK(mvp);
  866                 vn_close(mvp, FREAD, cred, td);
  867                 VREF(*dvp);
  868                 vn_lock(*dvp, LK_SHARED | LK_RETRY);
  869                 covered = 1;
  870         }
  871 
  872         fileno = va.va_fileid;
  873 
  874         dirbuflen = DEV_BSIZE;
  875         if (dirbuflen < va.va_blocksize)
  876                 dirbuflen = va.va_blocksize;
  877         dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
  878 
  879         if ((*dvp)->v_type != VDIR) {
  880                 error = ENOENT;
  881                 goto out;
  882         }
  883 
  884         off = 0;
  885         len = 0;
  886         do {
  887                 /* call VOP_READDIR of parent */
  888                 error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
  889                                         &cpos, &len, &eofflag, td);
  890                 if (error)
  891                         goto out;
  892 
  893                 if ((dp->d_type != DT_WHT) &&
  894                     (dp->d_fileno == fileno)) {
  895                         if (covered) {
  896                                 VOP_UNLOCK(*dvp);
  897                                 vn_lock(mvp, LK_SHARED | LK_RETRY);
  898                                 if (dirent_exists(mvp, dp->d_name, td)) {
  899                                         error = ENOENT;
  900                                         VOP_UNLOCK(mvp);
  901                                         vn_lock(*dvp, LK_SHARED | LK_RETRY);
  902                                         goto out;
  903                                 }
  904                                 VOP_UNLOCK(mvp);
  905                                 vn_lock(*dvp, LK_SHARED | LK_RETRY);
  906                         }
  907                         i -= dp->d_namlen;
  908 
  909                         if (i < 0) {
  910                                 error = ENOMEM;
  911                                 goto out;
  912                         }
  913                         if (dp->d_namlen == 1 && dp->d_name[0] == '.') {
  914                                 error = ENOENT;
  915                         } else {
  916                                 bcopy(dp->d_name, buf + i, dp->d_namlen);
  917                                 error = 0;
  918                         }
  919                         goto out;
  920                 }
  921         } while (len > 0 || !eofflag);
  922         error = ENOENT;
  923 
  924 out:
  925         free(dirbuf, M_TEMP);
  926         if (!error) {
  927                 *buflen = i;
  928                 vref(*dvp);
  929         }
  930         if (covered) {
  931                 vput(*dvp);
  932                 vrele(mvp);
  933         } else {
  934                 VOP_UNLOCK(mvp);
  935                 vn_close(mvp, FREAD, cred, td);
  936         }
  937         vn_lock(vp, locked | LK_RETRY);
  938         return (error);
  939 }
  940 
  941 int
  942 vop_stdallocate(struct vop_allocate_args *ap)
  943 {
  944 #ifdef __notyet__
  945         struct statfs *sfs;
  946         off_t maxfilesize = 0;
  947 #endif
  948         struct iovec aiov;
  949         struct vattr vattr, *vap;
  950         struct uio auio;
  951         off_t fsize, len, cur, offset;
  952         uint8_t *buf;
  953         struct thread *td;
  954         struct vnode *vp;
  955         size_t iosize;
  956         int error;
  957 
  958         buf = NULL;
  959         error = 0;
  960         td = curthread;
  961         vap = &vattr;
  962         vp = ap->a_vp;
  963         len = *ap->a_len;
  964         offset = *ap->a_offset;
  965 
  966         error = VOP_GETATTR(vp, vap, ap->a_cred);
  967         if (error != 0)
  968                 goto out;
  969         fsize = vap->va_size;
  970         iosize = vap->va_blocksize;
  971         if (iosize == 0)
  972                 iosize = BLKDEV_IOSIZE;
  973         if (iosize > maxphys)
  974                 iosize = maxphys;
  975         buf = malloc(iosize, M_TEMP, M_WAITOK);
  976 
  977 #ifdef __notyet__
  978         /*
  979          * Check if the filesystem sets f_maxfilesize; if not use
  980          * VOP_SETATTR to perform the check.
  981          */
  982         sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
  983         error = VFS_STATFS(vp->v_mount, sfs, td);
  984         if (error == 0)
  985                 maxfilesize = sfs->f_maxfilesize;
  986         free(sfs, M_STATFS);
  987         if (error != 0)
  988                 goto out;
  989         if (maxfilesize) {
  990                 if (offset > maxfilesize || len > maxfilesize ||
  991                     offset + len > maxfilesize) {
  992                         error = EFBIG;
  993                         goto out;
  994                 }
  995         } else
  996 #endif
  997         if (offset + len > vap->va_size) {
  998                 /*
  999                  * Test offset + len against the filesystem's maxfilesize.
 1000                  */
 1001                 VATTR_NULL(vap);
 1002                 vap->va_size = offset + len;
 1003                 error = VOP_SETATTR(vp, vap, ap->a_cred);
 1004                 if (error != 0)
 1005                         goto out;
 1006                 VATTR_NULL(vap);
 1007                 vap->va_size = fsize;
 1008                 error = VOP_SETATTR(vp, vap, ap->a_cred);
 1009                 if (error != 0)
 1010                         goto out;
 1011         }
 1012 
 1013         for (;;) {
 1014                 /*
 1015                  * Read and write back anything below the nominal file
 1016                  * size.  There's currently no way outside the filesystem
 1017                  * to know whether this area is sparse or not.
 1018                  */
 1019                 cur = iosize;
 1020                 if ((offset % iosize) != 0)
 1021                         cur -= (offset % iosize);
 1022                 if (cur > len)
 1023                         cur = len;
 1024                 if (offset < fsize) {
 1025                         aiov.iov_base = buf;
 1026                         aiov.iov_len = cur;
 1027                         auio.uio_iov = &aiov;
 1028                         auio.uio_iovcnt = 1;
 1029                         auio.uio_offset = offset;
 1030                         auio.uio_resid = cur;
 1031                         auio.uio_segflg = UIO_SYSSPACE;
 1032                         auio.uio_rw = UIO_READ;
 1033                         auio.uio_td = td;
 1034                         error = VOP_READ(vp, &auio, ap->a_ioflag, ap->a_cred);
 1035                         if (error != 0)
 1036                                 break;
 1037                         if (auio.uio_resid > 0) {
 1038                                 bzero(buf + cur - auio.uio_resid,
 1039                                     auio.uio_resid);
 1040                         }
 1041                 } else {
 1042                         bzero(buf, cur);
 1043                 }
 1044 
 1045                 aiov.iov_base = buf;
 1046                 aiov.iov_len = cur;
 1047                 auio.uio_iov = &aiov;
 1048                 auio.uio_iovcnt = 1;
 1049                 auio.uio_offset = offset;
 1050                 auio.uio_resid = cur;
 1051                 auio.uio_segflg = UIO_SYSSPACE;
 1052                 auio.uio_rw = UIO_WRITE;
 1053                 auio.uio_td = td;
 1054 
 1055                 error = VOP_WRITE(vp, &auio, ap->a_ioflag, ap->a_cred);
 1056                 if (error != 0)
 1057                         break;
 1058 
 1059                 len -= cur;
 1060                 offset += cur;
 1061                 if (len == 0)
 1062                         break;
 1063                 if (should_yield())
 1064                         break;
 1065         }
 1066 
 1067  out:
 1068         *ap->a_len = len;
 1069         *ap->a_offset = offset;
 1070         free(buf, M_TEMP);
 1071         return (error);
 1072 }
 1073 
 1074 static int
 1075 vp_zerofill(struct vnode *vp, struct vattr *vap, off_t *offsetp, off_t *lenp,
 1076     int ioflag, struct ucred *cred)
 1077 {
 1078         int iosize;
 1079         int error = 0;
 1080         struct iovec aiov;
 1081         struct uio auio;
 1082         struct thread *td;
 1083         off_t offset, len;
 1084 
 1085         iosize = vap->va_blocksize;
 1086         td = curthread;
 1087         offset = *offsetp;
 1088         len = *lenp;
 1089 
 1090         if (iosize == 0)
 1091                 iosize = BLKDEV_IOSIZE;
 1092         /* If va_blocksize is 512 bytes, iosize will be 4 kilobytes */
 1093         iosize = min(iosize * 8, ZERO_REGION_SIZE);
 1094 
 1095         while (len > 0) {
 1096                 int xfersize = iosize;
 1097                 if (offset % iosize != 0)
 1098                         xfersize -= offset % iosize;
 1099                 if (xfersize > len)
 1100                         xfersize = len;
 1101 
 1102                 aiov.iov_base = __DECONST(void *, zero_region);
 1103                 aiov.iov_len = xfersize;
 1104                 auio.uio_iov = &aiov;
 1105                 auio.uio_iovcnt = 1;
 1106                 auio.uio_offset = offset;
 1107                 auio.uio_resid = xfersize;
 1108                 auio.uio_segflg = UIO_SYSSPACE;
 1109                 auio.uio_rw = UIO_WRITE;
 1110                 auio.uio_td = td;
 1111 
 1112                 error = VOP_WRITE(vp, &auio, ioflag, cred);
 1113                 if (error != 0) {
 1114                         len -= xfersize - auio.uio_resid;
 1115                         offset += xfersize - auio.uio_resid;
 1116                         break;
 1117                 }
 1118 
 1119                 len -= xfersize;
 1120                 offset += xfersize;
 1121         }
 1122 
 1123         *offsetp = offset;
 1124         *lenp = len;
 1125         return (error);
 1126 }
 1127 
 1128 int
 1129 vop_stddeallocate(struct vop_deallocate_args *ap)
 1130 {
 1131         struct vnode *vp;
 1132         off_t offset, len;
 1133         struct ucred *cred;
 1134         int error;
 1135         struct vattr va;
 1136         off_t noff, xfersize, rem;
 1137 
 1138         vp = ap->a_vp;
 1139         offset = *ap->a_offset;
 1140         cred = ap->a_cred;
 1141 
 1142         error = VOP_GETATTR(vp, &va, cred);
 1143         if (error)
 1144                 return (error);
 1145 
 1146         len = omin((off_t)va.va_size - offset, *ap->a_len);
 1147         while (len > 0) {
 1148                 noff = offset;
 1149                 error = vn_bmap_seekhole_locked(vp, FIOSEEKDATA, &noff, cred);
 1150                 if (error) {
 1151                         if (error != ENXIO)
 1152                                 /* XXX: Is it okay to fallback further? */
 1153                                 goto out;
 1154 
 1155                         /*
 1156                          * No more data region to be filled
 1157                          */
 1158                         offset += len;
 1159                         len = 0;
 1160                         error = 0;
 1161                         break;
 1162                 }
 1163                 KASSERT(noff >= offset, ("FIOSEEKDATA going backward"));
 1164                 if (noff != offset) {
 1165                         xfersize = omin(noff - offset, len);
 1166                         len -= xfersize;
 1167                         offset += xfersize;
 1168                         if (len == 0)
 1169                                 break;
 1170                 }
 1171                 error = vn_bmap_seekhole_locked(vp, FIOSEEKHOLE, &noff, cred);
 1172                 if (error)
 1173                         goto out;
 1174 
 1175                 /* Fill zeroes */
 1176                 xfersize = rem = omin(noff - offset, len);
 1177                 error = vp_zerofill(vp, &va, &offset, &rem, ap->a_ioflag, cred);
 1178                 if (error) {
 1179                         len -= xfersize - rem;
 1180                         goto out;
 1181                 }
 1182 
 1183                 len -= xfersize;
 1184                 if (should_yield())
 1185                         break;
 1186         }
 1187         /* Handle the case when offset is beyond EOF */
 1188         if (len < 0)
 1189                 len = 0;
 1190 out:
 1191         *ap->a_offset = offset;
 1192         *ap->a_len = len;
 1193         return (error);
 1194 }
 1195 
 1196 int
 1197 vop_stdadvise(struct vop_advise_args *ap)
 1198 {
 1199         struct vnode *vp;
 1200         struct bufobj *bo;
 1201         daddr_t startn, endn;
 1202         off_t bstart, bend, start, end;
 1203         int bsize, error;
 1204 
 1205         vp = ap->a_vp;
 1206         switch (ap->a_advice) {
 1207         case POSIX_FADV_WILLNEED:
 1208                 /*
 1209                  * Do nothing for now.  Filesystems should provide a
 1210                  * custom method which starts an asynchronous read of
 1211                  * the requested region.
 1212                  */
 1213                 error = 0;
 1214                 break;
 1215         case POSIX_FADV_DONTNEED:
 1216                 error = 0;
 1217                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1218                 if (VN_IS_DOOMED(vp)) {
 1219                         VOP_UNLOCK(vp);
 1220                         break;
 1221                 }
 1222 
 1223                 /*
 1224                  * Round to block boundaries (and later possibly further to
 1225                  * page boundaries).  Applications cannot reasonably be aware  
 1226                  * of the boundaries, and the rounding must be to expand at
 1227                  * both extremities to cover enough.  It still doesn't cover
 1228                  * read-ahead.  For partial blocks, this gives unnecessary
 1229                  * discarding of buffers but is efficient enough since the
 1230                  * pages usually remain in VMIO for some time.
 1231                  */
 1232                 bsize = vp->v_bufobj.bo_bsize;
 1233                 bstart = rounddown(ap->a_start, bsize);
 1234                 bend = roundup(ap->a_end, bsize);
 1235 
 1236                 /*
 1237                  * Deactivate pages in the specified range from the backing VM
 1238                  * object.  Pages that are resident in the buffer cache will
 1239                  * remain wired until their corresponding buffers are released
 1240                  * below.
 1241                  */
 1242                 if (vp->v_object != NULL) {
 1243                         start = trunc_page(bstart);
 1244                         end = round_page(bend);
 1245                         VM_OBJECT_RLOCK(vp->v_object);
 1246                         vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start),
 1247                             OFF_TO_IDX(end));
 1248                         VM_OBJECT_RUNLOCK(vp->v_object);
 1249                 }
 1250 
 1251                 bo = &vp->v_bufobj;
 1252                 BO_RLOCK(bo);
 1253                 startn = bstart / bsize;
 1254                 endn = bend / bsize;
 1255                 error = bnoreuselist(&bo->bo_clean, bo, startn, endn);
 1256                 if (error == 0)
 1257                         error = bnoreuselist(&bo->bo_dirty, bo, startn, endn);
 1258                 BO_RUNLOCK(bo);
 1259                 VOP_UNLOCK(vp);
 1260                 break;
 1261         default:
 1262                 error = EINVAL;
 1263                 break;
 1264         }
 1265         return (error);
 1266 }
 1267 
 1268 int
 1269 vop_stdunp_bind(struct vop_unp_bind_args *ap)
 1270 {
 1271 
 1272         ap->a_vp->v_unpcb = ap->a_unpcb;
 1273         return (0);
 1274 }
 1275 
 1276 int
 1277 vop_stdunp_connect(struct vop_unp_connect_args *ap)
 1278 {
 1279 
 1280         *ap->a_unpcb = ap->a_vp->v_unpcb;
 1281         return (0);
 1282 }
 1283 
 1284 int
 1285 vop_stdunp_detach(struct vop_unp_detach_args *ap)
 1286 {
 1287 
 1288         ap->a_vp->v_unpcb = NULL;
 1289         return (0);
 1290 }
 1291 
 1292 static int
 1293 vop_stdis_text(struct vop_is_text_args *ap)
 1294 {
 1295 
 1296         return (atomic_load_int(&ap->a_vp->v_writecount) < 0);
 1297 }
 1298 
 1299 int
 1300 vop_stdset_text(struct vop_set_text_args *ap)
 1301 {
 1302         struct vnode *vp;
 1303         int n;
 1304         bool gotref;
 1305 
 1306         vp = ap->a_vp;
 1307 
 1308         n = atomic_load_int(&vp->v_writecount);
 1309         for (;;) {
 1310                 if (__predict_false(n > 0)) {
 1311                         return (ETXTBSY);
 1312                 }
 1313 
 1314                 /*
 1315                  * Transition point, we may need to grab a reference on the vnode.
 1316                  *
 1317                  * Take the ref early As a safety measure against bogus calls
 1318                  * to vop_stdunset_text.
 1319                  */
 1320                 if (n == 0) {
 1321                         gotref = false;
 1322                         if ((vn_irflag_read(vp) & VIRF_TEXT_REF) != 0) {
 1323                                 vref(vp);
 1324                                 gotref = true;
 1325                         }
 1326                         if (atomic_fcmpset_int(&vp->v_writecount, &n, -1)) {
 1327                                 return (0);
 1328                         }
 1329                         if (gotref) {
 1330                                 vunref(vp);
 1331                         }
 1332                         continue;
 1333                 }
 1334 
 1335                 MPASS(n < 0);
 1336                 if (atomic_fcmpset_int(&vp->v_writecount, &n, n - 1)) {
 1337                         return (0);
 1338                 }
 1339         }
 1340         __assert_unreachable();
 1341 }
 1342 
 1343 static int
 1344 vop_stdunset_text(struct vop_unset_text_args *ap)
 1345 {
 1346         struct vnode *vp;
 1347         int n;
 1348 
 1349         vp = ap->a_vp;
 1350 
 1351         n = atomic_load_int(&vp->v_writecount);
 1352         for (;;) {
 1353                 if (__predict_false(n >= 0)) {
 1354                         return (EINVAL);
 1355                 }
 1356 
 1357                 /*
 1358                  * Transition point, we may need to release a reference on the vnode.
 1359                  */
 1360                 if (n == -1) {
 1361                         if (atomic_fcmpset_int(&vp->v_writecount, &n, 0)) {
 1362                                 if ((vn_irflag_read(vp) & VIRF_TEXT_REF) != 0) {
 1363                                         vunref(vp);
 1364                                 }
 1365                                 return (0);
 1366                         }
 1367                         continue;
 1368                 }
 1369 
 1370                 MPASS(n < -1);
 1371                 if (atomic_fcmpset_int(&vp->v_writecount, &n, n + 1)) {
 1372                         return (0);
 1373                 }
 1374         }
 1375         __assert_unreachable();
 1376 }
 1377 
 1378 static int __always_inline
 1379 vop_stdadd_writecount_impl(struct vop_add_writecount_args *ap, bool handle_msync)
 1380 {
 1381         struct vnode *vp;
 1382         struct mount *mp __diagused;
 1383         int n;
 1384 
 1385         vp = ap->a_vp;
 1386 
 1387 #ifdef INVARIANTS
 1388         mp = vp->v_mount;
 1389         if (mp != NULL) {
 1390                 if (handle_msync) {
 1391                         VNPASS((mp->mnt_kern_flag & MNTK_NOMSYNC) == 0, vp);
 1392                 } else {
 1393                         VNPASS((mp->mnt_kern_flag & MNTK_NOMSYNC) != 0, vp);
 1394                 }
 1395         }
 1396 #endif
 1397 
 1398         n = atomic_load_int(&vp->v_writecount);
 1399         for (;;) {
 1400                 if (__predict_false(n < 0)) {
 1401                         return (ETXTBSY);
 1402                 }
 1403 
 1404                 VNASSERT(n + ap->a_inc >= 0, vp,
 1405                     ("neg writecount increment %d + %d = %d", n, ap->a_inc,
 1406                     n + ap->a_inc));
 1407                 if (n == 0) {
 1408                         if (handle_msync) {
 1409                                 vlazy(vp);
 1410                         }
 1411                 }
 1412 
 1413                 if (atomic_fcmpset_int(&vp->v_writecount, &n, n + ap->a_inc)) {
 1414                         return (0);
 1415                 }
 1416         }
 1417         __assert_unreachable();
 1418 }
 1419 
 1420 int
 1421 vop_stdadd_writecount(struct vop_add_writecount_args *ap)
 1422 {
 1423 
 1424         return (vop_stdadd_writecount_impl(ap, true));
 1425 }
 1426 
 1427 int
 1428 vop_stdadd_writecount_nomsync(struct vop_add_writecount_args *ap)
 1429 {
 1430 
 1431         return (vop_stdadd_writecount_impl(ap, false));
 1432 }
 1433 
 1434 int
 1435 vop_stdneed_inactive(struct vop_need_inactive_args *ap)
 1436 {
 1437 
 1438         return (1);
 1439 }
 1440 
 1441 int
 1442 vop_stdioctl(struct vop_ioctl_args *ap)
 1443 {
 1444         struct vnode *vp;
 1445         struct vattr va;
 1446         off_t *offp;
 1447         int error;
 1448 
 1449         switch (ap->a_command) {
 1450         case FIOSEEKDATA:
 1451         case FIOSEEKHOLE:
 1452                 vp = ap->a_vp;
 1453                 error = vn_lock(vp, LK_SHARED);
 1454                 if (error != 0)
 1455                         return (EBADF);
 1456                 if (vp->v_type == VREG)
 1457                         error = VOP_GETATTR(vp, &va, ap->a_cred);
 1458                 else
 1459                         error = ENOTTY;
 1460                 if (error == 0) {
 1461                         offp = ap->a_data;
 1462                         if (*offp < 0 || *offp >= va.va_size)
 1463                                 error = ENXIO;
 1464                         else if (ap->a_command == FIOSEEKHOLE)
 1465                                 *offp = va.va_size;
 1466                 }
 1467                 VOP_UNLOCK(vp);
 1468                 break;
 1469         default:
 1470                 error = ENOTTY;
 1471                 break;
 1472         }
 1473         return (error);
 1474 }
 1475 
 1476 /*
 1477  * vfs default ops
 1478  * used to fill the vfs function table to get reasonable default return values.
 1479  */
 1480 int
 1481 vfs_stdroot (mp, flags, vpp)
 1482         struct mount *mp;
 1483         int flags;
 1484         struct vnode **vpp;
 1485 {
 1486 
 1487         return (EOPNOTSUPP);
 1488 }
 1489 
 1490 int
 1491 vfs_stdstatfs (mp, sbp)
 1492         struct mount *mp;
 1493         struct statfs *sbp;
 1494 {
 1495 
 1496         return (EOPNOTSUPP);
 1497 }
 1498 
 1499 int
 1500 vfs_stdquotactl (mp, cmds, uid, arg, mp_busy)
 1501         struct mount *mp;
 1502         int cmds;
 1503         uid_t uid;
 1504         void *arg;
 1505         bool *mp_busy;
 1506 {
 1507         return (EOPNOTSUPP);
 1508 }
 1509 
 1510 int
 1511 vfs_stdsync(mp, waitfor)
 1512         struct mount *mp;
 1513         int waitfor;
 1514 {
 1515         struct vnode *vp, *mvp;
 1516         struct thread *td;
 1517         int error, lockreq, allerror = 0;
 1518 
 1519         td = curthread;
 1520         lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
 1521         if (waitfor != MNT_WAIT)
 1522                 lockreq |= LK_NOWAIT;
 1523         /*
 1524          * Force stale buffer cache information to be flushed.
 1525          */
 1526 loop:
 1527         MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 1528                 if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
 1529                         VI_UNLOCK(vp);
 1530                         continue;
 1531                 }
 1532                 if ((error = vget(vp, lockreq)) != 0) {
 1533                         if (error == ENOENT) {
 1534                                 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 1535                                 goto loop;
 1536                         }
 1537                         continue;
 1538                 }
 1539                 error = VOP_FSYNC(vp, waitfor, td);
 1540                 if (error)
 1541                         allerror = error;
 1542                 vput(vp);
 1543         }
 1544         return (allerror);
 1545 }
 1546 
 1547 int
 1548 vfs_stdnosync (mp, waitfor)
 1549         struct mount *mp;
 1550         int waitfor;
 1551 {
 1552 
 1553         return (0);
 1554 }
 1555 
 1556 static int
 1557 vop_stdcopy_file_range(struct vop_copy_file_range_args *ap)
 1558 {
 1559         int error;
 1560 
 1561         error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
 1562             ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, ap->a_incred,
 1563             ap->a_outcred, ap->a_fsizetd);
 1564         return (error);
 1565 }
 1566 
 1567 int
 1568 vfs_stdvget (mp, ino, flags, vpp)
 1569         struct mount *mp;
 1570         ino_t ino;
 1571         int flags;
 1572         struct vnode **vpp;
 1573 {
 1574 
 1575         return (EOPNOTSUPP);
 1576 }
 1577 
 1578 int
 1579 vfs_stdfhtovp (mp, fhp, flags, vpp)
 1580         struct mount *mp;
 1581         struct fid *fhp;
 1582         int flags;
 1583         struct vnode **vpp;
 1584 {
 1585 
 1586         return (EOPNOTSUPP);
 1587 }
 1588 
 1589 int
 1590 vfs_stdinit (vfsp)
 1591         struct vfsconf *vfsp;
 1592 {
 1593 
 1594         return (0);
 1595 }
 1596 
 1597 int
 1598 vfs_stduninit (vfsp)
 1599         struct vfsconf *vfsp;
 1600 {
 1601 
 1602         return(0);
 1603 }
 1604 
 1605 int
 1606 vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
 1607         struct mount *mp;
 1608         int cmd;
 1609         struct vnode *filename_vp;
 1610         int attrnamespace;
 1611         const char *attrname;
 1612 {
 1613 
 1614         if (filename_vp != NULL)
 1615                 VOP_UNLOCK(filename_vp);
 1616         return (EOPNOTSUPP);
 1617 }
 1618 
 1619 int
 1620 vfs_stdsysctl(mp, op, req)
 1621         struct mount *mp;
 1622         fsctlop_t op;
 1623         struct sysctl_req *req;
 1624 {
 1625 
 1626         return (EOPNOTSUPP);
 1627 }
 1628 
 1629 static vop_bypass_t *
 1630 bp_by_off(struct vop_vector *vop, struct vop_generic_args *a)
 1631 {
 1632 
 1633         return (*(vop_bypass_t **)((char *)vop + a->a_desc->vdesc_vop_offset));
 1634 }
 1635 
 1636 int
 1637 vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a)
 1638 {
 1639         vop_bypass_t *bp;
 1640         int prev_stops, rc;
 1641 
 1642         bp = bp_by_off(vop, a);
 1643         MPASS(bp != NULL);
 1644 
 1645         prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 1646         rc = bp(a);
 1647         sigallowstop(prev_stops);
 1648         return (rc);
 1649 }
 1650 
 1651 static int
 1652 vop_stdstat(struct vop_stat_args *a)
 1653 {
 1654         struct vattr vattr;
 1655         struct vattr *vap;
 1656         struct vnode *vp;
 1657         struct stat *sb;
 1658         int error;
 1659         u_short mode;
 1660 
 1661         vp = a->a_vp;
 1662         sb = a->a_sb;
 1663 
 1664         error = vop_stat_helper_pre(a);
 1665         if (error != 0)
 1666                 return (error);
 1667 
 1668         vap = &vattr;
 1669 
 1670         /*
 1671          * Initialize defaults for new and unusual fields, so that file
 1672          * systems which don't support these fields don't need to know
 1673          * about them.
 1674          */
 1675         vap->va_birthtime.tv_sec = -1;
 1676         vap->va_birthtime.tv_nsec = 0;
 1677         vap->va_fsid = VNOVAL;
 1678         vap->va_gen = 0;
 1679         vap->va_rdev = NODEV;
 1680 
 1681         error = VOP_GETATTR(vp, vap, a->a_active_cred);
 1682         if (error)
 1683                 goto out;
 1684 
 1685         /*
 1686          * Zero the spare stat fields
 1687          */
 1688         bzero(sb, sizeof *sb);
 1689 
 1690         /*
 1691          * Copy from vattr table
 1692          */
 1693         if (vap->va_fsid != VNOVAL)
 1694                 sb->st_dev = vap->va_fsid;
 1695         else
 1696                 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
 1697         sb->st_ino = vap->va_fileid;
 1698         mode = vap->va_mode;
 1699         switch (vap->va_type) {
 1700         case VREG:
 1701                 mode |= S_IFREG;
 1702                 break;
 1703         case VDIR:
 1704                 mode |= S_IFDIR;
 1705                 break;
 1706         case VBLK:
 1707                 mode |= S_IFBLK;
 1708                 break;
 1709         case VCHR:
 1710                 mode |= S_IFCHR;
 1711                 break;
 1712         case VLNK:
 1713                 mode |= S_IFLNK;
 1714                 break;
 1715         case VSOCK:
 1716                 mode |= S_IFSOCK;
 1717                 break;
 1718         case VFIFO:
 1719                 mode |= S_IFIFO;
 1720                 break;
 1721         default:
 1722                 error = EBADF;
 1723                 goto out;
 1724         }
 1725         sb->st_mode = mode;
 1726         sb->st_nlink = vap->va_nlink;
 1727         sb->st_uid = vap->va_uid;
 1728         sb->st_gid = vap->va_gid;
 1729         sb->st_rdev = vap->va_rdev;
 1730         if (vap->va_size > OFF_MAX) {
 1731                 error = EOVERFLOW;
 1732                 goto out;
 1733         }
 1734         sb->st_size = vap->va_size;
 1735         sb->st_atim.tv_sec = vap->va_atime.tv_sec;
 1736         sb->st_atim.tv_nsec = vap->va_atime.tv_nsec;
 1737         sb->st_mtim.tv_sec = vap->va_mtime.tv_sec;
 1738         sb->st_mtim.tv_nsec = vap->va_mtime.tv_nsec;
 1739         sb->st_ctim.tv_sec = vap->va_ctime.tv_sec;
 1740         sb->st_ctim.tv_nsec = vap->va_ctime.tv_nsec;
 1741         sb->st_birthtim.tv_sec = vap->va_birthtime.tv_sec;
 1742         sb->st_birthtim.tv_nsec = vap->va_birthtime.tv_nsec;
 1743 
 1744         /*
 1745          * According to www.opengroup.org, the meaning of st_blksize is
 1746          *   "a filesystem-specific preferred I/O block size for this
 1747          *    object.  In some filesystem types, this may vary from file
 1748          *    to file"
 1749          * Use minimum/default of PAGE_SIZE (e.g. for VCHR).
 1750          */
 1751 
 1752         sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
 1753         sb->st_flags = vap->va_flags;
 1754         sb->st_blocks = vap->va_bytes / S_BLKSIZE;
 1755         sb->st_gen = vap->va_gen;
 1756 out:
 1757         return (vop_stat_helper_post(a, error));
 1758 }
 1759 
 1760 static int
 1761 vop_stdread_pgcache(struct vop_read_pgcache_args *ap __unused)
 1762 {
 1763         return (EJUSTRETURN);
 1764 }
 1765 
 1766 static int
 1767 vop_stdvput_pair(struct vop_vput_pair_args *ap)
 1768 {
 1769         struct vnode *dvp, *vp, **vpp;
 1770 
 1771         dvp = ap->a_dvp;
 1772         vpp = ap->a_vpp;
 1773         vput(dvp);
 1774         if (vpp != NULL && ap->a_unlock_vp && (vp = *vpp) != NULL)
 1775                 vput(vp);
 1776         return (0);
 1777 }

Cache object: 6878a4c365eaa40e2afa821295ef0319


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.