The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_lookup.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)vfs_lookup.c        8.4 (Berkeley) 2/16/94
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include "opt_capsicum.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/dirent.h>
   48 #include <sys/kernel.h>
   49 #include <sys/capsicum.h>
   50 #include <sys/fcntl.h>
   51 #include <sys/jail.h>
   52 #include <sys/lock.h>
   53 #include <sys/mutex.h>
   54 #include <sys/namei.h>
   55 #include <sys/vnode.h>
   56 #include <sys/mount.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/proc.h>
   59 #include <sys/sdt.h>
   60 #include <sys/syscallsubr.h>
   61 #include <sys/sysctl.h>
   62 #ifdef KTRACE
   63 #include <sys/ktrace.h>
   64 #endif
   65 #ifdef INVARIANTS
   66 #include <machine/_inttypes.h>
   67 #endif
   68 
   69 #include <security/audit/audit.h>
   70 #include <security/mac/mac_framework.h>
   71 
   72 #include <vm/uma.h>
   73 
      /*
       * NAMEI_DIAGNOSTIC is defined and then immediately undefined, so it is
       * not defined for the rest of this file as written.
       */
   74 #define NAMEI_DIAGNOSTIC 1
   75 #undef NAMEI_DIAGNOSTIC
   76 
      /*
       * NDVALIDATE(ndp) calls NDVALIDATE_impl() with the line number of the
       * call site on INVARIANTS kernels and expands to nothing otherwise.
       */
   77 #ifdef INVARIANTS
   78 static void NDVALIDATE_impl(struct nameidata *, int);
   79 #define NDVALIDATE(ndp) NDVALIDATE_impl(ndp, __LINE__)
   80 #else
   81 #define NDVALIDATE(ndp)
   82 #endif
   83 
      /*
       * SDT tracing: the "vfs" provider and the namei lookup entry/return
       * probes that the lookup code in this file fires.
       */
   84 SDT_PROVIDER_DEFINE(vfs);
   85 SDT_PROBE_DEFINE4(vfs, namei, lookup, entry, "struct vnode *", "char *",
   86     "unsigned long", "bool");
   87 SDT_PROBE_DEFINE4(vfs, namei, lookup, return, "int", "struct vnode *", "bool",
   88     "struct nameidata");
   89 
   90 /* Allocation zone for namei pathname buffers; created in nameiinit(). */
   91 uma_zone_t namei_zone;
   92 
   93 /* Placeholder vnode for mp traversal; allocated in nameiinit(). */
   94 static struct vnode *vp_crossmp;
   95 
      /*
       * VOP_ISLOCKED for the crossmp placeholder vnode: unconditionally
       * reports LK_SHARED; the argument structure is not examined.
       */
   96 static int
   97 crossmp_vop_islocked(struct vop_islocked_args *ap)
   98 {
   99 
  100         return (LK_SHARED);
  101 }
  102 
      /*
       * VOP_LOCK1 for the crossmp placeholder vnode.
       *
       * No lock primitive is called.  The request is sanity-checked (it must
       * carry both LK_SHARED and LK_NOWAIT), the vnode interlock is released
       * when the caller passed LK_INTERLOCK, a lock-log record is emitted and
       * 0 is returned.
       */
  103 static int
  104 crossmp_vop_lock1(struct vop_lock1_args *ap)
  105 {
  106         struct lock *lk __diagused;
  107         struct vnode *crossvp;
  108         int lkreq;
  109 
  110         crossvp = ap->a_vp;
  111         lkreq = ap->a_flags;
  112         lk = crossvp->v_vnlock;
  113 
  114         KASSERT((lkreq & (LK_SHARED | LK_NOWAIT)) == (LK_SHARED | LK_NOWAIT),
  115             ("%s: invalid lock request 0x%x for crossmp", __func__, lkreq));
  116 
  117         if ((lkreq & LK_INTERLOCK) != 0)
  118                 VI_UNLOCK(crossvp);
  119         LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, ap->a_file, ap->a_line);
  120         return (0);
  121 }
  122 
      /*
       * VOP_UNLOCK for the crossmp placeholder vnode: emits the "SUNLOCK"
       * lock-log record and returns 0.
       */
  123 static int
  124 crossmp_vop_unlock(struct vop_unlock_args *ap)
  125 {
  126         struct lock *lk __diagused;
  127 
  128         /*
  129          * Nothing is released here; lk only feeds the lock-log macro.
  130          */
  131         lk = ap->a_vp->v_vnlock;
  132         LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, LOCK_FILE,
  133             LOCK_LINE);
  134         return (0);
  135 }
  136 
      /*
       * Vnode operations for the crossmp placeholder vnode.  Only the three
       * locking entry points are overridden here; every other operation is
       * delegated to default_vnodeops.
       */
  137 static struct vop_vector crossmp_vnodeops = {
  138         .vop_default =          &default_vnodeops,
  139         .vop_islocked =         crossmp_vop_islocked,
  140         .vop_lock1 =            crossmp_vop_lock1,
  141         .vop_unlock =           crossmp_vop_unlock,
  142 };
  143 /*
  144  * VFS_VOP_VECTOR_REGISTER(crossmp_vnodeops) is not used here since the vnode
  145  * gets allocated early. See nameiinit for the direct call below.
  146  */
  147 
      /*
       * One element of a lookup's ni_cap_tracker list.  nameicap_tracker_add()
       * appends an element after taking a hold on the directory vnode;
       * nameicap_cleanup_from() drops that hold and frees the element.
       */
  148 struct nameicap_tracker {
  149         struct vnode *dp;                       /* tracked directory vnode */
  150         TAILQ_ENTRY(nameicap_tracker) nm_link;  /* ni_cap_tracker linkage */
  151 };
  152 
  153 /* Malloc type for cap mode tracker elements used for dotdot capability checks. */
  154 MALLOC_DEFINE(M_NAMEITRACKER, "namei_tracker", "namei tracking for dotdot");
  155 
      /*
       * Boot-time initialization, run through the SYSINIT below: create the
       * UMA zone of MAXPATHLEN-sized pathname buffers, register the crossmp
       * vnode operations vector directly, and allocate the crossmp placeholder
       * vnode.  The return value of getnewvnode() is not checked.
       */
  156 static void
  157 nameiinit(void *dummy __unused)
  158 {
  159 
  160         namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL,
  161             UMA_ALIGN_PTR, 0);
  162         vfs_vector_op_register(&crossmp_vnodeops);
  163         getnewvnode("crossmp", NULL, &crossmp_vnodeops, &vp_crossmp);
  164         vp_crossmp->v_state = VSTATE_CONSTRUCTED;
  165 }
  166 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);
  167 
      /*
       * vfs.lookup_cap_dotdot (default 1): when non-zero, namei_setup() turns
       * on ".." tracking (NI_LCF_CAP_DOTDOT) for strict-relative lookups.
       */
  168 static int lookup_cap_dotdot = 1;
  169 SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot, CTLFLAG_RWTUN,
  170     &lookup_cap_dotdot, 0,
  171     "enables \"..\" components in path lookup in capability mode");
      /*
       * vfs.lookup_cap_dotdot_nonlocal (default 1): when zero,
       * nameicap_check_dotdot() rejects ".." on mounts lacking MNT_LOCAL.
       */
  172 static int lookup_cap_dotdot_nonlocal = 1;
  173 SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN,
  174     &lookup_cap_dotdot_nonlocal, 0,
  175     "enables \"..\" components in path lookup in capability mode "
  176     "on non-local mount");
  177 
      /*
       * Record dp as a directory visited by this lookup.
       *
       * Does nothing unless ".." tracking (NI_LCF_CAP_DOTDOT) is active and dp
       * is a directory.  A directory that is already the newest list element
       * is not recorded twice in a row.  Otherwise a new element is allocated
       * (M_WAITOK), the vnode is held with vhold() and the element is appended
       * to ndp->ni_cap_tracker.
       */
  178 static void
  179 nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp)
  180 {
  181         struct nameicap_tracker *entry, *tail;
  182 
  183         if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR)
  184                 return;
  185         tail = TAILQ_LAST(&ndp->ni_cap_tracker, nameicap_tracker_head);
  186         if (tail == NULL || tail->dp != dp) {
  187                 entry = malloc(sizeof(*entry), M_NAMEITRACKER, M_WAITOK);
  188                 vhold(dp);
  189                 entry->dp = dp;
  190                 TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, entry, nm_link);
  191         }
  192 }
  193 
      /*
       * Unlink and free tracker elements from `first' through the end of
       * ndp->ni_cap_tracker, dropping the vnode hold of each one.  A NULL
       * `first' means "start at the head", i.e. empty the whole list.
       */
  194 static void
  195 nameicap_cleanup_from(struct nameidata *ndp, struct nameicap_tracker *first)
  196 {
  197         struct nameicap_tracker *cur, *next;

  198         cur = first != NULL ? first : TAILQ_FIRST(&ndp->ni_cap_tracker);
  199         for (; cur != NULL; cur = next) {
                      /* Fetch the successor before cur is unlinked and freed. */
  200                 next = TAILQ_NEXT(cur, nm_link);
  201                 TAILQ_REMOVE(&ndp->ni_cap_tracker, cur, nm_link);
  202                 vdrop(cur->dp);
  203                 free(cur, M_NAMEITRACKER);
  204         }
  205 }
  206 
      /*
       * Release every element on the lookup's tracker list.  The assertion
       * checks that a non-empty list only occurs when NI_LCF_CAP_DOTDOT is set.
       */
  207 static void
  208 nameicap_cleanup(struct nameidata *ndp)
  209 {
  210         KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) ||
  211             (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative"));
              /* A NULL starting element makes the walk begin at the list head. */
  212         nameicap_cleanup_from(ndp, NULL);
  213 }
  214 
  215 /*
  216  * Capability-mode check for a ".." component.  The step is permitted
  217  * only when the directory it lands on was already visited earlier in
  218  * this same lookup, i.e. it is on the tracker list.  This defeats the
  219  * escape where a directory we walked through is moved under a
  220  * different parent and we then climb out of it with "..".
  221  *
  222  * Independently, when vfs.lookup_cap_dotdot_nonlocal is 0, ".." is
  223  * refused on non-local filesystems, where an external agent could
  224  * help a local lookup leave the compartment.
       *
       * Returns 0 when the step is allowed (or when no check applies: dp is
       * NULL, dp is not a directory, or the lookup is not strict-relative)
       * and ENOTCAPABLE otherwise.  On a match, tracker elements newer than
       * the matching one are released.
  225  */
  226 static int
  227 nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp)
  228 {
  229         struct nameicap_tracker *seen, *stale;
  230         struct mount *mp;

              /* Nothing to verify for non-directories or unrestricted lookups. */
  231         if (dp == NULL || dp->v_type != VDIR)
  232                 return (0);
  233         if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0)
  234                 return (0);
  235         if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0)
  236                 return (ENOTCAPABLE);
  237         mp = dp->v_mount;
  238         if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL &&
  239             (mp->mnt_flag & MNT_LOCAL) == 0)
  240                 return (ENOTCAPABLE);
  241         TAILQ_FOREACH_REVERSE(seen, &ndp->ni_cap_tracker,
  242             nameicap_tracker_head, nm_link) {
  243                 if (seen->dp != dp)
  244                         continue;
                      /* Forget everything recorded after the matching element. */
  245                 stale = TAILQ_NEXT(seen, nm_link);
  246                 if (stale != NULL)
  247                         nameicap_cleanup_from(ndp, stale);
  248                 return (0);
  249         }
  250         return (ENOTCAPABLE);
  251 }
  252 
      /*
       * Return the pathname buffer of cnp to namei_zone and clear both
       * pointers into it (cn_pnbuf and cn_nameptr).
       */
  253 static void
  254 namei_cleanup_cnp(struct componentname *cnp)
  255 {
  256 
  257         uma_zfree(namei_zone, cnp->cn_pnbuf);
  258         cnp->cn_pnbuf = NULL;
  259         cnp->cn_nameptr = NULL;
  260 }
  261 
      /*
       * Restart the walk at the root directory for a path beginning with '/'.
       *
       * In strict-relative mode this is refused with ENOTCAPABLE (and reported
       * through ktrace when KTR_CAPFAIL tracing is active).  Otherwise every
       * leading '/' is consumed from cn_nameptr, ni_pathlen is reduced to
       * match, and *dpp is set to ndp->ni_rootdir with a reference taken via
       * vrefact().  Returns 0 on success.
       */
  262 static int
  263 namei_handle_root(struct nameidata *ndp, struct vnode **dpp)
  264 {
  265         struct componentname *cnp;
  266 
  267         if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) {
  268 #ifdef KTRACE
  269                 if (KTRPOINT(curthread, KTR_CAPFAIL))
  270                         ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
  271 #endif
  272                 return (ENOTCAPABLE);
  273         }
  274 
  275         /* Consume the whole run of leading slashes. */
  276         cnp = &ndp->ni_cnd;
  277         for (; cnp->cn_nameptr[0] == '/'; cnp->cn_nameptr++)
  278                 ndp->ni_pathlen--;
  279         *dpp = ndp->ni_rootdir;
  280         vrefact(*dpp);
  281         return (0);
  282 }
  283 
      /*
       * Pick the vnode a lookup starts from and set up the capability-mode and
       * RBENEATH restrictions for it.
       *
       * On success returns 0 with *dpp set to the starting vnode and *pwdp set
       * to the pwd obtained from pwd_hold().  On failure returns an errno
       * value with *pwdp left NULL.  Apart from the early ECAPMODE return,
       * which happens before pwd_hold(), the error path vrele()s *dpp when it
       * is non-NULL and drops the pwd.
       */
  284 static int
  285 namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
  286 {
  287         struct componentname *cnp;
  288         struct thread *td;
  289         struct pwd *pwd;
  290         int error;
  291         bool startdir_used;
  292 
  293         cnp = &ndp->ni_cnd;
  294         td = curthread;
  295 
  296         startdir_used = false;
  297         *pwdp = NULL;
  298         *dpp = NULL;
  299 
  300 #ifdef CAPABILITY_MODE
  301         /*
  302          * In capability mode, lookups must be restricted to happen in
  303          * the subtree with the root specified by the file descriptor:
  304          * - The root must be real file descriptor, not the pseudo-descriptor
  305          *   AT_FDCWD.
  306          * - The passed path must be relative and not absolute.
  307          * - If lookup_cap_dotdot is disabled, path must not contain the
  308          *   '..' components.
  309          * - If lookup_cap_dotdot is enabled, we verify that all '..'
  310          *   components lookups result in the directories which were
  311          *   previously walked by us, which prevents an escape from
  312          *   the relative root.
  313          */
  314         if (IN_CAPABILITY_MODE(td) && (cnp->cn_flags & NOCAPCHECK) == 0) {
  315                 ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
  316                 ndp->ni_resflags |= NIRES_STRICTREL;
  317                 if (ndp->ni_dirfd == AT_FDCWD) {
  318 #ifdef KTRACE
  319                         if (KTRPOINT(td, KTR_CAPFAIL))
  320                                 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
  321 #endif
  322                         return (ECAPMODE);
  323                 }
  324         }
  325 #endif
  326         error = 0;
  327 
  328         /*
  329          * Get starting point for the translation.
  330          */
  331         pwd = pwd_hold(td);
  332         /*
  333          * The reference on ni_rootdir is acquired in the block below to avoid
  334          * back-to-back atomics for absolute lookups.
  335          */
  336         ndp->ni_rootdir = pwd->pwd_rdir;
  337         ndp->ni_topdir = pwd->pwd_jdir;
  338 
              /*
               * Absolute paths start at the root directory.  Relative paths
               * start, in order of preference, at a caller-supplied
               * ni_startdir, at the current directory for AT_FDCWD, or at the
               * vnode behind ni_dirfd.
               */
  339         if (cnp->cn_pnbuf[0] == '/') {
  340                 ndp->ni_resflags |= NIRES_ABS;
  341                 error = namei_handle_root(ndp, dpp);
  342         } else {
  343                 if (ndp->ni_startdir != NULL) {
  344                         *dpp = ndp->ni_startdir;
  345                         startdir_used = true;
  346                 } else if (ndp->ni_dirfd == AT_FDCWD) {
  347                         *dpp = pwd->pwd_cdir;
  348                         vrefact(*dpp);
  349                 } else {
  350                         if (cnp->cn_flags & AUDITVNODE1)
  351                                 AUDIT_ARG_ATFD1(ndp->ni_dirfd);
  352                         if (cnp->cn_flags & AUDITVNODE2)
  353                                 AUDIT_ARG_ATFD2(ndp->ni_dirfd);
  354 
  355                         error = fgetvp_lookup(ndp->ni_dirfd, ndp, dpp);
  356                 }
                      /*
                       * A non-directory starting vnode is accepted only for
                       * an empty path combined with EMPTYPATH.
                       */
  357                 if (error == 0 && (*dpp)->v_type != VDIR &&
  358                     (cnp->cn_pnbuf[0] != '\0' ||
  359                     (cnp->cn_flags & EMPTYPATH) == 0))
  360                         error = ENOTDIR;
  361         }
              /*
               * RBENEATH: an absolute path is refused; otherwise, unless the
               * lookup is already strict-relative, switch it to
               * strict-relative mode with ".." tracking.
               */
  362         if (error == 0 && (cnp->cn_flags & RBENEATH) != 0) {
  363                 if (cnp->cn_pnbuf[0] == '/') {
  364                         error = ENOTCAPABLE;
  365                 } else if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0) {
  366                         ndp->ni_lcf |= NI_LCF_STRICTRELATIVE |
  367                             NI_LCF_CAP_DOTDOT;
  368                 }
  369         }
  370 
  371         /*
  372          * If we are auditing the kernel pathname, save the user pathname.
  373          */
  374         if (cnp->cn_flags & AUDITVNODE1)
  375                 AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf);
  376         if (cnp->cn_flags & AUDITVNODE2)
  377                 AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf);
              /*
               * A caller-supplied ni_startdir that did not become the
               * starting point (absolute path) has its reference released.
               */
  378         if (ndp->ni_startdir != NULL && !startdir_used)
  379                 vrele(ndp->ni_startdir);
  380         if (error != 0) {
  381                 if (*dpp != NULL)
  382                         vrele(*dpp);
  383                 pwd_drop(pwd);
  384                 return (error);
  385         }
              /* vfs.lookup_cap_dotdot enables ".." tracking for strict lookups. */
  386         if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 &&
  387             lookup_cap_dotdot != 0)
  388                 ndp->ni_lcf |= NI_LCF_CAP_DOTDOT;
  389         SDT_PROBE4(vfs, namei, lookup, entry, *dpp, cnp->cn_pnbuf,
  390             cnp->cn_flags, false);
  391         *pwdp = pwd;
  392         return (0);
  393 }
  394 
      /*
       * Allocate a pathname buffer from namei_zone, attach it to the lookup's
       * componentname and copy the caller's pathname (at most MAXPATHLEN
       * bytes) into it, storing the copied length in ni_pathlen.  Returns the
       * result of the copy routine.
       */
  395 static int
  396 namei_getpath(struct nameidata *ndp)
  397 {
  398         char *pathbuf;
  399 
  400         /*
  401          * The buffer is attached to the componentname before the copy
  402          * is attempted, so it remains allocated when the copy fails;
  403          * this function never frees it.
  404          */
  405         pathbuf = uma_zalloc(namei_zone, M_WAITOK);
  406         ndp->ni_cnd.cn_pnbuf = pathbuf;
  407 
  408         /*
  409          * Names in kernel space are copied with copystr(); all others
  410          * are fetched with copyinstr().
  411          */
  412         if (ndp->ni_segflg != UIO_SYSSPACE)
  413                 return (copyinstr(ndp->ni_dirp, pathbuf, MAXPATHLEN,
  414                     &ndp->ni_pathlen));
  415         return (copystr(ndp->ni_dirp, pathbuf, MAXPATHLEN,
  416             &ndp->ni_pathlen));
  417 }
  418 
      /*
       * Complete a lookup of the empty pathname requested with EMPTYPATH: the
       * result (ni_vp) is the starting vnode chosen by namei_setup().
       *
       * Returns 0 with ni_vp referenced, and locked when LOCKLEAF is set
       * (shared if LOCKSHARED, exclusive otherwise).  On failure the pathname
       * buffer is freed and an errno value is returned; ENOENT is used when
       * the vnode turns out to be doomed after locking.
       */
  419 static int
  420 namei_emptypath(struct nameidata *ndp)
  421 {
  422         struct componentname *cnp;
  423         struct pwd *pwd;
  424         struct vnode *dp;
  425         int error;
  426 
  427         cnp = &ndp->ni_cnd;
  428         MPASS(*cnp->cn_pnbuf == '\0');
  429         MPASS((cnp->cn_flags & EMPTYPATH) != 0);
  430         MPASS((cnp->cn_flags & (LOCKPARENT | WANTPARENT)) == 0);
  431 
  432         ndp->ni_resflags |= NIRES_EMPTYPATH;
  433         error = namei_setup(ndp, &dp, &pwd);
  434         if (error != 0) {
  435                 goto errout;
  436         }
  437 
  438         /*
  439          * Usecount on dp already provided by namei_setup.
  440          */
  441         ndp->ni_vp = dp;
              /* The pwd is not needed beyond this point. */
  442         pwd_drop(pwd);
  443         NDVALIDATE(ndp);
  444         if ((cnp->cn_flags & LOCKLEAF) != 0) {
  445                 VOP_LOCK(dp, (cnp->cn_flags & LOCKSHARED) != 0 ?
  446                     LK_SHARED : LK_EXCLUSIVE);
                      /* vput() unlocks dp and drops the reference in one step. */
  447                 if (VN_IS_DOOMED(dp)) {
  448                         vput(dp);
  449                         error = ENOENT;
  450                         goto errout;
  451                 }
  452         }
  453         SDT_PROBE4(vfs, namei, lookup, return, 0, ndp->ni_vp, false, ndp);
  454         return (0);
  455 
  456 errout:
  457         SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp);
  458         namei_cleanup_cnp(cnp);
  459         return (error);
  460 }
  461 
      /*
       * Splice the target of the symbolic link ndp->ni_vp into the pathname
       * buffer so that the lookup can continue with the expanded path.
       *
       * Returns 0 on success, with cn_pnbuf holding the link target followed
       * by whatever part of the path was still unparsed and ni_pathlen
       * increased by the link length.  Errors: ELOOP once MAXSYMLINKS links
       * have been followed, the MAC or VOP_READLINK() error, ENOENT for an
       * empty link, and ENAMETOOLONG when the combined path would exceed
       * MAXPATHLEN.  On error the original pathname buffer is left attached.
       */
  462 static int __noinline
  463 namei_follow_link(struct nameidata *ndp)
  464 {
  465         struct componentname *cnp;
  466         struct thread *td;
  467         struct iovec aiov;
  468         struct uio auio;
  469         char *linkbuf;
  470         int error, linklen;
  471         bool has_tail;
  472 
  473         cnp = &ndp->ni_cnd;
  474         td = curthread;
  475 
  476         if (ndp->ni_loopcnt++ >= MAXSYMLINKS)
  477                 return (ELOOP);
  478 #ifdef MAC
  479         if ((cnp->cn_flags & NOMACCHECK) == 0) {
  480                 error = mac_vnode_check_readlink(td->td_ucred, ndp->ni_vp);
  481                 if (error != 0)
  482                         return (error);
  483         }
  484 #endif
  485         /*
  486          * When more path follows the link (ni_pathlen > 1), the target is
  487          * read into a fresh buffer so that the remainder can be appended
  488          * to it.  Otherwise the target is read straight into the current
  489          * pathname buffer.
  490          */
  491         has_tail = ndp->ni_pathlen > 1;
  492         linkbuf = has_tail ? uma_zalloc(namei_zone, M_WAITOK) : cnp->cn_pnbuf;
  493 
  494         aiov.iov_base = linkbuf;
  495         aiov.iov_len = MAXPATHLEN;
  496         auio.uio_iov = &aiov;
  497         auio.uio_iovcnt = 1;
  498         auio.uio_offset = 0;
  499         auio.uio_resid = MAXPATHLEN;
  500         auio.uio_segflg = UIO_SYSSPACE;
  501         auio.uio_rw = UIO_READ;
  502         auio.uio_td = td;
  503         error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
  504         if (error != 0)
  505                 goto fail;
  506         linklen = MAXPATHLEN - auio.uio_resid;
  507         if (linklen == 0) {
  508                 error = ENOENT;
  509                 goto fail;
  510         }
  511         if (linklen + ndp->ni_pathlen > MAXPATHLEN) {
  512                 error = ENAMETOOLONG;
  513                 goto fail;
  514         }
  515         if (has_tail) {
  516                 bcopy(ndp->ni_next, linkbuf + linklen, ndp->ni_pathlen);
  517                 uma_zfree(namei_zone, cnp->cn_pnbuf);
  518                 cnp->cn_pnbuf = linkbuf;
  519         } else {
  520                 cnp->cn_pnbuf[linklen] = '\0';
  521         }
  522         ndp->ni_pathlen += linklen;
  523         return (0);
  524 fail:
  525         /* Only a separately allocated buffer needs to be given back. */
  526         if (has_tail)
  527                 uma_zfree(namei_zone, linkbuf);
  528         return (error);
      }
  529 
  530 /*
  531  * Convert a pathname into a pointer to a locked vnode.
  532  *
  533  * The FOLLOW flag is set when symbolic links are to be followed
  534  * when they occur at the end of the name translation process.
  535  * Symbolic links are always followed for all other pathname
  536  * components other than the last.
  537  *
  538  * The segflg defines whether the name is to be copied from user
  539  * space or kernel space.
  540  *
  541  * Overall outline of namei:
  542  *
  543  *      copy in name
  544  *      get starting directory
  545  *      while (!done && !error) {
  546  *              call lookup to search path.
  547  *              if symbolic link, massage name in buffer and continue
  548  *      }
       *
       * Returns 0 on success or an errno value.  On every error return taken
       * in this function the pathname buffer has been released with
       * namei_cleanup_cnp(); results returned directly by cache_fplookup() or
       * namei_emptypath() are passed through unchanged.
  549  */
  550 int
  551 namei(struct nameidata *ndp)
  552 {
  553         struct vnode *dp;       /* the directory we are searching */
  554         struct componentname *cnp;
  555         struct thread *td;
  556         struct pwd *pwd;
  557         int error;
  558         enum cache_fpl_status status;
  559 
  560         cnp = &ndp->ni_cnd;
  561         td = curthread;
  562 #ifdef INVARIANTS
  563         KASSERT((ndp->ni_debugflags & NAMEI_DBG_CALLED) == 0,
  564             ("%s: repeated call to namei without NDREINIT", __func__));
  565         KASSERT(ndp->ni_debugflags == NAMEI_DBG_INITED,
  566             ("%s: bad debugflags %d", __func__, ndp->ni_debugflags));
  567         ndp->ni_debugflags |= NAMEI_DBG_CALLED;
  568         if (ndp->ni_startdir != NULL)
  569                 ndp->ni_debugflags |= NAMEI_DBG_HADSTARTDIR;
  570         if (cnp->cn_flags & FAILIFEXISTS) {
  571                 KASSERT(cnp->cn_nameiop == CREATE,
  572                     ("%s: FAILIFEXISTS passed for op %d", __func__, cnp->cn_nameiop));
  573                 /*
  574                  * The limitation below is to restrict hairy corner cases.
  575                  */
  576                 KASSERT((cnp->cn_flags & (LOCKPARENT | LOCKLEAF)) == LOCKPARENT,
  577                     ("%s: FAILIFEXISTS must be passed with LOCKPARENT and without LOCKLEAF",
  578                     __func__));
  579         }
  580 #endif
  581         ndp->ni_cnd.cn_cred = td->td_ucred;
  582         KASSERT(ndp->ni_resflags == 0, ("%s: garbage in ni_resflags: %x\n",
  583             __func__, ndp->ni_resflags));
  584         KASSERT(cnp->cn_cred && td->td_proc, ("namei: bad cred/proc"));
  585         KASSERT((cnp->cn_flags & NAMEI_INTERNAL_FLAGS) == 0,
  586             ("namei: unexpected flags: %" PRIx64 "\n",
  587             cnp->cn_flags & NAMEI_INTERNAL_FLAGS));
  588         if (cnp->cn_flags & NOCACHE)
  589                 KASSERT(cnp->cn_nameiop != LOOKUP,
  590                     ("%s: NOCACHE passed with LOOKUP", __func__));
  591         MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR ||
  592             ndp->ni_startdir->v_type == VBAD);
  593 
  594         ndp->ni_lcf = 0;
  595         ndp->ni_loopcnt = 0;
  596         ndp->ni_vp = NULL;
  597 
              /* Copy the pathname into a freshly allocated cn_pnbuf. */
  598         error = namei_getpath(ndp);
  599         if (__predict_false(error != 0)) {
  600                 namei_cleanup_cnp(cnp);
  601                 SDT_PROBE4(vfs, namei, lookup, return, error, NULL,
  602                     false, ndp);
  603                 return (error);
  604         }
  605 
  606         cnp->cn_nameptr = cnp->cn_pnbuf;
  607 
  608 #ifdef KTRACE
  609         if (KTRPOINT(td, KTR_NAMEI)) {
  610                 ktrnamei(cnp->cn_pnbuf);
  611         }
  612 #endif
  613         TSNAMEI(curthread->td_proc->p_pid, cnp->cn_pnbuf);
  614 
  615         /*
  616          * First try looking up the target without locking any vnodes.
  617          *
  618          * We may need to start from scratch or pick up where it left off.
  619          */
  620         error = cache_fplookup(ndp, &status, &pwd);
  621         switch (status) {
  622         case CACHE_FPL_STATUS_UNSET:
  623                 __assert_unreachable();
  624                 break;
  625         case CACHE_FPL_STATUS_HANDLED:
                      /* The lockless lookup gave the final answer; pass it on. */
  626                 if (error == 0)
  627                         NDVALIDATE(ndp);
  628                 return (error);
  629         case CACHE_FPL_STATUS_PARTIAL:
                      /*
                       * Continue with the locked lookup from ni_startdir.
                       * NOTE(review): pwd is presumably filled in by
                       * cache_fplookup() on this path -- confirm.
                       */
  630                 TAILQ_INIT(&ndp->ni_cap_tracker);
  631                 dp = ndp->ni_startdir;
  632                 break;
  633         case CACHE_FPL_STATUS_DESTROYED:
                      /*
                       * Reset the symlink counter and copy the pathname in
                       * again, then start over as in the ABORTED case.
                       */
  634                 ndp->ni_loopcnt = 0;
  635                 error = namei_getpath(ndp);
  636                 if (__predict_false(error != 0)) {
  637                         namei_cleanup_cnp(cnp);
  638                         return (error);
  639                 }
  640                 cnp->cn_nameptr = cnp->cn_pnbuf;
  641                 /* FALLTHROUGH */
  642         case CACHE_FPL_STATUS_ABORTED:
                      /* Start the locked lookup from scratch. */
  643                 TAILQ_INIT(&ndp->ni_cap_tracker);
  644                 MPASS(ndp->ni_lcf == 0);
                      /* Empty path: valid only with EMPTYPATH, ENOENT otherwise. */
  645                 if (*cnp->cn_pnbuf == '\0') {
  646                         if ((cnp->cn_flags & EMPTYPATH) != 0) {
  647                                 return (namei_emptypath(ndp));
  648                         }
  649                         namei_cleanup_cnp(cnp);
  650                         SDT_PROBE4(vfs, namei, lookup, return, ENOENT, NULL,
  651                             false, ndp);
  652                         return (ENOENT);
  653                 }
  654                 error = namei_setup(ndp, &dp, &pwd);
  655                 if (error != 0) {
  656                         namei_cleanup_cnp(cnp);
  657                         return (error);
  658                 }
  659                 break;
  660         }
  661 
  662         /*
  663          * Locked lookup.
  664          */
  665         for (;;) {
  666                 ndp->ni_startdir = dp;
  667                 error = vfs_lookup(ndp);
  668                 if (error != 0)
  669                         goto out;
  670 
  671                 /*
  672                  * If not a symbolic link, we're done.
  673                  */
  674                 if ((cnp->cn_flags & ISSYMLINK) == 0) {
  675                         SDT_PROBE4(vfs, namei, lookup, return, error,
  676                             ndp->ni_vp, false, ndp);
  677                         nameicap_cleanup(ndp);
  678                         pwd_drop(pwd);
  679                         NDVALIDATE(ndp);
  680                         return (0);
  681                 }
  682                 error = namei_follow_link(ndp);
  683                 if (error != 0)
  684                         break;
                      /*
                       * The link has been spliced into the path: release the
                       * symlink vnode and continue from its parent directory.
                       */
  685                 vput(ndp->ni_vp);
  686                 dp = ndp->ni_dvp;
  687                 /*
  688                  * Check if root directory should replace current directory.
  689                  */
  690                 cnp->cn_nameptr = cnp->cn_pnbuf;
  691                 if (*(cnp->cn_nameptr) == '/') {
  692                         vrele(dp);
  693                         error = namei_handle_root(ndp, &dp);
  694                         if (error != 0)
  695                                 goto out;
  696                 }
  697         }
              /*
               * Reached only through the break above, i.e. when
               * namei_follow_link() failed: release the symlink vnode and
               * its parent directory before the common error exit.
               */
  698         vput(ndp->ni_vp);
  699         ndp->ni_vp = NULL;
  700         vrele(ndp->ni_dvp);
  701 out:
  702         MPASS(error != 0);
  703         SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp);
  704         namei_cleanup_cnp(cnp);
  705         nameicap_cleanup(ndp);
  706         pwd_drop(pwd);
  707         return (error);
  708 }
  709 
      /*
       * Adjust the lock flags used for a component lookup on mount mp.
       *
       * The request is forced to exclusive when mp is NULL, or when a shared
       * lock was requested but the mount does not set MNTK_LOOKUP_SHARED.
       * LK_NODDLKTREAT is always added.  cnflags is accepted but not used.
       * Returns the adjusted flags.
       */
  710 static int
  711 compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags)
  712 {
  713         bool keep_mode;
  714 
  715         keep_mode = mp != NULL && ((lkflags & LK_SHARED) == 0 ||
  716             (mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) != 0);
  717         if (!keep_mode)
  718                 lkflags = (lkflags & ~LK_SHARED) | LK_EXCLUSIVE;
  719 
  720         return (lkflags | LK_NODDLKTREAT);
  721 }
  722 
      /*
       * Decide whether the vnode being looked up must be locked exclusively.
       * `flags' are the componentname flags of the lookup.  Returns non-zero
       * when an exclusive lock is required and 0 when a shared lock will do.
       */
  723 static __inline int
  724 needs_exclusive_leaf(struct mount *mp, int flags)
  725 {
  726 
  727         /*
  728          * An exclusive lock can only be required for the final
  729          * component, and only when the caller wants it returned
  730          * locked.  Intermediate nodes get by with shared locks.
  731          */
  732         if ((flags & ISLASTCN) == 0 || (flags & LOCKLEAF) == 0)
  733                 return (0);
  734 
  735         /*
  736          * Without LOCKSHARED the leaf is always locked exclusively.
  737          */
  738         if ((flags & LOCKSHARED) == 0)
  739                 return (1);
  740 
  741         /*
  742          * LOCKSHARED was requested.  Outside of open() a shared lock
  743          * on the leaf is always sufficient.  For open() it is
  744          * sufficient only when the mount point supports extended
  745          * shared operations.
  746          */
  747         if ((flags & ISOPEN) == 0)
  748                 return (0);
  749 
  750         return (!MNT_EXTENDED_SHARED(mp));
  751 }
  752 
  753 /*
  754  * Various filesystems expect to be able to copy a name component with length
  755  * bounded by NAME_MAX into a directory entry buffer of size MAXNAMLEN.  Make
  756  * sure that these are the same size.
       * A mismatch between the two constants is a compile-time error.
  757  */
  758 _Static_assert(MAXNAMLEN == NAME_MAX,
  759     "MAXNAMLEN and NAME_MAX have different values");
  760 
      /*
       * Resolve a lookup whose result is the starting vnode dp itself.
       *
       * Marks the component as the last one, locks dp, and on success sets
       * ni_vp to dp with cn_namelen 0.  With wantparent non-zero, ni_dvp is
       * also set to dp and an additional reference is taken for it.  dp is
       * left locked only when LOCKPARENT or LOCKLEAF is set.
       *
       * Returns 0 on success, ENOTDIR when dp is not a directory and EISDIR
       * when the operation is not LOOKUP; on error dp is unlocked.
       * NOTE(review): the conditions under which the caller takes this path
       * are not visible in this function.
       */
  761 static int __noinline
  762 vfs_lookup_degenerate(struct nameidata *ndp, struct vnode *dp, int wantparent)
  763 {
  764         struct componentname *cnp;
  765         struct mount *mp;
  766         int error;
  767 
  768         cnp = &ndp->ni_cnd;
  769 
  770         cnp->cn_flags |= ISLASTCN;
  771 
              /* Upgrade the requested lock type if the leaf must be exclusive. */
  772         mp = atomic_load_ptr(&dp->v_mount);
  773         if (needs_exclusive_leaf(mp, cnp->cn_flags)) {
  774                 cnp->cn_lkflags &= ~LK_SHARED;
  775                 cnp->cn_lkflags |= LK_EXCLUSIVE;
  776         }
  777 
  778         vn_lock(dp,
  779             compute_cn_lkflags(mp, cnp->cn_lkflags | LK_RETRY,
  780             cnp->cn_flags));
  781 
  782         if (dp->v_type != VDIR) {
  783                 error = ENOTDIR;
  784                 goto bad;
  785         }
  786         if (cnp->cn_nameiop != LOOKUP) {
  787                 error = EISDIR;
  788                 goto bad;
  789         }
  790         if (wantparent) {
  791                 ndp->ni_dvp = dp;
  792                 VREF(dp);
  793         }
  794         ndp->ni_vp = dp;
  795         cnp->cn_namelen = 0;
  796 
              /* Note the else: at most one of the two audit slots is filled. */
  797         if (cnp->cn_flags & AUDITVNODE1)
  798                 AUDIT_ARG_VNODE1(dp);
  799         else if (cnp->cn_flags & AUDITVNODE2)
  800                 AUDIT_ARG_VNODE2(dp);
  801 
  802         if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
  803                 VOP_UNLOCK(dp);
  804         return (0);
  805 bad:
  806         VOP_UNLOCK(dp);
  807         return (error);
  808 }
  809 
  810 /*
  811  * FAILIFEXISTS handling: release the parent and the target that was found,
  812  * free the pathname buffer and report EEXIST.
  813  * XXX namei called with LOCKPARENT but not LOCKLEAF has the strange behaviour
  814  * of leaving the vnode unlocked if the target is the same vnode as the parent.
  815  */
  816 static int __noinline
  817 vfs_lookup_failifexists(struct nameidata *ndp)
  818 {
  819         struct componentname *cnp __diagused;
  820         struct vnode *dvp, *vp;
  821 
  822         cnp = &ndp->ni_cnd;
  823         MPASS((cnp->cn_flags & ISSYMLINK) == 0);
  824         dvp = ndp->ni_dvp;
  825         vp = ndp->ni_vp;
  826         if (vp != dvp)
  827                 vput(dvp);
  828         else
  829                 vrele(dvp);
  830         vrele(vp);
  831         ndp->ni_dvp = ndp->ni_vp = NULL;
  832         NDFREE_PNBUF(ndp);
  833         return (EEXIST);
  834 }
  835 
  836 /*
  837  * Search a pathname.
  838  * This is a very central and rather complicated routine.
  839  *
  840  * The remaining pathname starts at cn_nameptr and, including its
  841  * terminating nul, is ni_pathlen bytes long.  The starting directory is
  842  * taken from ni_startdir.  The pathname is descended until done, or a
  843  * symbolic link is encountered.  ISSYMLINK is set in cn_flags if a
  844  * symbolic link needing interpretation is encountered.
  845  *
  846  * cn_nameiop is LOOKUP, CREATE, RENAME, or DELETE depending on
  847  * whether the name is to be looked up, created, renamed, or deleted.
  848  * When CREATE, RENAME, or DELETE is specified, information usable in
  849  * creating, renaming, or deleting a directory entry may be calculated.
  850  * If cn_flags has LOCKPARENT set, the parent directory is returned
  851  * locked. If cn_flags has WANTPARENT set, the parent directory is
  852  * returned unlocked. Otherwise the parent directory is not returned. If
  853  * the target of the pathname exists and LOCKLEAF is set in cn_flags
  854  * the target is returned locked, otherwise it is returned unlocked.
  855  * When deleting or renaming, a last component of "." or ".." is rejected
  856  * with EINVAL.  Returns 0 on success, otherwise an errno value.
  857  *
  858  * Overall outline of lookup:
  859  *
  860  *      handle degenerate case where name is null string
  861  * dirloop:
  862  *      identify next component of name at cnp->cn_nameptr
  863  *      if .. and crossing mount points and on mounted filesys, find parent
  864  *      call VOP_LOOKUP routine for next component name
  865  *          directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
  866  *          component vnode returned in ni_vp (if it exists), locked.
  867  *      if result vnode is mounted on and crossing mount points,
  868  *          find the root of the mounted filesystem
  869  *      if more components of name, do next level at dirloop
  870  *      return the answer in ni_vp, locked if LOCKLEAF set
  871  *          if LOCKPARENT set, return locked parent in ni_dvp
  872  *          if WANTPARENT set, return unlocked parent in ni_dvp
  873  */
  874 int
  875 vfs_lookup(struct nameidata *ndp)
  876 {
  877         char *cp;                       /* pointer into pathname argument */
  878         char *prev_ni_next;             /* saved ndp->ni_next */
  879         char *nulchar;                  /* location of '\0' in cn_pnbuf */
  880         char *lastchar;                 /* location of the last character */
  881         struct vnode *dp = NULL;        /* the directory we are searching */
  882         struct vnode *tdp;              /* saved dp */
  883         struct mount *mp;               /* mount table entry */
  884         struct prison *pr;              /* prison whose root is compared to dp */
  885         size_t prev_ni_pathlen;         /* saved ndp->ni_pathlen */
  886         int docache;                    /* == 0 do not cache last component */
  887         int wantparent;                 /* 1 => wantparent or lockparent flag */
  888         int rdonly;                     /* lookup read-only flag bit */
  889         int error = 0;
  890         int dpunlocked = 0;             /* dp has already been unlocked */
  891         int relookup = 0;               /* do not consume the path component */
  892         struct componentname *cnp = &ndp->ni_cnd;
  893         int lkflags_save;               /* cn_lkflags saved across VOP_LOOKUP() */
  894         int ni_dvp_unlocked;            /* 1: ni_dvp unlocked, 2: ni_dvp released */
  895         int crosslkflags;               /* lock flags passed to VFS_ROOT() */
  896         bool crosslock;                 /* mounted-on vnode has VV_CROSSLOCK */
  897 
  898         /*
  899          * Setup: break out flag bits into variables.
  900          */
  901         ni_dvp_unlocked = 0;
  902         wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
  903         KASSERT(cnp->cn_nameiop == LOOKUP || wantparent,
  904             ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT."));
  905         /*
  906          * When set to zero, docache causes the last component of the
  907          * pathname to be deleted from the cache and the full lookup
  908          * of the name to be done (via VOP_CACHEDLOOKUP()). Often
  909          * filesystems need some pre-computed values that are made
  910          * during the full lookup, for instance UFS sets dp->i_offset.
  911          *
  912          * The docache variable is set to zero when requested by the
  913          * NOCACHE flag and for all modifying operations except CREATE.
  914          */
  915         docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
  916         if (cnp->cn_nameiop == DELETE ||
  917             (wantparent && cnp->cn_nameiop != CREATE &&
  918              cnp->cn_nameiop != LOOKUP))
  919                 docache = 0;
  920         rdonly = cnp->cn_flags & RDONLY;
  921         cnp->cn_flags &= ~ISSYMLINK;
  922         ndp->ni_dvp = NULL;
  923 
  924         cnp->cn_lkflags = LK_SHARED;
  925         dp = ndp->ni_startdir;
  926         ndp->ni_startdir = NULLVP;
  927 
  928         /*
  929          * Leading slashes, if any, are supposed to be skipped by the caller.
  930          */
  931         MPASS(cnp->cn_nameptr[0] != '/');
  932 
  933         /*
  934          * Check for degenerate name (e.g. / or "") which is a way of talking
  935          * about a directory, e.g. like "/." or ".".
  936          */
  937         if (__predict_false(cnp->cn_nameptr[0] == '\0')) {
  938                 error = vfs_lookup_degenerate(ndp, dp, wantparent);
  939                 if (error == 0)
  940                         goto success_right_lock;
  941                 goto bad_unlocked;
  942         }
  943 
  944         /*
  945          * Nul-out trailing slashes (e.g., "foo///" -> "foo").
  946          *
  947          * This must be done before VOP_LOOKUP() because some fs's don't know
  948          * about trailing slashes.  Remember if there were trailing slashes to
  949          * handle symlinks, existing non-directories and non-existing files
  950          * that won't be directories specially later.
  951          */
  952         MPASS(ndp->ni_pathlen >= 2);
  953         lastchar = &cnp->cn_nameptr[ndp->ni_pathlen - 2];
  954         if (*lastchar == '/') {
  955                 while (lastchar >= cnp->cn_pnbuf) {
  956                         *lastchar = '\0';
  957                         lastchar--;
  958                         ndp->ni_pathlen--;
  959                         if (*lastchar != '/') {
  960                                 break;
  961                         }
  962                 }
  963                 cnp->cn_flags |= TRAILINGSLASH;
  964         }
  965 
  966         /*
  967          * We use shared locks until we hit the parent of the last cn then
  968          * we adjust based on the requesting flags.
  969          */
  970         vn_lock(dp,
  971             compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY,
  972             cnp->cn_flags));
  973 
  974 dirloop:
  975         /*
  976          * Search a new directory.
  977          *
  978          * The last component of the filename is left accessible via
  979          * cnp->cn_nameptr. It has to be freed with a call to NDFREE*.
  980          *
  981          * Store / as a temporary sentinel so that we only have one character
  982          * to test for. Pathnames tend to be short so this should not be
  983          * resulting in cache misses.
  984          */
  985         nulchar = &cnp->cn_nameptr[ndp->ni_pathlen - 1];
  986         KASSERT(*nulchar == '\0',
  987             ("%s: expected nul at %p; string [%s]\n", __func__, nulchar,
  988             cnp->cn_pnbuf));
  989         *nulchar = '/';
  990         for (cp = cnp->cn_nameptr; *cp != '/'; cp++) {
  991                 KASSERT(*cp != '\0',
  992                     ("%s: encountered unexpected nul; string [%s]\n", __func__,
  993                     cnp->cn_nameptr));
  994                 continue;
  995         }
  996         *nulchar = '\0';
  997         cnp->cn_namelen = cp - cnp->cn_nameptr;
  998         if (__predict_false(cnp->cn_namelen > NAME_MAX)) {
  999                 error = ENAMETOOLONG;
 1000                 goto bad;
 1001         }
 1002 #ifdef NAMEI_DIAGNOSTIC
 1003         { char c = *cp;
 1004         *cp = '\0';
 1005         printf("{%s}: ", cnp->cn_nameptr);
 1006         *cp = c; }
 1007 #endif
 1008         prev_ni_pathlen = ndp->ni_pathlen;
 1009         ndp->ni_pathlen -= cnp->cn_namelen;
 1010         KASSERT(ndp->ni_pathlen <= PATH_MAX,
 1011             ("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen));
 1012         prev_ni_next = ndp->ni_next;
 1013         ndp->ni_next = cp;
 1014 
 1015         /*
 1016          * Something else should be clearing this.
 1017          */
 1018         cnp->cn_flags &= ~(ISDOTDOT|ISLASTCN);
 1019 
 1020         cnp->cn_flags |= MAKEENTRY;
 1021         if (*cp == '\0' && docache == 0)
 1022                 cnp->cn_flags &= ~MAKEENTRY;
 1023         if (cnp->cn_namelen == 2 &&
 1024             cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
 1025                 cnp->cn_flags |= ISDOTDOT;
 1026         if (*ndp->ni_next == 0) {
 1027                 cnp->cn_flags |= ISLASTCN;
 1028 
 1029                 if (__predict_false(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' &&
 1030                     (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))) {
 1031                         error = EINVAL;
 1032                         goto bad;
 1033                 }
 1034         }
 1035 
 1036         nameicap_tracker_add(ndp, dp);
 1037 
 1038         /*
 1039          * Make sure degenerate names don't get here, their handling was
 1040          * previously found in this spot.
 1041          */
 1042         MPASS(cnp->cn_nameptr[0] != '\0');
 1043 
 1044         /*
 1045          * Handle "..": six special cases.
 1046          * 0. If doing a capability lookup and lookup_cap_dotdot is
 1047          *    disabled, return ENOTCAPABLE.
 1048          * 1. Return an error if this is the last component of
 1049          *    the name and the operation is DELETE or RENAME.
 1050          * 2. If at root directory (e.g. after chroot)
 1051          *    or at absolute root directory
 1052          *    then ignore it so can't get out.
 1053          * 3. If this vnode is the root of a mounted
 1054          *    filesystem, then replace it with the
 1055          *    vnode which was mounted on so we take the
 1056          *    .. in the other filesystem.
 1057          * 4. If the vnode is the top directory of
 1058          *    the jail or chroot, don't let them out.
 1059          * 5. If doing a capability lookup and lookup_cap_dotdot is
 1060          *    enabled, return ENOTCAPABLE if the lookup would escape
 1061          *    from the initial file descriptor directory.  Checks are
 1062          *    done by ensuring that namei() already traversed the
 1063          *    result of dotdot lookup.
 1064          */
 1065         if (cnp->cn_flags & ISDOTDOT) {
 1066                 if ((ndp->ni_lcf & (NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT))
 1067                     == NI_LCF_STRICTRELATIVE) {
 1068 #ifdef KTRACE
 1069                         if (KTRPOINT(curthread, KTR_CAPFAIL))
 1070                                 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
 1071 #endif
 1072                         error = ENOTCAPABLE;
 1073                         goto bad;
 1074                 }
 1075                 if ((cnp->cn_flags & ISLASTCN) != 0 &&
 1076                     (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
 1077                         error = EINVAL;
 1078                         goto bad;
 1079                 }
 1080                 for (;;) {
 1081                         for (pr = cnp->cn_cred->cr_prison; pr != NULL;
 1082                              pr = pr->pr_parent)
 1083                                 if (dp == pr->pr_root)
 1084                                         break;
 1085                         if (dp == ndp->ni_rootdir || 
 1086                             dp == ndp->ni_topdir || 
 1087                             dp == rootvnode ||
 1088                             pr != NULL ||
 1089                             ((dp->v_vflag & VV_ROOT) != 0 &&
 1090                              (cnp->cn_flags & NOCROSSMOUNT) != 0)) {
 1091                                 ndp->ni_dvp = dp;
 1092                                 ndp->ni_vp = dp;
 1093                                 VREF(dp);
 1094                                 goto nextname;
 1095                         }
 1096                         if ((dp->v_vflag & VV_ROOT) == 0)
 1097                                 break;
 1098                         if (VN_IS_DOOMED(dp)) { /* forced unmount */
 1099                                 error = ENOENT;
 1100                                 goto bad;
 1101                         }
 1102                         tdp = dp;
 1103                         dp = dp->v_mount->mnt_vnodecovered;
 1104                         VREF(dp);
 1105                         vput(tdp);
 1106                         vn_lock(dp,
 1107                             compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
 1108                             LK_RETRY, ISDOTDOT));
 1109                         error = nameicap_check_dotdot(ndp, dp);
 1110                         if (error != 0) {
 1111 #ifdef KTRACE
 1112                                 if (KTRPOINT(curthread, KTR_CAPFAIL))
 1113                                         ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
 1114 #endif
 1115                                 goto bad;
 1116                         }
 1117                 }
 1118         }
 1119 
 1120         /*
 1121          * We now have a segment name to search for, and a directory to search.
 1122          */
 1123 unionlookup:
 1124 #ifdef MAC
 1125         error = mac_vnode_check_lookup(cnp->cn_cred, dp, cnp);
 1126         if (__predict_false(error))
 1127                 goto bad;
 1128 #endif
 1129         ndp->ni_dvp = dp;
 1130         ndp->ni_vp = NULL;
 1131         ASSERT_VOP_LOCKED(dp, "lookup");
 1132         /*
 1133          * If we have a shared lock we may need to upgrade the lock for the
 1134          * last operation.
 1135          */
 1136         if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN) &&
 1137             dp != vp_crossmp && VOP_ISLOCKED(dp) == LK_SHARED)
 1138                 vn_lock(dp, LK_UPGRADE|LK_RETRY);
 1139         if (VN_IS_DOOMED(dp)) {
 1140                 error = ENOENT;
 1141                 goto bad;
 1142         }
 1143         /*
 1144          * If we're looking up the last component and we need an exclusive
 1145          * lock, adjust our lkflags.
 1146          */
 1147         if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags))
 1148                 cnp->cn_lkflags = LK_EXCLUSIVE;
 1149 #ifdef NAMEI_DIAGNOSTIC
 1150         vn_printf(dp, "lookup in ");
 1151 #endif
 1152         lkflags_save = cnp->cn_lkflags;
 1153         cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags,
 1154             cnp->cn_flags);
 1155         error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp);
 1156         cnp->cn_lkflags = lkflags_save;
 1157         if (error != 0) {
 1158                 KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
 1159 #ifdef NAMEI_DIAGNOSTIC
 1160                 printf("not found\n");
 1161 #endif
 1162                 if ((error == ENOENT) &&
 1163                     (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) &&
 1164                     (dp->v_mount->mnt_flag & MNT_UNION)) {
 1165                         tdp = dp;
 1166                         dp = dp->v_mount->mnt_vnodecovered;
 1167                         VREF(dp);
 1168                         vput(tdp);
 1169                         vn_lock(dp,
 1170                             compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
 1171                             LK_RETRY, cnp->cn_flags));
 1172                         nameicap_tracker_add(ndp, dp);
 1173                         goto unionlookup;
 1174                 }
 1175 
 1176                 if (error == ERELOOKUP) {
 1177                         vref(dp);
 1178                         ndp->ni_vp = dp;
 1179                         error = 0;
 1180                         relookup = 1;
 1181                         goto good;
 1182                 }
 1183 
 1184                 if (error != EJUSTRETURN)
 1185                         goto bad;
 1186                 /*
 1187                  * At this point, we know we're at the end of the
 1188                  * pathname.  If creating / renaming, we can consider
 1189                  * allowing the file or directory to be created / renamed,
 1190                  * provided we're not on a read-only filesystem.
 1191                  */
 1192                 if (rdonly) {
 1193                         error = EROFS;
 1194                         goto bad;
 1195                 }
 1196                 /* trailing slash only allowed for directories */
 1197                 if ((cnp->cn_flags & TRAILINGSLASH) &&
 1198                     !(cnp->cn_flags & WILLBEDIR)) {
 1199                         error = ENOENT;
 1200                         goto bad;
 1201                 }
 1202                 if ((cnp->cn_flags & LOCKPARENT) == 0)
 1203                         VOP_UNLOCK(dp);
 1204                 /*
 1205                  * We return with ni_vp NULL to indicate that the entry
 1206                  * doesn't currently exist, leaving a pointer to the
 1207                  * (possibly locked) directory vnode in ndp->ni_dvp.
 1208                  */
 1209                 goto success;
 1210         }
 1211 
 1212 good:
 1213 #ifdef NAMEI_DIAGNOSTIC
 1214         printf("found\n");
 1215 #endif
 1216         dp = ndp->ni_vp;
 1217 
 1218         /*
 1219          * Check for symbolic link
 1220          */
 1221         if ((dp->v_type == VLNK) &&
 1222             ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) ||
 1223              *ndp->ni_next == '/')) {
 1224                 cnp->cn_flags |= ISSYMLINK;
 1225                 if (VN_IS_DOOMED(dp)) {
 1226                         /*
 1227                          * We can't know whether the directory was mounted with
 1228                          * NOSYMFOLLOW, so we can't follow safely.
 1229                          */
 1230                         error = ENOENT;
 1231                         goto bad2;
 1232                 }
 1233                 if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) {
 1234                         error = EACCES;
 1235                         goto bad2;
 1236                 }
 1237                 /*
 1238                  * Symlink code always expects an unlocked dvp.
 1239                  */
 1240                 if (ndp->ni_dvp != ndp->ni_vp) {
 1241                         VOP_UNLOCK(ndp->ni_dvp);
 1242                         ni_dvp_unlocked = 1;
 1243                 }
 1244                 goto success;
 1245         } else if ((vn_irflag_read(dp) & VIRF_MOUNTPOINT) != 0) {
 1246                 if ((cnp->cn_flags & NOCROSSMOUNT) != 0)
 1247                         goto nextname;
 1248         } else
 1249                 goto nextname;
 1250 
 1251         /*
 1252          * Check to see if the vnode has been mounted on;
 1253          * if so find the root of the mounted filesystem.
 1254          */
 1255         do {
 1256                 mp = dp->v_mountedhere;
 1257                 KASSERT(mp != NULL,
 1258                     ("%s: NULL mountpoint for VIRF_MOUNTPOINT vnode", __func__));
 1259                 crosslock = (dp->v_vflag & VV_CROSSLOCK) != 0;
 1260                 crosslkflags = compute_cn_lkflags(mp, cnp->cn_lkflags,
 1261                     cnp->cn_flags);
 1262                 if (__predict_false(crosslock)) {
 1263                         /*
 1264                          * We are going to be holding the vnode lock, which
 1265                          * in this case is shared by the root vnode of the
 1266                          * filesystem mounted at mp, across the call to
 1267                          * VFS_ROOT().  Make the situation clear to the
 1268                          * filesystem by passing LK_CANRECURSE if the
 1269                          * lock is held exclusive, or by clearing
 1270                          * LK_NODDLKTREAT to allow recursion on the shared
 1271                          * lock in the presence of an exclusive waiter.
 1272                          */
 1273                         if (VOP_ISLOCKED(dp) == LK_EXCLUSIVE) {
 1274                                 crosslkflags &= ~LK_SHARED;
 1275                                 crosslkflags |= LK_EXCLUSIVE | LK_CANRECURSE;
 1276                         } else if ((crosslkflags & LK_EXCLUSIVE) != 0) {
 1277                                 vn_lock(dp, LK_UPGRADE | LK_RETRY);
 1278                                 if (VN_IS_DOOMED(dp)) {
 1279                                         error = ENOENT;
 1280                                         goto bad2;
 1281                                 }
 1282                         } else
 1283                                 crosslkflags &= ~LK_NODDLKTREAT;
 1284                 }
 1285                 if (vfs_busy(mp, 0) != 0)
 1286                         continue;
 1287                 if (__predict_true(!crosslock))
 1288                         vput(dp);
 1289                 if (dp != ndp->ni_dvp)
 1290                         vput(ndp->ni_dvp);
 1291                 else
 1292                         vrele(ndp->ni_dvp);
 1293                 vrefact(vp_crossmp);
 1294                 ndp->ni_dvp = vp_crossmp;
 1295                 error = VFS_ROOT(mp, crosslkflags, &tdp);
 1296                 vfs_unbusy(mp);
 1297                 if (__predict_false(crosslock))
 1298                         vput(dp);
 1299                 if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
 1300                         panic("vp_crossmp exclusively locked or reclaimed");
 1301                 if (error != 0) {
 1302                         dpunlocked = 1;
 1303                         goto bad2;
 1304                 }
 1305                 ndp->ni_vp = dp = tdp;
 1306         } while ((vn_irflag_read(dp) & VIRF_MOUNTPOINT) != 0);
 1307 
 1308 nextname:
 1309         /*
 1310          * Not a symbolic link that we will follow.  Continue with the
 1311          * next component if there is any; otherwise, we're done.
 1312          */
 1313         KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/',
 1314             ("lookup: invalid path state."));
 1315         if (relookup) {
 1316                 relookup = 0;
 1317                 ndp->ni_pathlen = prev_ni_pathlen;
 1318                 ndp->ni_next = prev_ni_next;
 1319                 if (ndp->ni_dvp != dp)
 1320                         vput(ndp->ni_dvp);
 1321                 else
 1322                         vrele(ndp->ni_dvp);
 1323                 goto dirloop;
 1324         }
 1325         if (cnp->cn_flags & ISDOTDOT) {
 1326                 error = nameicap_check_dotdot(ndp, ndp->ni_vp);
 1327                 if (error != 0) {
 1328 #ifdef KTRACE
 1329                         if (KTRPOINT(curthread, KTR_CAPFAIL))
 1330                                 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
 1331 #endif
 1332                         goto bad2;
 1333                 }
 1334         }
 1335         if (*ndp->ni_next == '/') {
 1336                 cnp->cn_nameptr = ndp->ni_next;
 1337                 while (*cnp->cn_nameptr == '/') {
 1338                         cnp->cn_nameptr++;
 1339                         ndp->ni_pathlen--;
 1340                 }
 1341                 if (ndp->ni_dvp != dp)
 1342                         vput(ndp->ni_dvp);
 1343                 else
 1344                         vrele(ndp->ni_dvp);
 1345                 goto dirloop;
 1346         }
 1347         /*
 1348          * If we're processing a path with a trailing slash,
 1349          * check that the end result is a directory.
 1350          */
 1351         if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) {
 1352                 error = ENOTDIR;
 1353                 goto bad2;
 1354         }
 1355         /*
 1356          * Disallow directory write attempts on read-only filesystems.
 1357          */
 1358         if (rdonly &&
 1359             (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
 1360                 error = EROFS;
 1361                 goto bad2;
 1362         }
 1363         if (!wantparent) {
 1364                 ni_dvp_unlocked = 2;
 1365                 if (ndp->ni_dvp != dp)
 1366                         vput(ndp->ni_dvp);
 1367                 else
 1368                         vrele(ndp->ni_dvp);
 1369         } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) {
 1370                 VOP_UNLOCK(ndp->ni_dvp);
 1371                 ni_dvp_unlocked = 1;
 1372         }
 1373 
 1374         if (cnp->cn_flags & AUDITVNODE1)
 1375                 AUDIT_ARG_VNODE1(dp);
 1376         else if (cnp->cn_flags & AUDITVNODE2)
 1377                 AUDIT_ARG_VNODE2(dp);
 1378 
 1379         if ((cnp->cn_flags & LOCKLEAF) == 0)
 1380                 VOP_UNLOCK(dp);
 1381 success:
 1382         /*
 1383          * FIXME: for lookups which only cross a mount point to fetch the
 1384          * root vnode, ni_dvp will be set to vp_crossmp. This can be a problem
 1385          * if either WANTPARENT or LOCKPARENT is set.
 1386          */
 1387         /*
 1388          * Because of shared lookup we may have the vnode shared locked, but
 1389          * the caller may want it to be exclusively locked.
 1390          */
 1391         if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) &&
 1392             VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
 1393                 vn_lock(dp, LK_UPGRADE | LK_RETRY);
 1394                 if (VN_IS_DOOMED(dp)) {
 1395                         error = ENOENT;
 1396                         goto bad2;
 1397                 }
 1398         }
 1399 success_right_lock:
 1400         if (ndp->ni_vp != NULL) {
 1401                 if ((cnp->cn_flags & ISDOTDOT) == 0)
 1402                         nameicap_tracker_add(ndp, ndp->ni_vp);
 1403                 if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS)
 1404                         return (vfs_lookup_failifexists(ndp));
 1405         }
 1406         return (0);
 1407 
 1408 bad2:   /* also dispose of ni_dvp, unless it was already released */
 1409         if (ni_dvp_unlocked != 2) {
 1410                 if (dp != ndp->ni_dvp && !ni_dvp_unlocked)
 1411                         vput(ndp->ni_dvp);
 1412                 else
 1413                         vrele(ndp->ni_dvp);
 1414         }
 1415 bad:    /* vput() dp, unless dpunlocked says that already happened */
 1416         if (!dpunlocked)
 1417                 vput(dp);
 1418 bad_unlocked:   /* no vnode cleanup is done from here on */
 1419         ndp->ni_vp = NULL;
 1420         return (error);
 1421 }
 1422 
 1423 /*
 1424  * relookup - lookup a path name component
 1425  *    Used by lookup to re-acquire things.
       *
       * Looks up the single component described by cnp in directory dvp and
       * stores the result in *vpp.  cnp must have ISLASTCN and at least one
       * of LOCKPARENT / WANTPARENT set, and must not be a ".." lookup.  dvp
       * is locked exclusively by this function.  If refstart is true an
       * additional reference is taken on dvp when the lookup succeeds.
       *
       * Returns 0 on success.  A return of 0 with *vpp NULL means the entry
       * does not exist but may be created.  Otherwise an errno value is
       * returned and *vpp is set to NULL.
 1426  */
 1427 int
 1428 vfs_relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 1429     bool refstart)
 1430 {
 1431         struct vnode *dp = NULL;                /* the directory we are searching */
 1432         int rdonly;                     /* lookup read-only flag bit */
 1433         int error = 0;
 1434 
 1435         KASSERT(cnp->cn_flags & ISLASTCN,
 1436             ("relookup: Not given last component."));
 1437         /*
 1438          * Setup: break out flag bits into variables.
 1439          */
 1440         KASSERT((cnp->cn_flags & (LOCKPARENT | WANTPARENT)) != 0,
 1441             ("relookup: parent not wanted"));
 1442         rdonly = cnp->cn_flags & RDONLY;
 1443         cnp->cn_flags &= ~ISSYMLINK;
 1444         dp = dvp;
 1445         cnp->cn_lkflags = LK_EXCLUSIVE;
 1446         vn_lock(dp, LK_EXCLUSIVE | LK_RETRY);
 1447 
 1448         /*
 1449          * Search a new directory.
 1450          *
 1451          * See a comment in vfs_lookup for cnp->cn_nameptr.
 1452          */
 1453 #ifdef NAMEI_DIAGNOSTIC
 1454         printf("{%s}: ", cnp->cn_nameptr);
 1455 #endif
 1456 
 1457         /*
 1458          * Check for "" which represents the root directory after slash
 1459          * removal.
 1460          */
 1461         if (cnp->cn_nameptr[0] == '\0') {
 1462                 /*
 1463                  * Support only LOOKUP for "/" because lookup()
 1464                  * can't succeed for CREATE, DELETE and RENAME.
 1465                  */
 1466                 KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP"));
 1467                 KASSERT(dp->v_type == VDIR, ("dp is not a directory"));
 1468 
 1469                 if (!(cnp->cn_flags & LOCKLEAF))
 1470                         VOP_UNLOCK(dp);
 1471                 *vpp = dp;
 1472                 /* XXX This should probably move to the top of function. */
 1473                 if (refstart)
 1474                         panic("lookup: SAVESTART");
 1475                 return (0);
 1476         }
 1477 
 1478         if (cnp->cn_flags & ISDOTDOT)
 1479                 panic ("relookup: lookup on dot-dot");
 1480 
 1481         /*
 1482          * We now have a segment name to search for, and a directory to search.
 1483          */
 1484 #ifdef NAMEI_DIAGNOSTIC
 1485         vn_printf(dp, "search in ");
 1486 #endif
 1487         if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) {
 1488                 KASSERT(*vpp == NULL, ("leaf should be empty"));
 1489                 if (error != EJUSTRETURN)
 1490                         goto bad;
 1491                 /*
 1492                  * If creating and at end of pathname, then can consider
 1493                  * allowing file to be created.
 1494                  */
 1495                 if (rdonly) {
 1496                         error = EROFS;
 1497                         goto bad;
 1498                 }
 1499                 /* refstart: take an additional reference on the directory. */
 1500                 if (refstart)
 1501                         VREF(dvp);
 1502                 if ((cnp->cn_flags & LOCKPARENT) == 0)
 1503                         VOP_UNLOCK(dp);
 1504                 /*
 1505                  * We return with *vpp NULL to indicate that the entry
 1506                  * doesn't currently exist; the directory vnode dvp is
 1507                  * left locked only if LOCKPARENT was requested.
 1508                  */
 1509                 return (0);
 1510         }
 1511 
 1512         dp = *vpp;
 1513 
 1514         /*
 1515          * Disallow directory write attempts on read-only filesystems.
 1516          */
 1517         if (rdonly &&
 1518             (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
 1519                 if (dvp == dp)
 1520                         vrele(dvp);
 1521                 else
 1522                         vput(dvp);
 1523                 error = EROFS;
 1524                 goto bad;
 1525         }
 1526         /*
 1527          * Set the parent lock/ref state to the requested state.
 1528          */
 1529         if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp)
 1530                 VOP_UNLOCK(dvp);
 1531         /*
 1532          * Check for symbolic link
 1533          */
 1534         KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW),
 1535             ("relookup: symlink found.\n"));
 1536 
 1537         /* refstart: take an additional reference on the directory. */
 1538         if (refstart)
 1539                 VREF(dvp);
 1540 
 1541         if ((cnp->cn_flags & LOCKLEAF) == 0)
 1542                 VOP_UNLOCK(dp);
 1543         return (0);
 1544 bad:
 1545         vput(dp);
 1546         *vpp = NULL;
 1547         return (error);
 1548 }
 1549 
 1550 #ifdef INVARIANTS
 1551 /*
 1552  * Validate the final state of ndp after the lookup.
 1553  */
 1554 static void
 1555 NDVALIDATE_impl(struct nameidata *ndp, int line)
 1556 {
 1557         struct componentname *cnp;
 1558 
 1559         cnp = &ndp->ni_cnd;
 1560         if (cnp->cn_pnbuf == NULL)
 1561                 panic("%s: got no buf! called from %d", __func__, line);
 1562 }
 1563 
 1564 #endif
 1565 
 1566 /*
 1567  * Determine if there is a suitable alternate filename under the specified
 1568  * prefix for the specified path.  If the create flag is set, then the
 1569  * alternate prefix will be used so long as the parent directory exists.
 1570  * This is used by the various compatibility ABIs so that Linux binaries prefer
 1571  * files under /compat/linux for example.  The chosen path (whether under
 1572  * the prefix or under /) is returned in a kernel malloc'd buffer pointed
 1573  * to by pathbuf.  The caller is responsible for free'ing the buffer from
 1574  * the M_TEMP bucket if one is returned.
 1575  */
 1576 int
 1577 kern_alternate_path(const char *prefix, const char *path, enum uio_seg pathseg,
 1578     char **pathbuf, int create, int dirfd)
 1579 {
 1580         struct nameidata nd, ndroot;
 1581         char *ptr, *buf, *cp;
 1582         size_t len, sz;
 1583         int error;
 1584 
 1585         buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
 1586         *pathbuf = buf;
 1587 
 1588         /* Copy the prefix into the new pathname as a starting point. */
 1589         len = strlcpy(buf, prefix, MAXPATHLEN);
 1590         if (len >= MAXPATHLEN) {
 1591                 *pathbuf = NULL;
 1592                 free(buf, M_TEMP);
 1593                 return (EINVAL);
 1594         }
 1595         sz = MAXPATHLEN - len;
 1596         ptr = buf + len;
 1597 
 1598         /* Append the filename to the prefix. */
 1599         if (pathseg == UIO_SYSSPACE)
 1600                 error = copystr(path, ptr, sz, &len);
 1601         else
 1602                 error = copyinstr(path, ptr, sz, &len);
 1603 
 1604         if (error) {
 1605                 *pathbuf = NULL;
 1606                 free(buf, M_TEMP);
 1607                 return (error);
 1608         }
 1609 
 1610         /* Only use a prefix with absolute pathnames. */
 1611         if (*ptr != '/') {
 1612                 error = EINVAL;
 1613                 goto keeporig;
 1614         }
 1615 
 1616         if (dirfd != AT_FDCWD) {
 1617                 /*
 1618                  * We want the original because the "prefix" is
 1619                  * included in the already opened dirfd.
 1620                  */
 1621                 bcopy(ptr, buf, len);
 1622                 return (0);
 1623         }
 1624 
 1625         /*
 1626          * We know that there is a / somewhere in this pathname.
 1627          * Search backwards for it, to find the file's parent dir
 1628          * to see if it exists in the alternate tree. If it does,
 1629          * and we want to create a file (cflag is set). We don't
 1630          * need to worry about the root comparison in this case.
 1631          */
 1632 
 1633         if (create) {
 1634                 for (cp = &ptr[len] - 1; *cp != '/'; cp--);
 1635                 *cp = '\0';
 1636 
 1637                 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf);
 1638                 error = namei(&nd);
 1639                 *cp = '/';
 1640                 if (error != 0)
 1641                         goto keeporig;
 1642         } else {
 1643                 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf);
 1644 
 1645                 error = namei(&nd);
 1646                 if (error != 0)
 1647                         goto keeporig;
 1648 
 1649                 /*
 1650                  * We now compare the vnode of the prefix to the one
 1651                  * vnode asked. If they resolve to be the same, then we
 1652                  * ignore the match so that the real root gets used.
 1653                  * This avoids the problem of traversing "../.." to find the
 1654                  * root directory and never finding it, because "/" resolves
 1655                  * to the emulation root directory. This is expensive :-(
 1656                  */
 1657                 NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, prefix);
 1658 
 1659                 /* We shouldn't ever get an error from this namei(). */
 1660                 error = namei(&ndroot);
 1661                 if (error == 0) {
 1662                         if (nd.ni_vp == ndroot.ni_vp)
 1663                                 error = ENOENT;
 1664 
 1665                         NDFREE_PNBUF(&ndroot);
 1666                         vrele(ndroot.ni_vp);
 1667                 }
 1668         }
 1669 
 1670         NDFREE_PNBUF(&nd);
 1671         vrele(nd.ni_vp);
 1672 
 1673 keeporig:
 1674         /* If there was an error, use the original path name. */
 1675         if (error)
 1676                 bcopy(ptr, buf, len);
 1677         return (error);
 1678 }

Cache object: 887621b47f9dca7e0b7bb6666a014d2c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.