The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_lookup.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: vfs_lookup.c,v 1.232 2022/08/22 09:14:59 hannken Exp $ */
    2 
    3 /*
    4  * Copyright (c) 1982, 1986, 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)vfs_lookup.c        8.10 (Berkeley) 5/27/95
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.232 2022/08/22 09:14:59 hannken Exp $");
   41 
   42 #ifdef _KERNEL_OPT
   43 #include "opt_magiclinks.h"
   44 #endif
   45 
   46 #include <sys/param.h>
   47 #include <sys/systm.h>
   48 #include <sys/kernel.h>
   49 #include <sys/syslimits.h>
   50 #include <sys/time.h>
   51 #include <sys/namei.h>
   52 #include <sys/vnode.h>
   53 #include <sys/vnode_impl.h>
   54 #include <sys/fstrans.h>
   55 #include <sys/mount.h>
   56 #include <sys/errno.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/hash.h>
   59 #include <sys/proc.h>
   60 #include <sys/syslog.h>
   61 #include <sys/kauth.h>
   62 #include <sys/ktrace.h>
   63 #include <sys/dirent.h>
   64 
   65 #ifndef MAGICLINKS
   66 #define MAGICLINKS 0
   67 #endif
   68 
   69 int vfs_magiclinks = MAGICLINKS;
   70 
   71 __CTASSERT(MAXNAMLEN == NAME_MAX);
   72 
   73 /*
   74  * Substitute replacement text for 'magic' strings in symlinks.
   75  * Returns 0 if successful, and returns non-zero if an error
   76  * occurs.  (Currently, the only possible error is running out
   77  * of temporary pathname space.)
   78  *
   79  * Looks for "@<string>" and "@<string>/", where <string> is a
   80  * recognized 'magic' string.  Replaces the "@<string>" with the
   81  * appropriate replacement text.  (Note that in some cases the
   82  * replacement text may have zero length.)
   83  *
   84  * This would have been table driven, but the variance in
   85  * replacement strings (and replacement string lengths) made
   86  * that impractical.
   87  */
   88 #define VNL(x)                                                  \
   89         (sizeof(x) - 1)
   90 
   91 #define VO      '{'
   92 #define VC      '}'
   93 
   94 #define MATCH(str)                                              \
   95         ((termchar == '/' && i + VNL(str) == *len) ||           \
   96          (i + VNL(str) < *len &&                                \
   97           cp[i + VNL(str)] == termchar)) &&                     \
   98         !strncmp((str), &cp[i], VNL(str))
   99 
  100 #define SUBSTITUTE(m, s, sl)                                    \
  101         if ((newlen + (sl)) >= MAXPATHLEN)                      \
  102                 return 1;                                       \
  103         i += VNL(m);                                            \
  104         if (termchar != '/')                                    \
  105                 i++;                                            \
  106         (void)memcpy(&tmp[newlen], (s), (sl));                  \
  107         newlen += (sl);                                         \
  108         change = 1;                                             \
  109         termchar = '/';
  110 
  111 static int
  112 symlink_magic(struct proc *p, char *cp, size_t *len)
  113 {
  114         char *tmp;
  115         size_t change, i, newlen, slen;
  116         char termchar = '/';
  117         char idtmp[11]; /* enough for 32 bit *unsigned* integer */
  118 
  119 
  120         tmp = PNBUF_GET();
  121         for (change = i = newlen = 0; i < *len; ) {
  122                 if (cp[i] != '@') {
  123                         tmp[newlen++] = cp[i++];
  124                         continue;
  125                 }
  126 
  127                 i++;
  128 
  129                 /* Check for @{var} syntax. */
  130                 if (cp[i] == VO) {
  131                         termchar = VC;
  132                         i++;
  133                 }
  134 
  135                 /*
  136                  * The following checks should be ordered according
  137                  * to frequency of use.
  138                  */
  139                 if (MATCH("machine_arch")) {
  140                         slen = VNL(MACHINE_ARCH);
  141                         SUBSTITUTE("machine_arch", MACHINE_ARCH, slen);
  142                 } else if (MATCH("machine")) {
  143                         slen = VNL(MACHINE);
  144                         SUBSTITUTE("machine", MACHINE, slen);
  145                 } else if (MATCH("hostname")) {
  146                         SUBSTITUTE("hostname", hostname, hostnamelen);
  147                 } else if (MATCH("osrelease")) {
  148                         slen = strlen(osrelease);
  149                         SUBSTITUTE("osrelease", osrelease, slen);
  150                 } else if (MATCH("emul")) {
  151                         slen = strlen(p->p_emul->e_name);
  152                         SUBSTITUTE("emul", p->p_emul->e_name, slen);
  153                 } else if (MATCH("kernel_ident")) {
  154                         slen = strlen(kernel_ident);
  155                         SUBSTITUTE("kernel_ident", kernel_ident, slen);
  156                 } else if (MATCH("domainname")) {
  157                         SUBSTITUTE("domainname", domainname, domainnamelen);
  158                 } else if (MATCH("ostype")) {
  159                         slen = strlen(ostype);
  160                         SUBSTITUTE("ostype", ostype, slen);
  161                 } else if (MATCH("uid")) {
  162                         slen = snprintf(idtmp, sizeof(idtmp), "%u",
  163                             kauth_cred_geteuid(kauth_cred_get()));
  164                         SUBSTITUTE("uid", idtmp, slen);
  165                 } else if (MATCH("ruid")) {
  166                         slen = snprintf(idtmp, sizeof(idtmp), "%u",
  167                             kauth_cred_getuid(kauth_cred_get()));
  168                         SUBSTITUTE("ruid", idtmp, slen);
  169                 } else if (MATCH("gid")) {
  170                         slen = snprintf(idtmp, sizeof(idtmp), "%u",
  171                             kauth_cred_getegid(kauth_cred_get()));
  172                         SUBSTITUTE("gid", idtmp, slen);
  173                 } else if (MATCH("rgid")) {
  174                         slen = snprintf(idtmp, sizeof(idtmp), "%u",
  175                             kauth_cred_getgid(kauth_cred_get()));
  176                         SUBSTITUTE("rgid", idtmp, slen);
  177                 } else {
  178                         tmp[newlen++] = '@';
  179                         if (termchar == VC)
  180                                 tmp[newlen++] = VO;
  181                 }
  182         }
  183 
  184         if (change) {
  185                 (void)memcpy(cp, tmp, newlen);
  186                 *len = newlen;
  187         }
  188         PNBUF_PUT(tmp);
  189 
  190         return 0;
  191 }
  192 
  193 #undef VNL
  194 #undef VO
  195 #undef VC
  196 #undef MATCH
  197 #undef SUBSTITUTE
  198 
  199 ////////////////////////////////////////////////////////////
  200 
  201 /*
  202  * Determine the namei hash (for the namecache) for name.
  203  * If *ep != NULL, hash from name to ep-1.
  204  * If *ep == NULL, hash from name until the first NUL or '/', and
  205  * return the location of this termination character in *ep.
  206  *
  207  * This function returns an equivalent hash to the MI hash32_strn().
  208  * The latter isn't used because in the *ep == NULL case, determining
  209  * the length of the string to the first NUL or `/' and then calling
  210  * hash32_strn() involves unnecessary double-handling of the data.
  211  */
  212 uint32_t
  213 namei_hash(const char *name, const char **ep)
  214 {
  215         uint32_t        hash;
  216 
  217         hash = HASH32_STR_INIT;
  218         if (*ep != NULL) {
  219                 for (; name < *ep; name++)
  220                         hash = hash * 33 + *(const uint8_t *)name;
  221         } else {
  222                 for (; *name != '\0' && *name != '/'; name++)
  223                         hash = hash * 33 + *(const uint8_t *)name;
  224                 *ep = name;
  225         }
  226         return (hash + (hash >> 5));
  227 }
  228 
  229 ////////////////////////////////////////////////////////////
  230 
  231 /*
  232  * Sealed abstraction for pathnames.
  233  *
  234  * System-call-layer level code that is going to call namei should
  235  * first create a pathbuf and adjust all the bells and whistles on it
  236  * as needed by context.
  237  */
  238 
  239 struct pathbuf {
  240         char *pb_path;          /* the path; namei_init() uses it as ni_pnbuf */
  241         char *pb_pathcopy;      /* saved copy of pb_path, NULL when none is out */
  242         unsigned pb_pathcopyuses; /* pathbuf_stringcopy_get()s not yet put back */
  243 };
  244 
  245 static struct pathbuf *
  246 pathbuf_create_raw(void)
  247 {
  248         struct pathbuf *pb;
  249 
  250         pb = kmem_alloc(sizeof(*pb), KM_SLEEP);
  251         pb->pb_path = PNBUF_GET();
  252         if (pb->pb_path == NULL) {
  253                 kmem_free(pb, sizeof(*pb));
  254                 return NULL;
  255         }
  256         pb->pb_pathcopy = NULL;
  257         pb->pb_pathcopyuses = 0;
  258         return pb;
  259 }
  260 
  261 void
  262 pathbuf_destroy(struct pathbuf *pb)
  263 {
  264         KASSERT(pb->pb_pathcopyuses == 0);
  265         KASSERT(pb->pb_pathcopy == NULL);
  266         PNBUF_PUT(pb->pb_path);
  267         kmem_free(pb, sizeof(*pb));
  268 }
  269 
  270 struct pathbuf *
  271 pathbuf_assimilate(char *pnbuf)
  272 {
  273         struct pathbuf *pb;
  274 
  275         pb = kmem_alloc(sizeof(*pb), KM_SLEEP);
  276         pb->pb_path = pnbuf;
  277         pb->pb_pathcopy = NULL;
  278         pb->pb_pathcopyuses = 0;
  279         return pb;
  280 }
  281 
  282 struct pathbuf *
  283 pathbuf_create(const char *path)
  284 {
  285         struct pathbuf *pb;
  286         int error;
  287 
  288         pb = pathbuf_create_raw();
  289         if (pb == NULL) {
  290                 return NULL;
  291         }
  292         error = copystr(path, pb->pb_path, PATH_MAX, NULL);
  293         if (error != 0) {
  294                 KASSERT(!"kernel path too long in pathbuf_create");
  295                 /* make sure it's null-terminated, just in case */
  296                 pb->pb_path[PATH_MAX-1] = '\0';
  297         }
  298         return pb;
  299 }
  300 
  301 int
  302 pathbuf_copyin(const char *userpath, struct pathbuf **ret)
  303 {
  304         struct pathbuf *pb;
  305         int error;
  306 
  307         pb = pathbuf_create_raw();
  308         if (pb == NULL) {
  309                 return ENOMEM;
  310         }
  311         error = copyinstr(userpath, pb->pb_path, PATH_MAX, NULL);
  312         if (error) {
  313                 pathbuf_destroy(pb);
  314                 return error;
  315         }
  316         *ret = pb;
  317         return 0;
  318 }
  319 
  320 /*
  321  * XXX should not exist:
  322  *   1. whether a pointer is kernel or user should be statically checkable.
  323  *   2. copyin should be handled by the upper part of the syscall layer,
  324  *      not in here.
  325  */
  326 int
  327 pathbuf_maybe_copyin(const char *path, enum uio_seg seg, struct pathbuf **ret)
  328 {
  329         if (seg == UIO_USERSPACE) {
  330                 return pathbuf_copyin(path, ret);
  331         } else {
  332                 *ret = pathbuf_create(path);
  333                 if (*ret == NULL) {
  334                         return ENOMEM;
  335                 }
  336                 return 0;
  337         }
  338 }
  339 
  340 /*
  341  * Get a copy of the path buffer as it currently exists. If this is
  342  * called after namei starts the results may be arbitrary.
  343  */
  344 void
  345 pathbuf_copystring(const struct pathbuf *pb, char *buf, size_t maxlen)
  346 {
  347         strlcpy(buf, pb->pb_path, maxlen);
  348 }
  349 
  350 /*
  351  * These two functions allow access to a saved copy of the original
  352  * path string. The first copy should be gotten before namei is
  353  * called. Each copy that is gotten should be put back.
  354  */
  355 
  356 const char *
  357 pathbuf_stringcopy_get(struct pathbuf *pb)
  358 {
  359         if (pb->pb_pathcopyuses == 0) {
  360                 pb->pb_pathcopy = PNBUF_GET();
  361                 strcpy(pb->pb_pathcopy, pb->pb_path);
  362         }
  363         pb->pb_pathcopyuses++;
  364         return pb->pb_pathcopy;
  365 }
  366 
  367 void
  368 pathbuf_stringcopy_put(struct pathbuf *pb, const char *str)
  369 {
  370         KASSERT(str == pb->pb_pathcopy);
  371         KASSERT(pb->pb_pathcopyuses > 0);
  372         pb->pb_pathcopyuses--;
  373         if (pb->pb_pathcopyuses == 0) {
  374                 PNBUF_PUT(pb->pb_pathcopy);
  375                 pb->pb_pathcopy = NULL;
  376         }
  377 }
  378 
  379 
  380 ////////////////////////////////////////////////////////////
  381 
  382 /*
  383  * namei: convert a pathname into a pointer to a (maybe-locked) vnode,
  384  * and maybe also its parent directory vnode, and assorted other guff.
  385  * See namei(9) for the interface documentation.
  386  *
  387  *
  388  * The FOLLOW flag is set when symbolic links are to be followed
  389  * when they occur at the end of the name translation process.
  390  * Symbolic links are always followed for all other pathname
  391  * components other than the last.
  392  *
  393  * The segflg defines whether the name is to be copied from user
  394  * space or kernel space.
  395  *
  396  * Overall outline of namei:
  397  *
  398  *      copy in name
  399  *      get starting directory
  400  *      while (!done && !error) {
  401  *              call lookup to search path.
  402  *              if symbolic link, massage name in buffer and continue
  403  *      }
  404  */
  405 
  406 /*
  407  * Search a pathname.
  408  * This is a very central and rather complicated routine.
  409  *
  410  * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
  411  * The starting directory is passed in. The pathname is descended
  412  * until done, or a symbolic link is encountered. The variable ni_more
  413  * is clear if the path is completed; it is set to one if a symbolic
  414  * link needing interpretation is encountered.
  415  *
  416  * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
  417  * whether the name is to be looked up, created, renamed, or deleted.
  418  * When CREATE, RENAME, or DELETE is specified, information usable in
  419  * creating, renaming, or deleting a directory entry may be calculated.
  420  * If flag has LOCKPARENT or'ed into it, the parent directory is returned
  421  * locked.  Otherwise the parent directory is not returned. If the target
  422  * of the pathname exists and LOCKLEAF is or'ed into the flag the target
  423  * is returned locked, otherwise it is returned unlocked.  When creating
  424  * or renaming and LOCKPARENT is specified, the target may not be ".".
  425  * When deleting and LOCKPARENT is specified, the target may be ".".
  426  *
  427  * Overall outline of lookup:
  428  *
  429  * dirloop:
  430  *      identify next component of name at ndp->ni_ptr
  431  *      handle degenerate case where name is null string
  432  *      if .. and crossing mount points and on mounted filesys, find parent
  433  *      call VOP_LOOKUP routine for next component name
  434  *          directory vnode returned in ni_dvp, locked.
  435  *          component vnode returned in ni_vp (if it exists), locked.
  436  *      if result vnode is mounted on and crossing mount points,
  437  *          find mounted on vnode
  438  *      if more components of name, do next level at dirloop
  439  *      return the answer in ni_vp, locked if LOCKLEAF set
  440  *          if LOCKPARENT set, return locked parent in ni_dvp
  441  */
  442 
  443 
  444 /*
  445  * Internal state for a namei operation.
  446  *
  447  * cnp is always equal to &ndp->ni_cnd.
  448  */
  449 struct namei_state {
  450         struct nameidata *ndp;          /* nameidata passed to namei_init() */
  451         struct componentname *cnp;      /* &ndp->ni_cnd, set by namei_init() */
  452 
  453         int docache;                    /* == 0 do not cache last component */
  454         int rdonly;                     /* lookup read-only flag bit */
  455         int slashes;                    /* zeroed by namei_init(); NOTE(review): meaning not visible here */
  456 
  457         unsigned attempt_retry:1;       /* true if error allows emul retry */
  458         unsigned root_referenced:1;     /* true if ndp->ni_rootdir and
  459                                              ndp->ni_erootdir were referenced */
  460 };
  461 
  462 
  463 /*
  464  * Initialize the namei working state.
  465  */
  466 static void
  467 namei_init(struct namei_state *state, struct nameidata *ndp)
  468 {
  469 
  470         state->ndp = ndp;
  471         state->cnp = &ndp->ni_cnd;
  472 
  473         state->docache = 0;
  474         state->rdonly = 0;
  475         state->slashes = 0;
  476 
  477         state->root_referenced = 0;
  478 
  479         KASSERTMSG((state->cnp->cn_cred != NULL), "namei: bad cred/proc");
  480         KASSERTMSG(((state->cnp->cn_nameiop & (~OPMASK)) == 0),
  481             "namei: nameiop contaminated with flags: %08"PRIx32,
  482             state->cnp->cn_nameiop);
  483         KASSERTMSG(((state->cnp->cn_flags & OPMASK) == 0),
  484             "name: flags contaminated with nameiops: %08"PRIx32,
  485             state->cnp->cn_flags);
  486 
  487         /*
  488          * The buffer for name translation shall be the one inside the
  489          * pathbuf.
  490          */
  491         state->ndp->ni_pnbuf = state->ndp->ni_pathbuf->pb_path;
  492 }
  493 
  494 /*
  495  * Clean up the working namei state, leaving things ready for return
  496  * from namei.
  497  */
  498 static void
  499 namei_cleanup(struct namei_state *state)
  500 {
  501         KASSERT(state->cnp == &state->ndp->ni_cnd);
  502 
  503         if (state->root_referenced) {
  504                 if (state->ndp->ni_rootdir != NULL)
  505                         vrele(state->ndp->ni_rootdir);
  506                 if (state->ndp->ni_erootdir != NULL)
  507                         vrele(state->ndp->ni_erootdir);
  508         }
  509 }
  510 
  511 //////////////////////////////
  512 
  513 /*
  514  * Get the directory context.
  515  * Initializes the rootdir and erootdir state and returns a reference
  516  * to the starting dir.
  517  */
  518 static struct vnode *
  519 namei_getstartdir(struct namei_state *state)
  520 {
  521         struct nameidata *ndp = state->ndp;
  522         struct componentname *cnp = state->cnp;
  523         struct cwdinfo *cwdi;           /* pointer to cwd state */
  524         struct lwp *self = curlwp;      /* thread doing namei() */
  525         struct vnode *rootdir, *erootdir, *curdir, *startdir;
  526 
  527         if (state->root_referenced) {
  528                 if (state->ndp->ni_rootdir != NULL)
  529                         vrele(state->ndp->ni_rootdir);
  530                 if (state->ndp->ni_erootdir != NULL)
  531                         vrele(state->ndp->ni_erootdir);
  532                 state->root_referenced = 0;
  533         }
  534 
  535         cwdi = self->l_proc->p_cwdi;
  536         rw_enter(&cwdi->cwdi_lock, RW_READER);
  537 
  538         /* root dir */
  539         if (cwdi->cwdi_rdir == NULL || (cnp->cn_flags & NOCHROOT)) {
  540                 rootdir = rootvnode;
  541         } else {
  542                 rootdir = cwdi->cwdi_rdir;
  543         }
  544 
  545         /* emulation root dir, if any */
  546         if ((cnp->cn_flags & TRYEMULROOT) == 0) {
  547                 /* if we don't want it, don't fetch it */
  548                 erootdir = NULL;
  549         } else if (cnp->cn_flags & EMULROOTSET) {
  550                 /* explicitly set emulroot; "/../" doesn't override this */
  551                 erootdir = ndp->ni_erootdir;
  552         } else if (!strncmp(ndp->ni_pnbuf, "/../", 4)) {
  553                 /* explicit reference to real rootdir */
  554                 erootdir = NULL;
  555         } else {
  556                 /* may be null */
  557                 erootdir = cwdi->cwdi_edir;
  558         }
  559 
  560         /* current dir */
  561         curdir = cwdi->cwdi_cdir;
  562 
  563         if (ndp->ni_pnbuf[0] != '/') {
  564                 if (ndp->ni_atdir != NULL) {
  565                         startdir = ndp->ni_atdir;
  566                 } else {
  567                         startdir = curdir;
  568                 }
  569                 erootdir = NULL;
  570         } else if (cnp->cn_flags & TRYEMULROOT && erootdir != NULL) {
  571                 startdir = erootdir;
  572         } else {
  573                 startdir = rootdir;
  574                 erootdir = NULL;
  575         }
  576 
  577         state->ndp->ni_rootdir = rootdir;
  578         state->ndp->ni_erootdir = erootdir;
  579 
  580         /*
  581          * Get a reference to the start dir so we can safely unlock cwdi.
  582          *
  583          * Must hold references to rootdir and erootdir while we're running.
  584          * A multithreaded process may chroot during namei.
  585          */
  586         if (startdir != NULL)
  587                 vref(startdir);
  588         if (state->ndp->ni_rootdir != NULL)
  589                 vref(state->ndp->ni_rootdir);
  590         if (state->ndp->ni_erootdir != NULL)
  591                 vref(state->ndp->ni_erootdir);
  592         state->root_referenced = 1;
  593 
  594         rw_exit(&cwdi->cwdi_lock);
  595         return startdir;
  596 }
  597 
  598 /*
  599  * Get the directory context for the nfsd case, in parallel to
  600  * getstartdir. Initializes the rootdir and erootdir state and
  601  * returns a reference to the passed-in starting dir.
  602  */
  603 static struct vnode *
  604 namei_getstartdir_for_nfsd(struct namei_state *state)
  605 {
  606         KASSERT(state->ndp->ni_atdir != NULL);
  607 
  608         /* always use the real root, and never set an emulation root */
  609         if (rootvnode == NULL) {
  610                 return NULL;
  611         }
  612         state->ndp->ni_rootdir = rootvnode;
  613         state->ndp->ni_erootdir = NULL;
  614 
  615         vref(state->ndp->ni_atdir);
  616         KASSERT(! state->root_referenced);
  617         vref(state->ndp->ni_rootdir);
  618         state->root_referenced = 1;
  619         return state->ndp->ni_atdir;
  620 }
  621 
  622 
  623 /*
  624  * Ktrace the namei operation.
  625  */
  626 static void
  627 namei_ktrace(struct namei_state *state)
  628 {
  629         struct nameidata *ndp = state->ndp;
  630         struct componentname *cnp = state->cnp;
  631         struct lwp *self = curlwp;      /* thread doing namei() */
  632         const char *emul_path;
  633 
  634         if (ktrpoint(KTR_NAMEI)) {
  635                 if (ndp->ni_erootdir != NULL) {
  636                         /*
  637                          * To make any sense, the trace entry need to have the
  638                          * text of the emulation path prepended.
  639                          * Usually we can get this from the current process,
  640                          * but when called from emul_find_interp() it is only
  641                          * in the exec_package - so we get it passed in ni_next
  642                          * (this is a hack).
  643                          */
  644                         if (cnp->cn_flags & EMULROOTSET)
  645                                 emul_path = ndp->ni_next;
  646                         else
  647                                 emul_path = self->l_proc->p_emul->e_path;
  648                         ktrnamei2(emul_path, strlen(emul_path),
  649                             ndp->ni_pnbuf, ndp->ni_pathlen);
  650                 } else
  651                         ktrnamei(ndp->ni_pnbuf, ndp->ni_pathlen);
  652         }
  653 }
  654 
  655 /*
  656  * Start up namei. Find the root dir and cwd, establish the starting
  657  * directory for lookup, and lock it. Also calls ktrace when
  658  * appropriate.
  659  */
  660 static int
  661 namei_start(struct namei_state *state, int isnfsd,
  662             struct vnode **startdir_ret)
  663 {
  664         struct nameidata *ndp = state->ndp;
  665         struct vnode *startdir;
  666 
  667         /* length includes null terminator (was originally from copyinstr) */
  668         ndp->ni_pathlen = strlen(ndp->ni_pnbuf) + 1;
  669 
  670         /*
  671          * POSIX.1 requirement: "" is not a valid file name.
  672          */
  673         if (ndp->ni_pathlen == 1) {
  674                 ndp->ni_erootdir = NULL;
  675                 return ENOENT;
  676         }
  677 
  678         ndp->ni_loopcnt = 0;
  679 
  680         /* Get starting directory, set up root, and ktrace. */
  681         if (isnfsd) {
  682                 startdir = namei_getstartdir_for_nfsd(state);
  683                 /* no ktrace */
  684         } else {
  685                 startdir = namei_getstartdir(state);
  686                 namei_ktrace(state);
  687         }
  688 
  689         if (startdir == NULL) {
  690                 return ENOENT;
  691         }
  692 
  693         /* NDAT may feed us with a non directory namei_getstartdir */
  694         if (startdir->v_type != VDIR) {
  695                 vrele(startdir);
  696                 return ENOTDIR;
  697         }
  698 
  699         *startdir_ret = startdir;
  700         return 0;
  701 }
  702 
  703 /*
  704  * Check for being at a symlink that we're going to follow.
  705  */
  706 static inline int
  707 namei_atsymlink(struct namei_state *state, struct vnode *foundobj)
  708 {
  709         return (foundobj->v_type == VLNK) &&
  710                 (state->cnp->cn_flags & (FOLLOW|REQUIREDIR));
  711 }
  712 
  713 /*
  714  * Follow a symlink.
  715  *
  716  * Updates searchdir. inhibitmagic causes magic symlinks to not be
  717  * interpreted; this is used by nfsd.
  718  *
  719  * Unlocks foundobj on success (ugh)
  720  */
  721 static inline int
  722 namei_follow(struct namei_state *state, int inhibitmagic,
  723              struct vnode *searchdir, struct vnode *foundobj,
  724              struct vnode **newsearchdir_ret)
  725 {
  726         struct nameidata *ndp = state->ndp;
  727         struct componentname *cnp = state->cnp;
  728 
  729         struct lwp *self = curlwp;      /* thread doing namei() */
  730         struct iovec aiov;              /* uio for reading symbolic links */
  731         struct uio auio;
  732         char *cp;                       /* pointer into pathname argument */
  733         size_t linklen;
  734         int error;
  735 
  736         if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
  737                 return ELOOP;
  738         }
  739 
  740         vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
  741         if (foundobj->v_mount->mnt_flag & MNT_SYMPERM) {
  742                 error = VOP_ACCESS(foundobj, VEXEC, cnp->cn_cred);
  743                 if (error != 0) {
  744                         VOP_UNLOCK(foundobj);
  745                         return error;
  746                 }
  747         }
  748 
  749         /* FUTURE: fix this to not use a second buffer */
  750         cp = PNBUF_GET();
  751         aiov.iov_base = cp;
  752         aiov.iov_len = MAXPATHLEN;
  753         auio.uio_iov = &aiov;
  754         auio.uio_iovcnt = 1;
  755         auio.uio_offset = 0;
  756         auio.uio_rw = UIO_READ;
  757         auio.uio_resid = MAXPATHLEN;
  758         UIO_SETUP_SYSSPACE(&auio);
  759         error = VOP_READLINK(foundobj, &auio, cnp->cn_cred);
  760         VOP_UNLOCK(foundobj);
  761         if (error) {
  762                 PNBUF_PUT(cp);
  763                 return error;
  764         }
  765         linklen = MAXPATHLEN - auio.uio_resid;
  766         if (linklen == 0) {
  767                 PNBUF_PUT(cp);
  768                 return ENOENT;
  769         }
  770 
  771         /*
  772          * Do symlink substitution, if appropriate, and
  773          * check length for potential overflow.
  774          *
  775          * Inhibit symlink substitution for nfsd.
  776          * XXX: This is how it was before; is that a bug or a feature?
  777          */
  778         if ((!inhibitmagic && vfs_magiclinks &&
  779              symlink_magic(self->l_proc, cp, &linklen)) ||
  780             (linklen + ndp->ni_pathlen >= MAXPATHLEN)) {
  781                 PNBUF_PUT(cp);
  782                 return ENAMETOOLONG;
  783         }
  784         if (ndp->ni_pathlen > 1) {
  785                 /* includes a null-terminator */
  786                 memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen);
  787         } else {
  788                 cp[linklen] = '\0';
  789         }
  790         ndp->ni_pathlen += linklen;
  791         memcpy(ndp->ni_pnbuf, cp, ndp->ni_pathlen);
  792         PNBUF_PUT(cp);
  793 
  794         /* we're now starting from the beginning of the buffer again */
  795         cnp->cn_nameptr = ndp->ni_pnbuf;
  796 
  797         /*
  798          * Check if root directory should replace current directory.
  799          */
  800         if (ndp->ni_pnbuf[0] == '/') {
  801                 vrele(searchdir);
  802                 /* Keep absolute symbolic links inside emulation root */
  803                 searchdir = ndp->ni_erootdir;
  804                 if (searchdir == NULL ||
  805                     (ndp->ni_pnbuf[1] == '.'
  806                      && ndp->ni_pnbuf[2] == '.'
  807                      && ndp->ni_pnbuf[3] == '/')) {
  808                         ndp->ni_erootdir = NULL;
  809                         searchdir = ndp->ni_rootdir;
  810                 }
  811                 vref(searchdir);
  812                 while (cnp->cn_nameptr[0] == '/') {
  813                         cnp->cn_nameptr++;
  814                         ndp->ni_pathlen--;
  815                 }
  816         }
  817 
  818         *newsearchdir_ret = searchdir;
  819         return 0;
  820 }
  821 
  822 //////////////////////////////
  823 
  824 /*
  825  * Inspect the leading path component and update the state accordingly.
  826  */
  827 static int
  828 lookup_parsepath(struct namei_state *state, struct vnode *searchdir)
  829 {
  830         const char *cp;                 /* pointer into pathname argument */
  831         int error;
  832 
  833         struct componentname *cnp = state->cnp;
  834         struct nameidata *ndp = state->ndp;
  835 
  836         KASSERT(cnp == &ndp->ni_cnd);
  837 
  838         /*
  839          * Search a new directory.
  840          *
  841          * The last component of the filename is left accessible via
  842          * cnp->cn_nameptr for callers that need the name. Callers needing
  843          * the name set the SAVENAME flag. When done, they assume
  844          * responsibility for freeing the pathname buffer.
  845          *
  846          * At this point, our only vnode state is that the search dir
  847          * is held.
  848          */
  849         error = VOP_PARSEPATH(searchdir, cnp->cn_nameptr, &cnp->cn_namelen);
  850         if (error) {
  851                 return error;
  852         }
  853         cp = cnp->cn_nameptr + cnp->cn_namelen;
  854         if (cnp->cn_namelen > KERNEL_NAME_MAX) {
  855                 return ENAMETOOLONG;
  856         }
  857 #ifdef NAMEI_DIAGNOSTIC
  858         { char c = *cp;
  859         *(char *)cp = '\0';
  860         printf("{%s}: ", cnp->cn_nameptr);
  861         *(char *)cp = c; }
  862 #endif /* NAMEI_DIAGNOSTIC */
  863         ndp->ni_pathlen -= cnp->cn_namelen;
  864         ndp->ni_next = cp;
  865         /*
  866          * If this component is followed by a slash, then move the pointer to
  867          * the next component forward, and remember that this component must be
  868          * a directory.
  869          */
  870         if (*cp == '/') {
  871                 do {
  872                         cp++;
  873                 } while (*cp == '/');
  874                 state->slashes = cp - ndp->ni_next;
  875                 ndp->ni_pathlen -= state->slashes;
  876                 ndp->ni_next = cp;
  877                 cnp->cn_flags |= REQUIREDIR;
  878         } else {
  879                 state->slashes = 0;
  880                 cnp->cn_flags &= ~REQUIREDIR;
  881         }
  882         /*
  883          * We do special processing on the last component, whether or not it's
  884          * a directory.  Cache all intervening lookups, but not the final one.
  885          */
  886         if (*cp == '\0') {
  887                 if (state->docache)
  888                         cnp->cn_flags |= MAKEENTRY;
  889                 else
  890                         cnp->cn_flags &= ~MAKEENTRY;
  891                 cnp->cn_flags |= ISLASTCN;
  892         } else {
  893                 cnp->cn_flags |= MAKEENTRY;
  894                 cnp->cn_flags &= ~ISLASTCN;
  895         }
  896         if (cnp->cn_namelen == 2 &&
  897             cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
  898                 cnp->cn_flags |= ISDOTDOT;
  899         else
  900                 cnp->cn_flags &= ~ISDOTDOT;
  901 
  902         return 0;
  903 }
  904 
  905 /*
  906  * Take care of crossing a mounted-on vnode.  On error, foundobj_ret will be
  907  * vrele'd, but searchdir is left alone.
       *
       * Arguments:
       *   state            - lookup state; only state->cnp->cn_flags is read.
       *                      NOCROSSMOUNT must not be set (asserted below).
       *   searchdir_ret    - in/out.  The search directory, possibly NULL.  It
       *                      is released and replaced by NULL once a VDIR has
       *                      been reached on the far side of a mount.
       *   foundobj_ret     - in/out.  The vnode that may be mounted on; on
       *                      return, the vnode reached after crossing every
       *                      mount found, or NULL if VFS_ROOT() failed.
       *   searchdir_locked - in/out.  If true on entry, searchdir is unlocked
       *                      first.  It is true on return only if searchdir
       *                      was kept and re-locked, which is skipped on error.
       *
       * Returns 0, or the error from VFS_ROOT().
  908  */
  909 static int
  910 lookup_crossmount(struct namei_state *state,
  911                   struct vnode **searchdir_ret,
  912                   struct vnode **foundobj_ret,
  913                   bool *searchdir_locked)
  914 {
  915         struct componentname *cnp = state->cnp;
  916         struct vnode *foundobj, *vp;
  917         struct vnode *searchdir;
  918         struct mount *mp;
  919         int error, lktype;
  920 
  921         searchdir = *searchdir_ret;
  922         foundobj = *foundobj_ret;
  923         error = 0;
  924 
  925         KASSERT((cnp->cn_flags & NOCROSSMOUNT) == 0);
  926 
  927         /* First, unlock searchdir (oof). */
  928         if (*searchdir_locked) {
  929                 KASSERT(searchdir != NULL);
                      /* Remember the lock mode so the same one is retaken below. */
  930                 lktype = VOP_ISLOCKED(searchdir);
  931                 VOP_UNLOCK(searchdir);
  932                 *searchdir_locked = false;
  933         } else {
  934                 lktype = LK_NONE;
  935         }
  936 
  937         /*
  938          * Do an unlocked check to see if the vnode has been mounted on; if
  939          * so find the root of the mounted file system.
               * The loop repeats so that mounts stacked on top of each other
               * are all crossed.
  940          */
  941         while (foundobj->v_type == VDIR &&
  942             (mp = foundobj->v_mountedhere) != NULL &&
  943             (cnp->cn_flags & NOCROSSMOUNT) == 0) {
  944                 /*
  945                  * Try the namecache first.  If that doesn't work, do
  946                  * it the hard way.
  947                  */
  948                 if (cache_lookup_mount(foundobj, &vp)) {
                              /* Swap our reference from the covered vnode to vp. */
  949                         vrele(foundobj);
  950                         foundobj = vp;
  951                 } else {
  952                         /* First get the vnodes mount stable. */
                              /*
                               * Retry until fstrans_held() succeeds and the vnode
                               * still points at the same mount; v_mountedhere is
                               * re-read because it was sampled without a lock.
                               */
  953                         while ((mp = foundobj->v_mountedhere) != NULL) {
  954                                 fstrans_start(mp);
  955                                 if (fstrans_held(mp) &&
  956                                     mp == foundobj->v_mountedhere) {
  957                                         break;
  958                                 }
  959                                 fstrans_done(mp);
  960                         }
                              /* Nothing is mounted here any more: stop crossing. */
  961                         if (mp == NULL) {
  962                                 break;
  963                         }
  964 
  965                         /*
  966                          * Now get a reference on the root vnode.
  967                          * XXX Future - maybe allow only VDIR here.
  968                          */
  969                         error = VFS_ROOT(mp, LK_NONE, &vp);
  970 
  971                         /*
  972                          * If successful, enter it into the cache while
  973                          * holding the mount busy (competing with unmount).
  974                          */
  975                         if (error == 0) {
  976                                 cache_enter_mount(foundobj, vp);
  977                         }
  978 
  979                         /* Finally, drop references to foundobj & mountpoint. */
  980                         vrele(foundobj);
  981                         fstrans_done(mp);
  982                         if (error) {
                                      /* Caller gets a NULL *foundobj_ret with the error. */
  983                                 foundobj = NULL;
  984                                 break;
  985                         }
  986                         foundobj = vp;
  987                 }
  988 
  989                 /*
  990                  * Avoid locking vnodes from two filesystems because
  991                  * it's prone to deadlock, e.g. when using puffs.
  992                  * Also, it isn't a good idea to propagate slowness of
  993                  * a filesystem up to the root directory. For now,
  994                  * only handle the common case, where foundobj is
  995                  * VDIR.
  996                  *
  997                  * In this case set searchdir to null to avoid using
  998                  * it again. It is not correct to set searchdir ==
  999                  * foundobj here as that will confuse the caller.
 1000                  * (See PR 40740.)
 1001                  */
 1002                 if (searchdir == NULL) {
 1003                         /* already been here once; do nothing further */
 1004                 } else if (foundobj->v_type == VDIR) {
 1005                         vrele(searchdir);
 1006                         *searchdir_ret = searchdir = NULL;
                              /* Nothing left to re-lock after the loop. */
 1007                         lktype = LK_NONE;
 1008                 }
 1009         }
 1010 
 1011         /* If searchdir is still around, re-lock it. */
              /* Skipped on error, so *searchdir_locked then stays false. */
 1012         if (error == 0 && lktype != LK_NONE) {
 1013                 vn_lock(searchdir, lktype | LK_RETRY);
 1014                 *searchdir_locked = true;
 1015         }
 1016         *foundobj_ret = foundobj;
 1017         return error;
 1018 }
 1019 
 1020 /*
 1021  * Determine the desired locking mode for the directory of a lookup.
 1022  */
 1023 static int
 1024 lookup_lktype(struct vnode *searchdir, struct componentname *cnp)
 1025 {
 1026 
 1027         /*
 1028          * If the file system supports VOP_LOOKUP() with a shared lock, and
 1029          * we are not making any modifications (nameiop LOOKUP) or this is
 1030          * not the last component then get a shared lock.  Where we can't do
 1031          * fast-forwarded lookups (for example with layered file systems)
 1032          * then this is the fallback for reducing lock contention.
 1033          */
 1034         if ((searchdir->v_mount->mnt_iflag & IMNT_SHRLOOKUP) != 0 &&
 1035             (cnp->cn_nameiop == LOOKUP || (cnp->cn_flags & ISLASTCN) == 0)) {
 1036                 return LK_SHARED;
 1037         } else {
 1038                 return LK_EXCLUSIVE;
 1039         }
 1040 }
 1041 
 1042 /*
 1043  * Call VOP_LOOKUP for a single lookup; return a new search directory
 1044  * (used when crossing mountpoints up or searching union mounts down) and
 1045  * the found object, which for create operations may be NULL on success.
 1046  *
 1047  * Note that the new search directory may be null, which means the
 1048  * searchdir was unlocked and released. This happens in the common case
 1049  * when crossing a mount point downwards, in order to avoid coupling
 1050  * locks between different file system volumes. Importantly, this can
 1051  * happen even if the call fails. (XXX: this is gross and should be
 1052  * tidied somehow.)
       *
       * Arguments:
       *   state                   - lookup state; the component to look up is
       *                             described by state->cnp.
       *   searchdir               - directory to search; referenced and
       *                             unlocked on entry.
       *   newsearchdir_ret        - out: the directory actually searched.  It
       *                             starts as searchdir and is updated whenever
       *                             the search moves to another directory.
       *   foundobj_ret            - out: the vnode found, or NULL when the name
       *                             does not exist but may be created.  Not
       *                             written on the error paths.
       *   newsearchdir_locked_ret - out: whether this function left the search
       *                             directory locked; written on every return.
       *
       * Returns 0 on success, otherwise an errno value (from VOP_LOOKUP(),
       * or ENOENT / EROFS chosen here).
 1053  */
 1054 static int
 1055 lookup_once(struct namei_state *state,
 1056             struct vnode *searchdir,
 1057             struct vnode **newsearchdir_ret,
 1058             struct vnode **foundobj_ret,
 1059             bool *newsearchdir_locked_ret)
 1060 {
 1061         struct vnode *tmpvn;            /* scratch vnode */
 1062         struct vnode *foundobj;         /* result */
 1063         struct lwp *l = curlwp;
 1064         bool searchdir_locked = false;
 1065         int error, lktype;
 1066 
 1067         struct componentname *cnp = state->cnp;
 1068         struct nameidata *ndp = state->ndp;
 1069 
 1070         KASSERT(cnp == &ndp->ni_cnd);
              /* Default: the search directory does not change. */
 1071         *newsearchdir_ret = searchdir;
 1072 
 1073         /*
 1074          * Handle "..": two special cases.
 1075          * 1. If at root directory (e.g. after chroot)
 1076          *    or at absolute root directory
 1077          *    then ignore it so can't get out.
 1078          * 1a. If at the root of the emulation filesystem go to the real
 1079          *    root. So "/../<path>" is always absolute.
 1080          * 1b. If we have somehow gotten out of a jail, warn
 1081          *    and also ignore it so we can't get farther out.
 1082          * 2. If this vnode is the root of a mounted
 1083          *    filesystem, then replace it with the
 1084          *    vnode which was mounted on so we take the
 1085          *    .. in the other file system.
 1086          */
 1087         if (cnp->cn_flags & ISDOTDOT) {
 1088                 struct proc *p = l->l_proc;
 1089 
 1090                 for (;;) {
                              /* Case 1: ".." at a root resolves to the root itself. */
 1091                         if (searchdir == ndp->ni_rootdir ||
 1092                             searchdir == rootvnode) {
 1093                                 foundobj = searchdir;
 1094                                 vref(foundobj);
 1095                                 *foundobj_ret = foundobj;
 1096                                 if (cnp->cn_flags & LOCKPARENT) {
 1097                                         lktype = lookup_lktype(searchdir, cnp);
 1098                                         vn_lock(searchdir, lktype | LK_RETRY);
 1099                                         searchdir_locked = true;
 1100                                 }
 1101                                 error = 0;
 1102                                 goto done;
 1103                         }
                              /* Case 1b: only checked when the lookup is chrooted. */
 1104                         if (ndp->ni_rootdir != rootvnode) {
 1105                                 int retval;
 1106 
 1107                                 retval = vn_isunder(searchdir, ndp->ni_rootdir, l);
 1108                                 if (!retval) {
 1109                                     /* Oops! We got out of jail! */
 1110                                     log(LOG_WARNING,
 1111                                         "chrooted pid %d uid %d (%s) "
 1112                                         "detected outside of its chroot\n",
 1113                                         p->p_pid, kauth_cred_geteuid(l->l_cred),
 1114                                         p->p_comm);
 1115                                     /* Put us at the jail root. */
 1116                                     vrele(searchdir);
 1117                                     searchdir = NULL;
 1118                                     foundobj = ndp->ni_rootdir;
                                          /*
                                           * Two references: one for the returned
                                           * search directory, one for the found
                                           * object.
                                           */
 1119                                     vref(foundobj);
 1120                                     vref(foundobj);
 1121                                     *newsearchdir_ret = foundobj;
 1122                                     *foundobj_ret = foundobj;
 1123                                     error = 0;
 1124                                     goto done;
 1125                                 }
 1126                         }
                              /* Not the root of a mount (or may not cross): normal "..". */
 1127                         if ((searchdir->v_vflag & VV_ROOT) == 0 ||
 1128                             (cnp->cn_flags & NOCROSSMOUNT))
 1129                                 break;
                              /*
                               * Case 2: move our reference from this mount's root
                               * to the vnode it covers, then run the checks again.
                               */
 1130                         tmpvn = searchdir;
 1131                         searchdir = searchdir->v_mount->mnt_vnodecovered;
 1132                         vref(searchdir);
 1133                         vrele(tmpvn);
 1134                         *newsearchdir_ret = searchdir;
 1135                 }
 1136         }
 1137 
 1138         lktype = lookup_lktype(searchdir, cnp);
 1139 
 1140         /*
 1141          * We now have a segment name to search for, and a directory to search.
 1142          * Our vnode state here is that "searchdir" is held.
 1143          */
 1144 unionlookup:
 1145         foundobj = NULL;
              /* May already be locked when retrying after a lock upgrade. */
 1146         if (!searchdir_locked) {
 1147                 vn_lock(searchdir, lktype | LK_RETRY);
 1148                 searchdir_locked = true;
 1149         }
 1150         error = VOP_LOOKUP(searchdir, &foundobj, cnp);
 1151 
 1152         if (error != 0) {
 1153                 KASSERTMSG((foundobj == NULL),
 1154                     "leaf `%s' should be empty but is %p",
 1155                     cnp->cn_nameptr, foundobj);
 1156 #ifdef NAMEI_DIAGNOSTIC
 1157                 printf("not found\n");
 1158 #endif /* NAMEI_DIAGNOSTIC */
 1159 
 1160                 /*
 1161                  * If ENOLCK, the file system needs us to retry the lookup
 1162                  * with an exclusive lock.  It's likely nothing was found in
 1163                  * cache and/or modifications need to be made.
 1164                  */
 1165                 if (error == ENOLCK) {
 1166                         KASSERT(VOP_ISLOCKED(searchdir) == LK_SHARED);
 1167                         KASSERT(searchdir_locked);
                              /*
                               * Try a non-blocking upgrade.  If that fails, drop the
                               * lock; the retry then takes it afresh using lktype,
                               * which is exclusive from here on.
                               */
 1168                         if (vn_lock(searchdir, LK_UPGRADE | LK_NOWAIT)) {
 1169                                 VOP_UNLOCK(searchdir);
 1170                                 searchdir_locked = false;
 1171                         }
 1172                         lktype = LK_EXCLUSIVE;
 1173                         goto unionlookup;
 1174                 }
 1175 
                      /*
                       * Name missing in the root of a union mount: retry in the
                       * directory that the mount covers.
                       */
 1176                 if ((error == ENOENT) &&
 1177                     (searchdir->v_vflag & VV_ROOT) &&
 1178                     (searchdir->v_mount->mnt_flag & MNT_UNION)) {
 1179                         tmpvn = searchdir;
 1180                         searchdir = searchdir->v_mount->mnt_vnodecovered;
 1181                         vref(searchdir);
                              /* vput() gives up both the lock and the reference. */
 1182                         vput(tmpvn);
 1183                         searchdir_locked = false;
 1184                         *newsearchdir_ret = searchdir;
 1185                         goto unionlookup;
 1186                 }
 1187 
                      /* Only EJUSTRETURN is examined further; other errors fail. */
 1188                 if (error != EJUSTRETURN)
 1189                         goto done;
 1190 
 1191                 /*
 1192                  * If this was not the last component, or there were trailing
 1193                  * slashes, and we are not going to create a directory,
 1194                  * then the name must exist.
 1195                  */
 1196                 if ((cnp->cn_flags & (REQUIREDIR | CREATEDIR)) == REQUIREDIR) {
 1197                         error = ENOENT;
 1198                         goto done;
 1199                 }
 1200 
 1201                 /*
 1202                  * If creating and at end of pathname, then can consider
 1203                  * allowing file to be created.
 1204                  */
 1205                 if (state->rdonly) {
 1206                         error = EROFS;
 1207                         goto done;
 1208                 }
 1209 
 1210                 /*
 1211                  * We return success and a NULL foundobj to indicate
 1212                  * that the entry doesn't currently exist, leaving a
 1213                  * pointer to the (normally, locked) directory vnode
 1214                  * as searchdir.
 1215                  */
 1216                 *foundobj_ret = NULL;
 1217                 error = 0;
 1218                 goto done;
 1219         }
 1220 #ifdef NAMEI_DIAGNOSTIC
 1221         printf("found\n");
 1222 #endif /* NAMEI_DIAGNOSTIC */
 1223 
 1224         /* Unlock, unless the caller needs the parent locked. */
              /*
               * NOTE(review): searchdir has already been dereferenced on every
               * path that reaches here, so the else arm looks purely defensive.
               */
 1225         if (searchdir != NULL) {
 1226                 KASSERT(searchdir_locked);
                      /* Stay locked only for the last component with LOCKPARENT. */
 1227                 if ((cnp->cn_flags & (ISLASTCN | LOCKPARENT)) !=
 1228                     (ISLASTCN | LOCKPARENT)) {
 1229                         VOP_UNLOCK(searchdir);
 1230                         searchdir_locked = false;
 1231                 }
 1232         } else {
 1233                 KASSERT(!searchdir_locked);
 1234         }
 1235 
 1236         *foundobj_ret = foundobj;
 1237         error = 0;
 1238 done:
              /* Every exit reports the lock state of the search directory. */
 1239         *newsearchdir_locked_ret = searchdir_locked;
 1240         return error;
 1241 }
 1242 
 1243 /*
 1244  * Parse out the first path name component that we need to consider.
 1245  *
 1246  * While doing this, attempt to use the name cache to fast-forward through
 1247  * as many "easy" to find components of the path as possible.
 1248  *
 1249  * We use the namecache's node locks to form a chain, and avoid as many
 1250  * vnode references and locks as possible.  In the ideal case, only the
 1251  * final vnode will have its reference count adjusted and lock taken.
       *
       * Arguments:
       *   state         - lookup state; state->cnp and state->ndp are advanced
       *                   past every component resolved here.
       *   searchdir_ret - in/out.  On entry, the referenced starting directory.
       *                   On return it is one of: unchanged; replaced by a newly
       *                   referenced parent of the last component examined; or
       *                   released and set to NULL when the final component was
       *                   resolved and no parent is needed.
       *   foundobj_ret  - out.  A referenced vnode when 0 is returned, NULL
       *                   otherwise (asserted before returning).
       *
       * Returns:
       *   0          - foundobj was located through the cache.
       *   ENOENT     - the cache holds a negative entry for the component.
       *   EOPNOTSUPP - the cache could not answer; the caller should fall back
       *                to lookup_once() for the component now described by
       *                state->cnp.
       *   other      - error returned by lookup_parsepath().
 1252  */
 1253 static int
 1254 lookup_fastforward(struct namei_state *state, struct vnode **searchdir_ret,
 1255                    struct vnode **foundobj_ret)
 1256 {
 1257         struct componentname *cnp = state->cnp;
 1258         struct nameidata *ndp = state->ndp;
 1259         krwlock_t *plock;
 1260         struct vnode *foundobj, *searchdir;
 1261         int error, error2;
 1262         size_t oldpathlen;
 1263         const char *oldnameptr;
 1264         bool terminal;
 1265 
 1266         /*
 1267          * Eat as many path name components as possible before giving up and
 1268          * letting lookup_once() handle it.  Remember the starting point in
 1269          * case we can't get vnode references and need to roll back.
 1270          */
              /*
               * plock is the namecache lock currently held on our behalf, or
               * NULL if none; it is released after the loop.  terminal becomes
               * true once the final component has been settled (found and not
               * a symlink, or a negative hit), so no parent has to be returned.
               */
 1271         plock = NULL;
 1272         searchdir = *searchdir_ret;
 1273         oldnameptr = cnp->cn_nameptr;
 1274         oldpathlen = ndp->ni_pathlen;
 1275         terminal = false;
 1276         for (;;) {
 1277                 foundobj = NULL;
 1278 
 1279                 /*
 1280                  * Get the next component name.  There should be no slashes
 1281                  * here, and we shouldn't have looped around if we were
 1282                  * done.
 1283                  */
 1284                 KASSERT(cnp->cn_nameptr[0] != '/');
 1285                 KASSERT(cnp->cn_nameptr[0] != '\0');
 1286                 if ((error = lookup_parsepath(state, searchdir)) != 0) {
 1287                         break;
 1288                 }
 1289 
 1290                 /*
 1291                  * Can't deal with DOTDOT lookups if NOCROSSMOUNT or the
 1292                  * lookup is chrooted.
 1293                  */
 1294                 if ((cnp->cn_flags & ISDOTDOT) != 0) {
 1295                         if ((searchdir->v_vflag & VV_ROOT) != 0 &&
 1296                             (cnp->cn_flags & NOCROSSMOUNT)) {
 1297                                 error = EOPNOTSUPP;
 1298                                 break;
 1299                         }
 1300                         if (ndp->ni_rootdir != rootvnode) {
 1301                                 error = EOPNOTSUPP;
 1302                                 break;
 1303                         }
 1304                 }
 1305 
 1306                 /*
 1307                  * Can't deal with last component when modifying; this needs
 1308                  * searchdir locked and VOP_LOOKUP() called (which can and
 1309                  * does modify state, despite the name).  NB: this case means
 1310                  * terminal is never set true when LOCKPARENT.
 1311                  */
 1312                 if ((cnp->cn_flags & ISLASTCN) != 0) {
 1313                         if (cnp->cn_nameiop != LOOKUP ||
 1314                             (cnp->cn_flags & LOCKPARENT) != 0) {
 1315                                 error = EOPNOTSUPP;
 1316                                 break;
 1317                         }
 1318                 }
 1319 
 1320                 /*
 1321                  * Good, now look for it in cache.  cache_lookup_linked()
 1322                  * will fail if there's nothing there, or if there's no
 1323                  * ownership info for the directory, or if the user doesn't
 1324                  * have permission to look up files in this directory.
 1325                  */
 1326                 if (!cache_lookup_linked(searchdir, cnp->cn_nameptr,
 1327                     cnp->cn_namelen, &foundobj, &plock, cnp->cn_cred)) {
 1328                         error = EOPNOTSUPP;
 1329                         break;
 1330                 }
 1331                 KASSERT(plock != NULL && rw_lock_held(plock));
 1332 
 1333                 /*
 1334                  * Scored a hit.  Negative is good too (ENOENT).  If there's
 1335                  * a '-o union' mount here, punt and let lookup_once() deal
 1336                  * with it.
 1337                  */
 1338                 if (foundobj == NULL) {
 1339                         if ((searchdir->v_vflag & VV_ROOT) != 0 &&
 1340                             (searchdir->v_mount->mnt_flag & MNT_UNION) != 0) {
 1341                                 error = EOPNOTSUPP;
 1342                         } else {
 1343                                 error = ENOENT;
                                      /* A missing final component needs no parent. */
 1344                                 terminal = ((cnp->cn_flags & ISLASTCN) != 0);
 1345                         }
 1346                         break;
 1347                 }
 1348 
 1349                 /*
 1350                  * Stop and get a hold on the vnode if we've encountered
 1351                  * something other than a directory.
 1352                  */
 1353                 if (foundobj->v_type != VDIR) {
 1354                         error = vcache_tryvget(foundobj);
 1355                         if (error != 0) {
                                      /* No reference obtained: hand over to lookup_once(). */
 1356                                 foundobj = NULL;
 1357                                 error = EOPNOTSUPP;
 1358                         } else {
                                      /* A symlink still needs its parent directory. */
 1359                                 terminal = (foundobj->v_type != VLNK &&
 1360                                     (cnp->cn_flags & ISLASTCN) != 0);
 1361                         }
 1362                         break;
 1363                 }
 1364 
 1365                 /*
 1366                  * Try to cross mountpoints, bearing in mind that they can
 1367                  * be stacked.  If at any point we can't go further, stop
 1368                  * and try to get a reference on the vnode.  If we are able
 1369                  * to get a ref then lookup_crossmount() will take care of
 1370                  * it, otherwise we'll fall through to lookup_once().
 1371                  */
 1372                 if (foundobj->v_mountedhere != NULL) {
 1373                         while (foundobj->v_mountedhere != NULL &&
 1374                             (cnp->cn_flags & NOCROSSMOUNT) == 0 &&
 1375                             cache_cross_mount(&foundobj, &plock)) {
 1376                                 KASSERT(foundobj != NULL);
 1377                                 KASSERT(foundobj->v_type == VDIR);
 1378                         }
 1379                         if (foundobj->v_mountedhere != NULL) {
 1380                                 error = vcache_tryvget(foundobj);
 1381                                 if (error != 0) {
 1382                                         foundobj = NULL;
 1383                                         error = EOPNOTSUPP;
 1384                                 }
 1385                                 break;
 1386                         } else {
                                      /*
                                       * Every mount was crossed, so the previous
                                       * searchdir is not foundobj's parent.  The
                                       * code after the loop handles a NULL
                                       * searchdir explicitly.
                                       */
 1387                                 searchdir = NULL;
 1388                         }
 1389                 }
 1390 
 1391                 /*
 1392                  * Time to stop if we found the last component & traversed
 1393                  * all mounts.
 1394                  */
 1395                 if ((cnp->cn_flags & ISLASTCN) != 0) {
 1396                         error = vcache_tryvget(foundobj);
 1397                         if (error != 0) {
 1398                                 foundobj = NULL;
 1399                                 error = EOPNOTSUPP;
 1400                         } else {
 1401                                 terminal = (foundobj->v_type != VLNK);
 1402                         }
 1403                         break;
 1404                 }
 1405 
 1406                 /*
 1407                  * Otherwise, we're still in business.  Set the found VDIR
 1408                  * vnode as the search dir for the next component and
 1409                  * continue on to it.
 1410                  */
 1411                 cnp->cn_nameptr = ndp->ni_next;
 1412                 searchdir = foundobj;
 1413         }
 1414 
 1415         if (terminal) {
 1416                 /*
 1417                  * If we exited the loop above having successfully located
 1418                  * the last component with a zero error code, and it's not a
 1419                  * symbolic link, then the parent directory is not needed.
 1420                  * Release reference to the starting parent and make the
 1421                  * terminal parent disappear into thin air.
 1422                  */
 1423                 KASSERT(plock != NULL);
 1424                 rw_exit(plock);
 1425                 vrele(*searchdir_ret);
 1426                 *searchdir_ret = NULL;
 1427         } else if (searchdir != *searchdir_ret) {
 1428                 /*
 1429                  * Otherwise we need to return the parent.  If we ended up
 1430                  * with a new search dir, ref it before dropping the
 1431                  * namecache's lock.  The lock prevents both searchdir and
 1432                  * foundobj from disappearing.  If we can't ref the new
 1433                  * searchdir, we have a bit of a problem.  Roll back the
 1434                  * fastforward to the beginning and let lookup_once() take
 1435                  * care of it.
 1436                  */
 1437                 if (searchdir == NULL) {
 1438                         /*
 1439                          * It's possible for searchdir to be NULL in the
 1440                          * case of a root vnode being reclaimed while
 1441                          * trying to cross a mount.
 1442                          */
 1443                         error2 = EOPNOTSUPP;
 1444                 } else {
 1445                         error2 = vcache_tryvget(searchdir);
 1446                 }
 1447                 KASSERT(plock != NULL);
 1448                 rw_exit(plock);
 1449                 if (__predict_true(error2 == 0)) {
 1450                         /* Returning new searchdir, and maybe new foundobj. */
 1451                         vrele(*searchdir_ret);
 1452                         *searchdir_ret = searchdir;
 1453                 } else {
 1454                         /* Returning nothing. */
 1455                         if (foundobj != NULL) {
 1456                                 vrele(foundobj);
 1457                                 foundobj = NULL;
 1458                         }
                              /*
                               * Rewind to the saved starting point and re-parse the
                               * first component so the state describes what
                               * lookup_once() must look up.  A parse error wins;
                               * otherwise report EOPNOTSUPP.
                               */
 1459                         cnp->cn_nameptr = oldnameptr;
 1460                         ndp->ni_pathlen = oldpathlen;
 1461                         error = lookup_parsepath(state, *searchdir_ret);
 1462                         if (error == 0) {
 1463                                 error = EOPNOTSUPP;
 1464                         }
 1465                 }
 1466         } else if (plock != NULL) {
 1467                 /* Drop any namecache lock still held. */
                      /* searchdir is unchanged here, so *searchdir_ret stays as is. */
 1468                 rw_exit(plock);
 1469         }
 1470 
 1471         KASSERT(error == 0 ? foundobj != NULL : foundobj == NULL);
 1472         *foundobj_ret = foundobj;
 1473         return error;
 1474 }
 1475 
 1476 //////////////////////////////
 1477 
 1478 /*
 1479  * Do a complete path search from a single root directory.
 1480  * (This is called up to twice if TRYEMULROOT is in effect.)
       *
       * state carries the nameidata (ndp) and componentname (cnp) being
       * resolved.  If neverfollow is nonzero, reaching a symlink that
       * namei_atsymlink() says should be acted on fails with EINVAL
       * instead of following it.  inhibitmagic is handed to namei_follow()
       * and isnfsd is handed to namei_start().
       *
       * On success returns 0 with the result in ndp->ni_vp and the parent
       * directory in ndp->ni_dvp.  ni_dvp is always NULL when LOCKPARENT
       * is clear and can also be NULL when it is set (e.g. the path was
       * just "/").  ni_vp is NULL when the loop ended with no object found
       * (the "creating something" case described in the loop).
       *
       * On failure returns an errno value with both ndp->ni_dvp and
       * ndp->ni_vp set to NULL.  Several, but not all, failure paths also
       * set state->attempt_retry to tell the caller a retry is allowed.
 1481  */
 1482 static int
 1483 namei_oneroot(struct namei_state *state,
 1484          int neverfollow, int inhibitmagic, int isnfsd)
 1485 {
 1486         struct nameidata *ndp = state->ndp;
 1487         struct componentname *cnp = state->cnp;
 1488         struct vnode *searchdir, *foundobj;
 1489         bool searchdir_locked = false;
 1490         int error;
 1491 
 1492         error = namei_start(state, isnfsd, &searchdir);
 1493         if (error) {
 1494                 ndp->ni_dvp = NULL;
 1495                 ndp->ni_vp = NULL;
 1496                 return error;
 1497         }
 1498         KASSERT(searchdir->v_type == VDIR);
 1499 
 1500         /*
 1501          * Setup: break out flag bits into variables.
              *
              * docache is nonzero when the NOCACHE flag is clear, except
              * that it is forced to 0 for DELETE operations.
 1502          */
 1503         state->docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
 1504         if (cnp->cn_nameiop == DELETE)
 1505                 state->docache = 0;
 1506         state->rdonly = cnp->cn_flags & RDONLY;
 1507 
 1508         /*
 1509          * Keep going until we run out of path components.
 1510          */
 1511         cnp->cn_nameptr = ndp->ni_pnbuf;
 1512 
 1513         /* drop leading slashes (already used them to choose startdir) */
 1514         while (cnp->cn_nameptr[0] == '/') {
 1515                 cnp->cn_nameptr++;
 1516                 ndp->ni_pathlen--;
 1517         }
 1518         /* was it just "/"? */
 1519         if (cnp->cn_nameptr[0] == '\0') {
                      /* The start directory itself is the result; no parent. */
 1520                 foundobj = searchdir;
 1521                 searchdir = NULL;
 1522                 cnp->cn_flags |= ISLASTCN;
 1523 
 1524                 /* bleh */
 1525                 goto skiploop;
 1526         }
 1527 
 1528         for (;;) {
 1529                 KASSERT(searchdir != NULL);
 1530                 KASSERT(!searchdir_locked);
 1531 
 1532                 /*
 1533                  * Parse out the first path name component that we need
 1534                  * to consider.  While doing this, attempt to use the name
 1535                  * cache to fast-forward through as many "easy" to find
 1536                  * components of the path as possible.
 1537                  */
 1538                 error = lookup_fastforward(state, &searchdir, &foundobj);
 1539 
 1540                 /*
 1541                  * If we didn't get a good answer from the namecache, then
 1542                  * go directly to the file system.
 1543                  */
 1544                 if (error == EOPNOTSUPP) {
 1545                         error = lookup_once(state, searchdir, &searchdir,
 1546                             &foundobj, &searchdir_locked);
 1547                 }
 1548 
 1549                 /*
 1550                  * If the vnode we found is mounted on, then cross the mount
 1551                  * and get the root vnode in foundobj.  If this encounters
 1552                  * an error, it will dispose of foundobj, but searchdir is
 1553                  * untouched.
 1554                  */
 1555                 if (error == 0 && foundobj != NULL &&
 1556                     foundobj->v_type == VDIR &&
 1557                     foundobj->v_mountedhere != NULL &&
 1558                     (cnp->cn_flags & NOCROSSMOUNT) == 0) {
 1559                         error = lookup_crossmount(state, &searchdir,
 1560                             &foundobj, &searchdir_locked);
 1561                 }
 1562 
 1563                 if (error) {
                              /* Release searchdir: vput if locked, else vrele. */
 1564                         if (searchdir != NULL) {
 1565                                 if (searchdir_locked) {
 1566                                         searchdir_locked = false;
 1567                                         vput(searchdir);
 1568                                 } else {
 1569                                         vrele(searchdir);
 1570                                 }
 1571                         }
 1572                         ndp->ni_dvp = NULL;
 1573                         ndp->ni_vp = NULL;
 1574                         /*
 1575                          * Note that if we're doing TRYEMULROOT we can
 1576                          * retry with the normal root. Where this is
 1577                          * currently set matches previous practice,
 1578                          * but the previous practice didn't make much
 1579                          * sense and somebody should sit down and
 1580                          * figure out which cases should cause retry
 1581                          * and which shouldn't. XXX.
 1582                          */
 1583                         state->attempt_retry = 1;
 1584                         return (error);
 1585                 }
 1586 
 1587                 if (foundobj == NULL) {
 1588                         /*
 1589                          * Success with no object returned means we're
 1590                          * creating something and it isn't already
 1591                          * there. Break out of the main loop now so
 1592                          * the code below doesn't have to test for
 1593                          * foundobj == NULL.
 1594                          */
 1595                         /* lookup_once can't have dropped the searchdir */
 1596                         KASSERT(searchdir != NULL ||
 1597                             (cnp->cn_flags & ISLASTCN) != 0);
 1598                         break;
 1599                 }
 1600 
 1601                 /*
 1602                  * Check for symbolic link. If we've reached one,
 1603                  * follow it, unless we aren't supposed to. Back up
 1604                  * over any slashes that we skipped, as we will need
 1605                  * them again.
 1606                  */
 1607                 if (namei_atsymlink(state, foundobj)) {
 1608                         /* Don't need searchdir locked any more. */
 1609                         if (searchdir_locked) {
 1610                                 searchdir_locked = false;
 1611                                 VOP_UNLOCK(searchdir);
 1612                         }
 1613                         ndp->ni_pathlen += state->slashes;
 1614                         ndp->ni_next -= state->slashes;
 1615                         if (neverfollow) {
 1616                                 error = EINVAL;
 1617                         } else if (searchdir == NULL) {
 1618                                 /*
 1619                                  * dholland 20160410: lookup_once only
 1620                                  * drops searchdir if it crossed a
 1621                                  * mount point. Therefore, if we get
 1622                                  * here it means we crossed a mount
 1623                                  * point to a mounted filesystem whose
 1624                                  * root vnode is a symlink. In theory
 1625                                  * we could continue at this point by
 1626                                  * using the pre-crossing searchdir
 1627                                  * (e.g. just take out an extra
 1628                                  * reference on it before calling
 1629                                  * lookup_once so we still have it),
 1630                                  * but this will make an ugly mess and
 1631                                  * it should never happen in practice
 1632                                  * as only badly broken filesystems
 1633                                  * have non-directory root vnodes. (I
 1634                                  * have seen this sort of thing with
 1635                                  * NFS occasionally but even then it
 1636                                  * means something's badly wrong.)
 1637                                  */
 1638                                 error = ENOTDIR;
 1639                         } else {
 1640                                 /*
 1641                                  * dholland 20110410: if we're at a
 1642                                  * union mount it might make sense to
 1643                                  * use the top of the union stack here
 1644                                  * rather than the layer we found the
 1645                                  * symlink in. (FUTURE)
 1646                                  */
 1647                                 error = namei_follow(state, inhibitmagic,
 1648                                                      searchdir, foundobj,
 1649                                                      &searchdir);
 1650                         }
                              /*
                               * NOTE(review): unlike the other error exits
                               * in this loop, this one returns without
                               * setting state->attempt_retry -- confirm
                               * that this is intended.
                               */
 1651                         if (error) {
 1652                                 KASSERT(searchdir != foundobj);
 1653                                 if (searchdir != NULL) {
 1654                                         vrele(searchdir);
 1655                                 }
 1656                                 vrele(foundobj);
 1657                                 ndp->ni_dvp = NULL;
 1658                                 ndp->ni_vp = NULL;
 1659                                 return error;
 1660                         }
                              /* The symlink vnode itself is no longer needed. */
 1661                         vrele(foundobj);
 1662                         foundobj = NULL;
 1663 
 1664                         /*
 1665                          * If we followed a symlink to `/' and there
 1666                          * are no more components after the symlink,
 1667                          * we're done with the loop and what we found
 1668                          * is the searchdir.
 1669                          */
 1670                         if (cnp->cn_nameptr[0] == '\0') {
 1671                                 KASSERT(searchdir != NULL);
 1672                                 foundobj = searchdir;
 1673                                 searchdir = NULL;
 1674                                 cnp->cn_flags |= ISLASTCN;
 1675                                 break;
 1676                         }
 1677 
 1678                         continue;
 1679                 }
 1680 
 1681                 /*
 1682                  * Not a symbolic link.
 1683                  *
 1684                  * Check for directory, if the component was
 1685                  * followed by a series of slashes.
 1686                  */
 1687                 if ((foundobj->v_type != VDIR) &&
 1688                     (cnp->cn_flags & REQUIREDIR)) {
 1689                         KASSERT(foundobj != searchdir);
 1690                         if (searchdir) {
 1691                                 if (searchdir_locked) {
 1692                                         searchdir_locked = false;
 1693                                         vput(searchdir);
 1694                                 } else {
 1695                                         vrele(searchdir);
 1696                                 }
 1697                         } else {
 1698                                 KASSERT(!searchdir_locked);
 1699                         }
 1700                         vrele(foundobj);
 1701                         ndp->ni_dvp = NULL;
 1702                         ndp->ni_vp = NULL;
 1703                         state->attempt_retry = 1;
 1704                         return ENOTDIR;
 1705                 }
 1706 
 1707                 /*
 1708                  * Stop if we've reached the last component.
 1709                  */
 1710                 if (cnp->cn_flags & ISLASTCN) {
 1711                         break;
 1712                 }
 1713 
 1714                 /*
 1715                  * Continue with the next component.
                      * The object just found becomes the directory to search;
                      * the old search directory is released first.
 1716                  */
 1717                 cnp->cn_nameptr = ndp->ni_next;
 1718                 if (searchdir != NULL) {
 1719                         if (searchdir_locked) {
 1720                                 searchdir_locked = false;
 1721                                 vput(searchdir);
 1722                         } else {
 1723                                 vrele(searchdir);
 1724                         }
 1725                 }
 1726                 searchdir = foundobj;
 1727                 foundobj = NULL;
 1728         }
 1729 
 1730         KASSERT((cnp->cn_flags & LOCKPARENT) == 0 || searchdir == NULL ||
 1731             VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE);
 1732 
 1733  skiploop:
 1734 
 1735         if (foundobj != NULL) {
 1736                 if (foundobj == ndp->ni_erootdir) {
 1737                         /*
 1738                          * We are about to return the emulation root.
 1739                          * This isn't a good idea because code might
 1740                          * repeatedly lookup ".." until the file
 1741                          * matches that returned for "/" and loop
 1742                          * forever.  So convert it to the real root.
 1743                          */
 1744                         if (searchdir != NULL) {
 1745                                 if (searchdir_locked) {
 1746                                         vput(searchdir);
 1747                                         searchdir_locked = false;
 1748                                 } else {
 1749                                         vrele(searchdir);
 1750                                 }
 1751                                 searchdir = NULL;
 1752                         }
 1753                         vrele(foundobj);
 1754                         foundobj = ndp->ni_rootdir;
 1755                         vref(foundobj);
 1756                 }
 1757 
 1758                 /*
 1759                  * If the caller requested the parent node (i.e. it's
 1760                  * a CREATE, DELETE, or RENAME), and we don't have one
 1761                  * (because this is the root directory, or we crossed
 1762                  * a mount point), then we must fail.
 1763                  *
 1764                  * 20210604 dholland when NONEXCLHACK is set (open
 1765                  * with O_CREAT but not O_EXCL) skip this logic. Since
 1766                  * we have a foundobj, open will not be creating, so
 1767                  * it doesn't actually need or use the searchdir, so
 1768                  * it's ok to return it even if it's on a different
 1769                  * volume, and it's also ok to return NULL; by setting
 1770                  * NONEXCLHACK the open code promises to cope with
 1771                  * those cases correctly. (That is, it should do what
 1772                  * it would do anyway, that is, just release the
 1773                  * searchdir, except not crash if it's null.) This is
 1774                  * needed because otherwise opening mountpoints with
 1775                  * O_CREAT but not O_EXCL fails... which is a silly
 1776                  * thing to do but ought to work. (This whole issue
 1777                  * came to light because 3rd party code wanted to open
 1778                  * certain procfs nodes with O_CREAT for some 3rd
 1779                  * party reason, and it failed.)
 1780                  *
 1781                  * Note that NONEXCLHACK is properly a different
 1782                  * nameiop (it is partway between LOOKUP and CREATE)
 1783                  * but it was stuffed in as a flag instead to make the
 1784                  * resulting patch less invasive for pullup. Blah.
 1785                  */
 1786                 if (cnp->cn_nameiop != LOOKUP &&
 1787                     (searchdir == NULL ||
 1788                      searchdir->v_mount != foundobj->v_mount) &&
 1789                     (cnp->cn_flags & NONEXCLHACK) == 0) {
 1790                         if (searchdir) {
 1791                                 if (searchdir_locked) {
 1792                                         vput(searchdir);
 1793                                         searchdir_locked = false;
 1794                                 } else {
 1795                                         vrele(searchdir);
 1796                                 }
 1797                                 searchdir = NULL;
 1798                         }
 1799                         vrele(foundobj);
 1800                         foundobj = NULL;
 1801                         ndp->ni_dvp = NULL;
 1802                         ndp->ni_vp = NULL;
 1803                         state->attempt_retry = 1;
 1804 
                              /* Pick the errno by operation; LOOKUP cannot get here. */
 1805                         switch (cnp->cn_nameiop) {
 1806                             case CREATE:
 1807                                 return EEXIST;
 1808                             case DELETE:
 1809                             case RENAME:
 1810                                 return EBUSY;
 1811                             default:
 1812                                 break;
 1813                         }
 1814                         panic("Invalid nameiop\n");
 1815                 }
 1816 
 1817                 /*
 1818                  * Disallow directory write attempts on read-only lookups.
 1819                  * Prefers EEXIST over EROFS for the CREATE case.
 1820                  */
 1821                 if (state->rdonly &&
 1822                     (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
 1823                         if (searchdir) {
 1824                                 if (searchdir_locked) {
 1825                                         vput(searchdir);
 1826                                         searchdir_locked = false;
 1827                                 } else {
 1828                                         vrele(searchdir);
 1829                                 }
 1830                                 searchdir = NULL;
 1831                         }
 1832                         vrele(foundobj);
 1833                         foundobj = NULL;
 1834                         ndp->ni_dvp = NULL;
 1835                         ndp->ni_vp = NULL;
 1836                         state->attempt_retry = 1;
 1837                         return EROFS;
 1838                 }
 1839 
 1840                 /* Lock the leaf node if requested. */
 1841                 if ((cnp->cn_flags & (LOCKLEAF | LOCKPARENT)) == LOCKPARENT &&
 1842                     searchdir == foundobj) {
 1843                         /*
 1844                          * Note: if LOCKPARENT but not LOCKLEAF is
 1845                          * set, and searchdir == foundobj, this code
 1846                          * necessarily unlocks the parent as well as
 1847                          * the leaf. That is, just because you specify
 1848                          * LOCKPARENT doesn't mean you necessarily get
 1849                          * a locked parent vnode. The code in
 1850                          * vfs_syscalls.c, and possibly elsewhere,
 1851                          * that uses this combination "knows" this, so
 1852                          * it can't be safely changed. Feh. XXX
 1853                          */
 1854                         KASSERT(searchdir_locked);
 1855                         VOP_UNLOCK(searchdir);
 1856                         searchdir_locked = false;
 1857                 } else if ((cnp->cn_flags & LOCKLEAF) != 0 &&
 1858                     (searchdir != foundobj ||
 1859                     (cnp->cn_flags & LOCKPARENT) == 0)) {
                              /* LOCKSHARED selects a shared lock on the leaf. */
 1860                         const int lktype = (cnp->cn_flags & LOCKSHARED) != 0 ?
 1861                             LK_SHARED : LK_EXCLUSIVE;
 1862                         vn_lock(foundobj, lktype | LK_RETRY);
 1863                 }
 1864         }
 1865 
 1866         /*
 1867          * Done.
 1868          */
 1869 
 1870         /*
 1871          * If LOCKPARENT is not set, the parent directory isn't returned.
 1872          */
 1873         if ((cnp->cn_flags & LOCKPARENT) == 0 && searchdir != NULL) {
 1874                 vrele(searchdir);
 1875                 searchdir = NULL;
 1876         }
 1877 
 1878         ndp->ni_dvp = searchdir;
 1879         ndp->ni_vp = foundobj;
 1880         return 0;
 1881 }
 1882 
 1883 /*
 1884  * Do namei; wrapper layer that handles TRYEMULROOT.
       *
       * Runs namei_oneroot() once.  If that fails while ndp->ni_erootdir
       * is non-NULL and namei_oneroot() set state->attempt_retry, the saved
       * copy of the path is written back into the pathbuf, TRYEMULROOT is
       * cleared from cn_flags, and the search is run again.
       *
       * neverfollow, inhibitmagic and isnfsd are passed straight through
       * to namei_oneroot().  Returns the result of the last
       * namei_oneroot() call: 0 on success, otherwise an errno value.
 1885  */
 1886 static int
 1887 namei_tryemulroot(struct namei_state *state,
 1888          int neverfollow, int inhibitmagic, int isnfsd)
 1889 {
 1890         int error;
 1891 
 1892         struct nameidata *ndp = state->ndp;
 1893         struct componentname *cnp = state->cnp;
 1894         const char *savepath = NULL;
 1895 
 1896         KASSERT(cnp == &ndp->ni_cnd);
 1897 
              /*
               * Keep a copy of the path string so it can be restored before
               * a retry (presumably the first pass may modify the pathbuf
               * contents -- TODO confirm).
               */
 1898         if (cnp->cn_flags & TRYEMULROOT) {
 1899                 savepath = pathbuf_stringcopy_get(ndp->ni_pathbuf);
 1900         }
 1901 
 1902     emul_retry:
 1903         state->attempt_retry = 0;
 1904 
 1905         error = namei_oneroot(state, neverfollow, inhibitmagic, isnfsd);
 1906         if (error) {
 1907                 /*
 1908                  * Once namei has started up, the existence of ni_erootdir
 1909                  * tells us whether we're working from an emulation root.
 1910                  * The TRYEMULROOT flag isn't necessarily authoritative.
 1911                  */
 1912                 if (ndp->ni_erootdir != NULL && state->attempt_retry) {
 1913                         /* Retry the whole thing using the normal root */
 1914                         cnp->cn_flags &= ~TRYEMULROOT;
 1915                         state->attempt_retry = 0;
 1916 
                              /*
                               * NOTE(review): savepath is only non-NULL if
                               * TRYEMULROOT was set on entry and no retry
                               * has happened yet.  This strcpy therefore
                               * assumes ni_erootdir is non-NULL only in
                               * that situation -- confirm against
                               * namei_start().
                               */
 1917                         /* kinda gross */
 1918                         strcpy(ndp->ni_pathbuf->pb_path, savepath);
 1919                         pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath);
 1920                         savepath = NULL;
 1921 
 1922                         goto emul_retry;
 1923                 }
 1924         }
              /* Release the saved copy if it was not consumed by a retry. */
 1925         if (savepath != NULL) {
 1926                 pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath);
 1927         }
 1928         return error;
 1929 }
 1930 
 1931 /*
 1932  * External interface.
       *
       * Resolve the path described by ndp.  Sets up a namei_state, runs
       * namei_tryemulroot() with neverfollow, inhibitmagic and isnfsd all
       * zero, and tears the state down again.
       *
       * Returns 0 on success or an errno value.  On error ndp->ni_dvp and
       * ndp->ni_vp are expected to be NULL (asserted below).
 1933  */
 1934 int
 1935 namei(struct nameidata *ndp)
 1936 {
 1937         struct namei_state state;
 1938         int error;
 1939 
 1940         namei_init(&state, ndp);
 1941         error = namei_tryemulroot(&state,
 1942                                   0/*!neverfollow*/, 0/*!inhibitmagic*/,
 1943                                   0/*isnfsd*/);
 1944         namei_cleanup(&state);
 1945 
 1946         if (error) {
 1947                 /* make sure no stray refs leak out */
 1948                 KASSERT(ndp->ni_dvp == NULL);
 1949                 KASSERT(ndp->ni_vp == NULL);
 1950         }
 1951 
 1952         return error;
 1953 }
 1954 
 1955 ////////////////////////////////////////////////////////////
 1956 
 1957 /*
 1958  * External interface used by nfsd. This is basically different from
 1959  * namei only in that it has the ability to pass in the "current
 1960  * directory", and uses an extra flag "neverfollow" for which there's
 1961  * no physical flag defined in namei.h. (There used to be a cut&paste
 1962  * copy of about half of namei in nfsd to allow these minor
 1963  * adjustments to exist.)
 1964  *
 1965  * XXX: the namei interface should be adjusted so nfsd can just use
 1966  * ordinary namei().
       *
       * forcecwd is stored in ndp->ni_atdir, which must be NULL on entry.
       * neverfollow is passed through to namei_tryemulroot(); inhibitmagic
       * and isnfsd are both passed as 1.
       *
       * Returns 0 on success or an errno value.  On error ndp->ni_dvp and
       * ndp->ni_vp are expected to be NULL (asserted below).
 1967  */
 1968 int
 1969 lookup_for_nfsd(struct nameidata *ndp, struct vnode *forcecwd, int neverfollow)
 1970 {
 1971         struct namei_state state;
 1972         int error;
 1973 
 1974         KASSERT(ndp->ni_atdir == NULL);
 1975         ndp->ni_atdir = forcecwd;
 1976 
 1977         namei_init(&state, ndp);
 1978         error = namei_tryemulroot(&state,
 1979                                   neverfollow, 1/*inhibitmagic*/, 1/*isnfsd*/);
 1980         namei_cleanup(&state);
 1981 
 1982         if (error) {
 1983                 /* make sure no stray refs leak out */
 1984                 KASSERT(ndp->ni_dvp == NULL);
 1985                 KASSERT(ndp->ni_vp == NULL);
 1986         }
 1987 
 1988         return error;
 1989 }
 1990 
 1991 /*
 1992  * A second external interface used by nfsd. This turns out to be a
 1993  * single lookup used by the WebNFS code (ha!) to get "index.html" or
 1994  * equivalent when asked for a directory. It should eventually evolve
 1995  * into some kind of namei_once() call; for the time being it's kind
 1996  * of a mess. XXX.
 1997  *
 1998  * dholland 20110109: I don't think it works, and I don't think it
 1999  * worked before I started hacking and slashing either, and I doubt
 2000  * anyone will ever notice.
 2001  */
 2002 
 2003 /*
 2004  * Internals. This calls lookup_once() after setting up the assorted
 2005  * pieces of state the way they ought to be.
       *
       * Looks up the single component at ndp->ni_pnbuf relative to
       * ndp->ni_atdir, marking it as the last component (ISLASTCN, with
       * MAKEENTRY set and REQUIREDIR cleared).  If the result is a
       * mounted-on directory and NOCROSSMOUNT is not set, the mount is
       * crossed via lookup_crossmount().
       *
       * The result is stored in ndp->ni_vp: NULL if lookup_once() or
       * lookup_crossmount() failed, and locked exclusively if LOCKLEAF is
       * set and an object was found.  ndp->ni_dvp is set to NULL and never
       * filled in.  If VOP_PARSEPATH() fails its error is returned before
       * ni_vp is written.  Returns 0 or an errno value.
 2006  */
 2007 static int
 2008 do_lookup_for_nfsd_index(struct namei_state *state)
 2009 {
 2010         int error;
 2011 
 2012         struct componentname *cnp = state->cnp;
 2013         struct nameidata *ndp = state->ndp;
 2014         struct vnode *startdir;
              /*
               * NOTE(review): foundobj and startdir_locked are not
               * initialized here; they are presumably always written by
               * lookup_once() before being read -- TODO confirm.
               */
 2015         struct vnode *foundobj;
 2016         bool startdir_locked;
 2017         const char *cp;                 /* pointer into pathname argument */
 2018 
 2019         KASSERT(cnp == &ndp->ni_cnd);
 2020 
 2021         startdir = state->ndp->ni_atdir;
 2022 
              /*
               * NOTE(review): lookup_for_nfsd_index() sets ndp->ni_pnbuf to
               * NULL just before calling namei_init().  Unless namei_init()
               * repoints it, cn_nameptr is NULL here and is then handed to
               * VOP_PARSEPATH() -- confirm.
               */
 2023         cnp->cn_nameptr = ndp->ni_pnbuf;
 2024         state->docache = 1;
 2025         state->rdonly = cnp->cn_flags & RDONLY;
 2026         ndp->ni_dvp = NULL;
 2027 
              /* Ask the filesystem for the length of the component. */
 2028         error = VOP_PARSEPATH(startdir, cnp->cn_nameptr, &cnp->cn_namelen);
 2029         if (error) {
 2030                 return error;
 2031         }
 2032 
 2033         cp = cnp->cn_nameptr + cnp->cn_namelen;
 2034         KASSERT(cnp->cn_namelen <= KERNEL_NAME_MAX);
 2035         ndp->ni_pathlen -= cnp->cn_namelen;
 2036         ndp->ni_next = cp;
 2037         state->slashes = 0;
 2038         cnp->cn_flags &= ~REQUIREDIR;
 2039         cnp->cn_flags |= MAKEENTRY|ISLASTCN;
 2040 
              /* Flag the component as ".." only if it is exactly two dots. */
 2041         if (cnp->cn_namelen == 2 &&
 2042             cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
 2043                 cnp->cn_flags |= ISDOTDOT;
 2044         else
 2045                 cnp->cn_flags &= ~ISDOTDOT;
 2046 
 2047         /*
 2048          * Because lookup_once can change the startdir, we need our
 2049          * own reference to it to avoid consuming the caller's.
 2050          */
 2051         vref(startdir);
 2052         error = lookup_once(state, startdir, &startdir, &foundobj,
 2053             &startdir_locked);
 2054 
              /* The parent is never returned locked from here, so unlock it. */
 2055         KASSERT((cnp->cn_flags & LOCKPARENT) == 0);
 2056         if (startdir_locked) {
 2057                 VOP_UNLOCK(startdir);
 2058                 startdir_locked = false;
 2059         }
 2060 
 2061         /*
 2062          * If the vnode we found is mounted on, then cross the mount and get
 2063          * the root vnode in foundobj.  If this encounters an error, it will
 2064          * dispose of foundobj, but searchdir is untouched.
 2065          */
 2066         if (error == 0 && foundobj != NULL &&
 2067             foundobj->v_type == VDIR &&
 2068             foundobj->v_mountedhere != NULL &&
 2069             (cnp->cn_flags & NOCROSSMOUNT) == 0) {
 2070                 error = lookup_crossmount(state, &startdir, &foundobj,
 2071                     &startdir_locked);
 2072         }
 2073 
 2074         /* Now toss startdir and see if we have an error. */
 2075         if (startdir != NULL)
 2076                 vrele(startdir);
 2077         if (error)
 2078                 foundobj = NULL;
 2079         else if (foundobj != NULL && (cnp->cn_flags & LOCKLEAF) != 0)
 2080                 vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
 2081 
 2082         ndp->ni_vp = foundobj;
 2083         return (error);
 2084 }
 2085 
 2086 /*
 2087  * External interface. The partitioning between this function and the
 2088  * above isn't very clear - the above function exists mostly so code
 2089  * that uses "state->" can be shuffled around without having to change
 2090  * it to "state.".
       *
       * startdir is stored in ndp->ni_atdir, which must be NULL on entry.
       * The name in ndp->ni_pathbuf must be a single component: returns
       * ENAMETOOLONG if it is longer than KERNEL_NAME_MAX and EINVAL if it
       * contains a '/'.  Otherwise returns whatever
       * do_lookup_for_nfsd_index() returns.
       *
       * NOTE(review): the two early returns leave ndp->ni_atdir set to
       * startdir -- confirm callers do not reuse ndp afterwards.
 2091  */
 2092 int
 2093 lookup_for_nfsd_index(struct nameidata *ndp, struct vnode *startdir)
 2094 {
 2095         struct namei_state state;
 2096         int error;
 2097 
 2098         KASSERT(ndp->ni_atdir == NULL);
 2099         ndp->ni_atdir = startdir;
 2100 
 2101         /*
 2102          * Note: the name sent in here (is not|should not be) allowed
 2103          * to contain a slash.
 2104          */
 2105         if (strlen(ndp->ni_pathbuf->pb_path) > KERNEL_NAME_MAX) {
 2106                 return ENAMETOOLONG;
 2107         }
 2108         if (strchr(ndp->ni_pathbuf->pb_path, '/')) {
 2109                 return EINVAL;
 2110         }
 2111 
              /*
               * ni_pathlen counts the terminating NUL.
               *
               * NOTE(review): ni_pnbuf is set to NULL here, yet
               * do_lookup_for_nfsd_index() starts by copying ni_pnbuf into
               * cn_nameptr.  Unless namei_init() repoints it at the path
               * string, the lookup runs on a NULL name -- confirm.
               */
 2112         ndp->ni_pathlen = strlen(ndp->ni_pathbuf->pb_path) + 1;
 2113         ndp->ni_pnbuf = NULL;
 2114         ndp->ni_cnd.cn_nameptr = NULL;
 2115 
 2116         namei_init(&state, ndp);
 2117         error = do_lookup_for_nfsd_index(&state);
 2118         namei_cleanup(&state);
 2119 
 2120         return error;
 2121 }
 2122 
 2123 ////////////////////////////////////////////////////////////
 2124 
 2125 /*
 2126  * Reacquire a path name component.
 2127  * dvp is locked on entry and exit.
 2128  * *vpp is locked on exit unless it's NULL.
       *
       * Performs one VOP_LOOKUP() of cnp in dvp.  The dummy argument is
       * ignored.  Panics if the name is empty or ISDOTDOT is set.
       *
       * Returns 0 with the result in *vpp.  *vpp is NULL if VOP_LOOKUP()
       * returned EJUSTRETURN; if *vpp is the same vnode as dvp no
       * additional lock is taken on it.  Returns EROFS if RDONLY is set
       * and the operation is not LOOKUP, otherwise the error from
       * VOP_LOOKUP() or vn_lock().  On any error *vpp is set to NULL.
 2129  */
 2130 int
 2131 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int dummy)
 2132 {
 2133         int rdonly;                     /* lookup read-only flag bit */
 2134         int error = 0;
 2135 #ifdef DEBUG
 2136         size_t newlen;                  /* DEBUG: check name len */
 2137         const char *cp;                 /* DEBUG: check name ptr */
 2138 #endif /* DEBUG */
 2139 
 2140         (void)dummy;
 2141 
 2142         /*
 2143          * Setup: break out flag bits into variables.
 2144          */
 2145         rdonly = cnp->cn_flags & RDONLY;
 2146 
 2147         /*
 2148          * Search a new directory.
 2149          *
 2150          * The cn_hash value is for use by vfs_cache.
 2151          * The last component of the filename is left accessible via
 2152          * cnp->cn_nameptr for callers that need the name. Callers needing
 2153          * the name set the SAVENAME flag. When done, they assume
 2154          * responsibility for freeing the pathname buffer.
 2155          */
 2156 #ifdef DEBUG
 2157 #if 0
 2158         cp = NULL;
 2159         newhash = namei_hash(cnp->cn_nameptr, &cp);
 2160         if ((uint32_t)newhash != (uint32_t)cnp->cn_hash)
 2161                 panic("relookup: bad hash");
 2162 #endif
              /*
               * DEBUG only: re-parse the name and check that its length
               * still matches cn_namelen and that nothing but slashes
               * follows it.
               */
 2163         error = VOP_PARSEPATH(dvp, cnp->cn_nameptr, &newlen);
 2164         if (error) {
 2165                 panic("relookup: parsepath failed with error %d", error);
 2166         }
 2167         if (cnp->cn_namelen != newlen)
 2168                 panic("relookup: bad len");
 2169         cp = cnp->cn_nameptr + cnp->cn_namelen;
 2170         while (*cp == '/')
 2171                 cp++;
 2172         if (*cp != 0)
 2173                 panic("relookup: not last component");
 2174 #endif /* DEBUG */
 2175 
 2176         /*
 2177          * Check for degenerate name (e.g. / or "")
 2178          * which is a way of talking about a directory,
 2179          * e.g. like "/." or ".".
 2180          */
 2181         if (cnp->cn_nameptr[0] == '\0')
 2182                 panic("relookup: null name");
 2183 
 2184         if (cnp->cn_flags & ISDOTDOT)
 2185                 panic("relookup: lookup on dot-dot");
 2186 
 2187         /*
 2188          * We now have a segment name to search for, and a directory to search.
 2189          */
 2190         *vpp = NULL;
 2191         error = VOP_LOOKUP(dvp, vpp, cnp);
 2192         if ((error) != 0) {
 2193                 KASSERTMSG((*vpp == NULL),
 2194                     "leaf `%s' should be empty but is %p",
 2195                     cnp->cn_nameptr, *vpp);
                      /* EJUSTRETURN is not a failure here; keep going with *vpp NULL. */
 2196                 if (error != EJUSTRETURN)
 2197                         goto bad;
 2198         }
 2199 
 2200         /*
 2201          * Check for symbolic link
 2202          */
 2203         KASSERTMSG((*vpp == NULL || (*vpp)->v_type != VLNK ||
 2204                 (cnp->cn_flags & FOLLOW) == 0),
 2205             "relookup: symlink found");
 2206 
 2207         /*
 2208          * Check for read-only lookups.
 2209          */
 2210         if (rdonly && cnp->cn_nameiop != LOOKUP) {
 2211                 error = EROFS;
 2212                 if (*vpp) {
 2213                         vrele(*vpp);
 2214                 }
 2215                 goto bad;
 2216         }
 2217         /*
 2218          * Lock result.
              * vn_lock() is called without LK_RETRY and its result is
              * checked: on failure the reference is dropped and the error
              * is returned.
 2219          */
 2220         if (*vpp && *vpp != dvp) {
 2221                 error = vn_lock(*vpp, LK_EXCLUSIVE);
 2222                 if (error != 0) {
 2223                         vrele(*vpp);
 2224                         goto bad;
 2225                 }
 2226         }
 2227         return (0);
 2228 
 2229 bad:
 2230         *vpp = NULL;
 2231         return (error);
 2232 }
 2233 
 2234 /*
 2235  * namei_simple - simple forms of namei.
 2236  *
 2237  * These are wrappers to allow the simple case callers of namei to be
 2238  * left alone while everything else changes under them.
 2239  */
 2240 
 2241 /* Flags */
 2242 struct namei_simple_flags_type {
 2243         int dummy;
 2244 };
 2245 static const struct namei_simple_flags_type ns_nn, ns_nt, ns_fn, ns_ft;
 2246 const namei_simple_flags_t NSM_NOFOLLOW_NOEMULROOT = &ns_nn;
 2247 const namei_simple_flags_t NSM_NOFOLLOW_TRYEMULROOT = &ns_nt;
 2248 const namei_simple_flags_t NSM_FOLLOW_NOEMULROOT = &ns_fn;
 2249 const namei_simple_flags_t NSM_FOLLOW_TRYEMULROOT = &ns_ft;
 2250 
 2251 static
 2252 int
 2253 namei_simple_convert_flags(namei_simple_flags_t sflags)
 2254 {
 2255         if (sflags == NSM_NOFOLLOW_NOEMULROOT)
 2256                 return NOFOLLOW | 0;
 2257         if (sflags == NSM_NOFOLLOW_TRYEMULROOT)
 2258                 return NOFOLLOW | TRYEMULROOT;
 2259         if (sflags == NSM_FOLLOW_NOEMULROOT)
 2260                 return FOLLOW | 0;
 2261         if (sflags == NSM_FOLLOW_TRYEMULROOT)
 2262                 return FOLLOW | TRYEMULROOT;
 2263         panic("namei_simple_convert_flags: bogus sflags\n");
 2264         return 0;
 2265 }
 2266 
 2267 int
 2268 namei_simple_kernel(const char *path, namei_simple_flags_t sflags,
 2269         struct vnode **vp_ret)
 2270 {
 2271         return nameiat_simple_kernel(NULL, path, sflags, vp_ret);
 2272 }
 2273 
 2274 int
 2275 nameiat_simple_kernel(struct vnode *dvp, const char *path,
 2276         namei_simple_flags_t sflags, struct vnode **vp_ret)
 2277 {
 2278         struct nameidata nd;
 2279         struct pathbuf *pb;
 2280         int err;
 2281 
 2282         pb = pathbuf_create(path);
 2283         if (pb == NULL) {
 2284                 return ENOMEM;
 2285         }
 2286 
 2287         NDINIT(&nd,
 2288                 LOOKUP,
 2289                 namei_simple_convert_flags(sflags),
 2290                 pb);
 2291 
 2292         if (dvp != NULL)
 2293                 NDAT(&nd, dvp);
 2294 
 2295         err = namei(&nd);
 2296         if (err != 0) {
 2297                 pathbuf_destroy(pb);
 2298                 return err;
 2299         }
 2300         *vp_ret = nd.ni_vp;
 2301         pathbuf_destroy(pb);
 2302         return 0;
 2303 }
 2304 
 2305 int
 2306 namei_simple_user(const char *path, namei_simple_flags_t sflags,
 2307         struct vnode **vp_ret)
 2308 {
 2309         return nameiat_simple_user(NULL, path, sflags, vp_ret);
 2310 }
 2311 
 2312 int
 2313 nameiat_simple_user(struct vnode *dvp, const char *path,
 2314         namei_simple_flags_t sflags, struct vnode **vp_ret)
 2315 {
 2316         struct pathbuf *pb;
 2317         struct nameidata nd;
 2318         int err;
 2319 
 2320         err = pathbuf_copyin(path, &pb);
 2321         if (err) {
 2322                 return err;
 2323         }
 2324 
 2325         NDINIT(&nd,
 2326                 LOOKUP,
 2327                 namei_simple_convert_flags(sflags),
 2328                 pb);
 2329 
 2330         if (dvp != NULL)
 2331                 NDAT(&nd, dvp);
 2332 
 2333         err = namei(&nd);
 2334         if (err != 0) {
 2335                 pathbuf_destroy(pb);
 2336                 return err;
 2337         }
 2338         *vp_ret = nd.ni_vp;
 2339         pathbuf_destroy(pb);
 2340         return 0;
 2341 }

Cache object: 7b552df82c88e2fd5c0b38d52d9479e5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.