The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_exec.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: kern_exec.c,v 1.280.4.3 2009/04/01 21:03:04 snj Exp $  */
    2 
    3 /*-
    4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   26  * POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 /*-
   30  * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
   31  * Copyright (C) 1992 Wolfgang Solfrank.
   32  * Copyright (C) 1992 TooLs GmbH.
   33  * All rights reserved.
   34  *
   35  * Redistribution and use in source and binary forms, with or without
   36  * modification, are permitted provided that the following conditions
   37  * are met:
   38  * 1. Redistributions of source code must retain the above copyright
   39  *    notice, this list of conditions and the following disclaimer.
   40  * 2. Redistributions in binary form must reproduce the above copyright
   41  *    notice, this list of conditions and the following disclaimer in the
   42  *    documentation and/or other materials provided with the distribution.
   43  * 3. All advertising materials mentioning features or use of this software
   44  *    must display the following acknowledgement:
   45  *      This product includes software developed by TooLs GmbH.
   46  * 4. The name of TooLs GmbH may not be used to endorse or promote products
   47  *    derived from this software without specific prior written permission.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
   50  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   51  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   52  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   53  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   54  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
   55  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
   56  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
   57  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
   58  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   59  */
   60 
   61 #include <sys/cdefs.h>
   62 __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.280.4.3 2009/04/01 21:03:04 snj Exp $");
   63 
   64 #include "opt_ktrace.h"
   65 #include "opt_syscall_debug.h"
   66 #include "opt_compat_netbsd.h"
   67 #include "veriexec.h"
   68 #include "opt_pax.h"
   69 #include "opt_sa.h"
   70 
   71 #include <sys/param.h>
   72 #include <sys/systm.h>
   73 #include <sys/filedesc.h>
   74 #include <sys/kernel.h>
   75 #include <sys/proc.h>
   76 #include <sys/mount.h>
   77 #include <sys/malloc.h>
   78 #include <sys/kmem.h>
   79 #include <sys/namei.h>
   80 #include <sys/vnode.h>
   81 #include <sys/file.h>
   82 #include <sys/acct.h>
   83 #include <sys/exec.h>
   84 #include <sys/ktrace.h>
   85 #include <sys/uidinfo.h>
   86 #include <sys/wait.h>
   87 #include <sys/mman.h>
   88 #include <sys/ras.h>
   89 #include <sys/signalvar.h>
   90 #include <sys/stat.h>
   91 #include <sys/syscall.h>
   92 #include <sys/kauth.h>
   93 #include <sys/lwpctl.h>
   94 #include <sys/pax.h>
   95 #include <sys/cpu.h>
   96 
   97 #include <sys/sa.h>
   98 #include <sys/savar.h>
   99 #include <sys/syscallargs.h>
  100 #if NVERIEXEC > 0
  101 #include <sys/verified_exec.h>
  102 #endif /* NVERIEXEC > 0 */
  103 
  104 #include <uvm/uvm_extern.h>
  105 
  106 #include <machine/reg.h>
  107 
  108 #include <compat/common/compat_util.h>
  109 
  110 static int exec_sigcode_map(struct proc *, const struct emul *);
  111 
  112 #ifdef DEBUG_EXEC
  113 #define DPRINTF(a) uprintf a
  114 #else
  115 #define DPRINTF(a)
  116 #endif /* DEBUG_EXEC */
  117 
  118 /*
  119  * Exec function switch:
  120  *
  121  * Note that each makecmds function is responsible for loading the
  122  * exec package with the necessary functions for any exec-type-specific
  123  * handling.
  124  *
  125  * Functions for specific exec types should be defined in their own
  126  * header file.
  127  */
  128 extern const struct execsw      execsw_builtin[];
  129 extern int                      nexecs_builtin;
  130 static const struct execsw      **execsw = NULL;
  131 static int                      nexecs;
  132 
  133 u_int   exec_maxhdrsz;          /* must not be static - netbsd32 needs it */
  134 
#ifdef LKM
/* list of supported emulations */
static
LIST_HEAD(emlist_head, emul_entry) el_head = LIST_HEAD_INITIALIZER(el_head);
struct emul_entry {
	LIST_ENTRY(emul_entry)	el_list;	/* linkage on el_head */
	const struct emul	*el_emul;	/* the registered emulation */
	int			ro_entry;	/* nonzero: entry is read-only
						 * (presumably built-in, so not
						 * removable) — TODO confirm */
};

/* list of dynamically loaded execsw entries */
static
LIST_HEAD(execlist_head, exec_entry) ex_head = LIST_HEAD_INITIALIZER(ex_head);
struct exec_entry {
	LIST_ENTRY(exec_entry)	ex_list;	/* linkage on ex_head */
	const struct execsw	*es;		/* the loaded exec format */
};

/* structure used for building execsw[] */
struct execsw_entry {
	struct execsw_entry	*next;		/* singly-linked build list */
	const struct execsw	*es;
};
#endif /* LKM */
  159 
  160 #ifdef SYSCALL_DEBUG
  161 extern const char * const syscallnames[];
  162 #endif
  163 
  164 #ifdef COMPAT_16
  165 extern char     sigcode[], esigcode[];
  166 struct uvm_object *emul_netbsd_object;
  167 #endif
  168 
  169 #ifndef __HAVE_SYSCALL_INTERN
  170 void    syscall(void);
  171 #endif
  172 
#ifdef KERN_SA
/*
 * Scheduler-activations glue for the native emulation.
 *
 * Positional initializer for struct sa_emul; NOTE(review): slot order
 * must match the struct sa_emul declaration — confirm before changing.
 * NULL slots are optional hooks native execution does not need.
 */
static const struct sa_emul saemul_netbsd = {
	sizeof(ucontext_t),	/* size of a native ucontext */
	sizeof(struct sa_t),
	sizeof(struct sa_t *),
	NULL,
	NULL,
	cpu_upcall,		/* machine-dependent upcall delivery (cpu_*) */
	(void (*)(struct lwp *, void *))getucontext_sa,
	sa_ucsp
};
#endif /* KERN_SA */
  185 
/* NetBSD emul struct */
/*
 * Emulation switch entry for native NetBSD binaries.
 *
 * This is a positional initializer: slot order must match the field
 * order of struct emul exactly (NOTE(review): verify against the
 * struct emul declaration before adding/reordering entries).  NULL
 * slots are optional hooks that native execution does not use; the
 * #ifdef blocks below select hooks by kernel configuration.
 */
const struct emul emul_netbsd = {
	"netbsd",
	NULL,		/* emulation path */
#ifndef __HAVE_MINIMAL_EMUL
	EMUL_HAS_SYS___syscall,
	NULL,
	SYS_syscall,
	SYS_NSYSENT,
#endif
	sysent,		/* native syscall table */
#ifdef SYSCALL_DEBUG
	syscallnames,
#else
	NULL,
#endif
	sendsig,
	trapsignal,
	NULL,
#ifdef COMPAT_16
	/* COMPAT_16 kernels map old-style signal trampoline code. */
	sigcode,
	esigcode,
	&emul_netbsd_object,
#else
	NULL,
	NULL,
	NULL,
#endif
	setregs,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
#ifdef __HAVE_SYSCALL_INTERN
	syscall_intern,
#else
	syscall,
#endif
	NULL,
	NULL,

	uvm_default_mapaddr,
	NULL,
#ifdef KERN_SA
	&saemul_netbsd,	/* scheduler-activations hooks (see above) */
#else
	NULL,
#endif
	sizeof(ucontext_t),
	startlwp,
};
  238 
  239 /*
  240  * Exec lock. Used to control access to execsw[] structures.
  241  * This must not be static so that netbsd32 can access it, too.
  242  */
  243 krwlock_t exec_lock;
  244 
  245 #ifdef LKM
  246 static void link_es(struct execsw_entry **, const struct execsw *);
  247 #endif /* LKM */
  248 
  249 static kmutex_t sigobject_lock;
  250 
  251 static void *
  252 exec_pool_alloc(struct pool *pp, int flags)
  253 {
  254 
  255         return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
  256             UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
  257 }
  258 
  259 static void
  260 exec_pool_free(struct pool *pp, void *addr)
  261 {
  262 
  263         uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
  264 }
  265 
  266 static struct pool exec_pool;
  267 
  268 static struct pool_allocator exec_palloc = {
  269         .pa_alloc = exec_pool_alloc,
  270         .pa_free = exec_pool_free,
  271         .pa_pagesz = NCARGS
  272 };
  273 
  274 /*
  275  * check exec:
  276  * given an "executable" described in the exec package's namei info,
  277  * see what we can do with it.
  278  *
  279  * ON ENTRY:
  280  *      exec package with appropriate namei info
  281  *      lwp pointer of exec'ing lwp
  282  *      NO SELF-LOCKED VNODES
  283  *
  284  * ON EXIT:
  285  *      error:  nothing held, etc.  exec header still allocated.
  286  *      ok:     filled exec package, executable's vnode (unlocked).
  287  *
  288  * EXEC SWITCH ENTRY:
  289  *      Locked vnode to check, exec package, proc.
  290  *
  291  * EXEC SWITCH EXIT:
  292  *      ok:     return 0, filled exec package, executable's vnode (unlocked).
  293  *      error:  destructive:
  294  *                      everything deallocated execept exec header.
  295  *              non-destructive:
  296  *                      error code, executable's vnode (unlocked),
  297  *                      exec header unmodified.
  298  */
int
/*ARGSUSED*/
check_exec(struct lwp *l, struct exec_package *epp)
{
	int		error, i;
	struct vnode	*vp;
	struct nameidata *ndp;
	size_t		resid;	/* bytes of the header vn_rdwr() did NOT fill */

	ndp = epp->ep_ndp;
	ndp->ni_cnd.cn_nameiop = LOOKUP;
	ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | SAVENAME | TRYEMULROOT;
	/* first get the vnode */
	if ((error = namei(ndp)) != 0)
		return error;
	epp->ep_vp = vp = ndp->ni_vp;	/* returned locked (LOCKLEAF) */

	/* check access and type: only regular files may be executed */
	if (vp->v_type != VREG) {
		error = EACCES;
		goto bad1;
	}
	if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
		goto bad1;

	/* get attributes */
	if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
		goto bad1;

	/* Check mount point */
	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
		error = EACCES;
		goto bad1;
	}
	/* On nosuid mounts, quietly strip set-id bits from the attributes. */
	if (vp->v_mount->mnt_flag & MNT_NOSUID)
		epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);

	/* try to open it */
	if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
		goto bad1;

	/* unlock vp, since we need it unlocked from here on out. */
	VOP_UNLOCK(vp, 0);
	/* From here on, errors go to bad2 (vnode open but unlocked). */

#if NVERIEXEC > 0
	/* Fingerprint-verify the image before trusting its contents. */
	error = veriexec_verify(l, vp, ndp->ni_cnd.cn_pnbuf,
	    epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
	    NULL);
	if (error)
		goto bad2;
#endif /* NVERIEXEC > 0 */

#ifdef PAX_SEGVGUARD
	error = pax_segvguard(l, vp, ndp->ni_cnd.cn_pnbuf, false);
	if (error)
		goto bad2;
#endif /* PAX_SEGVGUARD */

	/* now we have the file, get the exec header */
	error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
			UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
	if (error)
		goto bad2;
	/* A short read is not an error; record how much header is valid. */
	epp->ep_hdrvalid = epp->ep_hdrlen - resid;

	/*
	 * Set up default address space limits.  Can be overridden
	 * by individual exec packages.
	 *
	 * XXX probably should be all done in the exec packages.
	 */
	epp->ep_vm_minaddr = VM_MIN_ADDRESS;
	epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
	/*
	 * set up the vmcmds for creation of the process
	 * address space
	 */
	error = ENOEXEC;
	/*
	 * Try each configured exec format in turn; the first makecmds
	 * that accepts the image wins.  The caller holds exec_lock, so
	 * execsw[]/nexecs stay stable while we iterate.
	 */
	for (i = 0; i < nexecs; i++) {
		int newerror;

		epp->ep_esch = execsw[i];
		newerror = (*execsw[i]->es_makecmds)(l, epp);

		if (!newerror) {
			/* Seems ok: check that entry point is sane */
			if (epp->ep_entry > VM_MAXUSER_ADDRESS) {
				error = ENOEXEC;
				break;
			}

			/* check limits */
			if ((epp->ep_tsize > MAXTSIZ) ||
			    (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
						    [RLIMIT_DATA].rlim_cur)) {
				error = ENOMEM;
				break;
			}
			return 0;
		}

		/* Drop references a failed makecmds may have taken. */
		if (epp->ep_emul_root != NULL) {
			vrele(epp->ep_emul_root);
			epp->ep_emul_root = NULL;
		}
		if (epp->ep_interp != NULL) {
			vrele(epp->ep_interp);
			epp->ep_interp = NULL;
		}

		/* make sure the first "interesting" error code is saved. */
		if (error == ENOEXEC)
			error = newerror;

		if (epp->ep_flags & EXEC_DESTR)
			/* Error from "#!" code, tidied up by recursive call */
			return error;
	}

	/* not found, error */

	/*
	 * free any vmspace-creation commands,
	 * and release their references
	 */
	kill_vmcmds(&epp->ep_vmcmds);

bad2:
	/*
	 * close and release the vnode, restore the old one, free the
	 * pathname buf, and punt.
	 */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	VOP_CLOSE(vp, FREAD, l->l_cred);
	vput(vp);
	PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
	return error;

bad1:
	/*
	 * free the namei pathname buffer, and put the vnode
	 * (which we don't yet have open).
	 */
	vput(vp);				/* was still locked */
	PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
	return error;
}
  446 
  447 #ifdef __MACHINE_STACK_GROWS_UP
  448 #define STACK_PTHREADSPACE NBPG
  449 #else
  450 #define STACK_PTHREADSPACE 0
  451 #endif
  452 
  453 static int
  454 execve_fetch_element(char * const *array, size_t index, char **value)
  455 {
  456         return copyin(array + index, value, sizeof(*value));
  457 }
  458 
  459 /*
  460  * exec system call
  461  */
  462 /* ARGSUSED */
  463 int
  464 sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
  465 {
  466         /* {
  467                 syscallarg(const char *)        path;
  468                 syscallarg(char * const *)      argp;
  469                 syscallarg(char * const *)      envp;
  470         } */
  471 
  472         return execve1(l, SCARG(uap, path), SCARG(uap, argp),
  473             SCARG(uap, envp), execve_fetch_element);
  474 }
  475 
  476 int
  477 execve1(struct lwp *l, const char *path, char * const *args,
  478     char * const *envs, execve_fetch_element_t fetch_element)
  479 {
  480         int                     error;
  481         struct exec_package     pack;
  482         struct nameidata        nid;
  483         struct vattr            attr;
  484         struct proc             *p;
  485         char                    *argp;
  486         char                    *dp, *sp;
  487         long                    argc, envc;
  488         size_t                  i, len;
  489         char                    *stack;
  490         struct ps_strings       arginfo;
  491         struct ps_strings       *aip = &arginfo;
  492         struct vmspace          *vm;
  493         struct exec_fakearg     *tmpfap;
  494         int                     szsigcode;
  495         struct exec_vmcmd       *base_vcp;
  496         int                     oldlwpflags;
  497         ksiginfo_t              ksi;
  498         ksiginfoq_t             kq;
  499         char                    *pathbuf;
  500         size_t                  pathbuflen;
  501         uid_t                   uid;
  502 
  503         p = l->l_proc;
  504 
  505         /*
  506          * Check if we have exceeded our number of processes limit.
  507          * This is so that we handle the case where a root daemon
  508          * forked, ran setuid to become the desired user and is trying
  509          * to exec. The obvious place to do the reference counting check
  510          * is setuid(), but we don't do the reference counting check there
  511          * like other OS's do because then all the programs that use setuid()
  512          * must be modified to check the return code of setuid() and exit().
  513          * It is dangerous to make setuid() fail, because it fails open and
  514          * the program will continue to run as root. If we make it succeed
  515          * and return an error code, again we are not enforcing the limit.
  516          * The best place to enforce the limit is here, when the process tries
  517          * to execute a new image, because eventually the process will need
  518          * to call exec in order to do something useful.
  519          */
  520                         
  521         if ((p->p_flag & PK_SUGID) && (uid = kauth_cred_getuid(l->l_cred)) != 0
  522             && chgproccnt(uid, 0) > p->p_rlimit[RLIMIT_NPROC].rlim_cur)
  523                 return EAGAIN;
  524 
  525         oldlwpflags = l->l_flag & (LW_SA | LW_SA_UPCALL);
  526         if (l->l_flag & LW_SA) {
  527                 lwp_lock(l);
  528                 l->l_flag &= ~(LW_SA | LW_SA_UPCALL);
  529                 lwp_unlock(l);
  530         }
  531 
  532         /*
  533          * Drain existing references and forbid new ones.  The process
  534          * should be left alone until we're done here.  This is necessary
  535          * to avoid race conditions - e.g. in ptrace() - that might allow
  536          * a local user to illicitly obtain elevated privileges.
  537          */
  538         rw_enter(&p->p_reflock, RW_WRITER);
  539 
  540         base_vcp = NULL;
  541         /*
  542          * Init the namei data to point the file user's program name.
  543          * This is done here rather than in check_exec(), so that it's
  544          * possible to override this settings if any of makecmd/probe
  545          * functions call check_exec() recursively - for example,
  546          * see exec_script_makecmds().
  547          */
  548         pathbuf = PNBUF_GET();
  549         error = copyinstr(path, pathbuf, MAXPATHLEN, &pathbuflen);
  550         if (error) {
  551                 DPRINTF(("execve: copyinstr path %d", error));
  552                 goto clrflg;
  553         }
  554 
  555         NDINIT(&nid, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_SYSSPACE, pathbuf);
  556 
  557         /*
  558          * initialize the fields of the exec package.
  559          */
  560         pack.ep_name = path;
  561         pack.ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
  562         pack.ep_hdrlen = exec_maxhdrsz;
  563         pack.ep_hdrvalid = 0;
  564         pack.ep_ndp = &nid;
  565         pack.ep_emul_arg = NULL;
  566         pack.ep_vmcmds.evs_cnt = 0;
  567         pack.ep_vmcmds.evs_used = 0;
  568         pack.ep_vap = &attr;
  569         pack.ep_flags = 0;
  570         pack.ep_emul_root = NULL;
  571         pack.ep_interp = NULL;
  572         pack.ep_esch = NULL;
  573         pack.ep_pax_flags = 0;
  574 
  575         rw_enter(&exec_lock, RW_READER);
  576 
  577         /* see if we can run it. */
  578         if ((error = check_exec(l, &pack)) != 0) {
  579                 if (error != ENOENT) {
  580                         DPRINTF(("execve: check exec failed %d\n", error));
  581                 }
  582                 goto freehdr;
  583         }
  584 
  585         /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
  586 
  587         /* allocate an argument buffer */
  588         argp = pool_get(&exec_pool, PR_WAITOK);
  589         KASSERT(argp != NULL);
  590         dp = argp;
  591         argc = 0;
  592 
  593         /* copy the fake args list, if there's one, freeing it as we go */
  594         if (pack.ep_flags & EXEC_HASARGL) {
  595                 tmpfap = pack.ep_fa;
  596                 while (tmpfap->fa_arg != NULL) {
  597                         const char *cp;
  598 
  599                         cp = tmpfap->fa_arg;
  600                         while (*cp)
  601                                 *dp++ = *cp++;
  602                         *dp++ = '\0';
  603 
  604                         kmem_free(tmpfap->fa_arg, tmpfap->fa_len);
  605                         tmpfap++; argc++;
  606                 }
  607                 kmem_free(pack.ep_fa, pack.ep_fa_len);
  608                 pack.ep_flags &= ~EXEC_HASARGL;
  609         }
  610 
  611         /* Now get argv & environment */
  612         if (args == NULL) {
  613                 DPRINTF(("execve: null args\n"));
  614                 error = EINVAL;
  615                 goto bad;
  616         }
  617         /* 'i' will index the argp/envp element to be retrieved */
  618         i = 0;
  619         if (pack.ep_flags & EXEC_SKIPARG)
  620                 i++;
  621 
  622         while (1) {
  623                 len = argp + ARG_MAX - dp;
  624                 if ((error = (*fetch_element)(args, i, &sp)) != 0) {
  625                         DPRINTF(("execve: fetch_element args %d\n", error));
  626                         goto bad;
  627                 }
  628                 if (!sp)
  629                         break;
  630                 if ((error = copyinstr(sp, dp, len, &len)) != 0) {
  631                         DPRINTF(("execve: copyinstr args %d\n", error));
  632                         if (error == ENAMETOOLONG)
  633                                 error = E2BIG;
  634                         goto bad;
  635                 }
  636                 ktrexecarg(dp, len - 1);
  637                 dp += len;
  638                 i++;
  639                 argc++;
  640         }
  641 
  642         envc = 0;
  643         /* environment need not be there */
  644         if (envs != NULL) {
  645                 i = 0;
  646                 while (1) {
  647                         len = argp + ARG_MAX - dp;
  648                         if ((error = (*fetch_element)(envs, i, &sp)) != 0) {
  649                                 DPRINTF(("execve: fetch_element env %d\n", error));
  650                                 goto bad;
  651                         }
  652                         if (!sp)
  653                                 break;
  654                         if ((error = copyinstr(sp, dp, len, &len)) != 0) {
  655                                 DPRINTF(("execve: copyinstr env %d\n", error));
  656                                 if (error == ENAMETOOLONG)
  657                                         error = E2BIG;
  658                                 goto bad;
  659                         }
  660                         ktrexecenv(dp, len - 1);
  661                         dp += len;
  662                         i++;
  663                         envc++;
  664                 }
  665         }
  666 
  667         dp = (char *) ALIGN(dp);
  668 
  669         szsigcode = pack.ep_esch->es_emul->e_esigcode -
  670             pack.ep_esch->es_emul->e_sigcode;
  671 
  672 #ifdef __MACHINE_STACK_GROWS_UP
  673 /* See big comment lower down */
  674 #define RTLD_GAP        32
  675 #else
  676 #define RTLD_GAP        0
  677 #endif
  678 
  679         /* Now check if args & environ fit into new stack */
  680         if (pack.ep_flags & EXEC_32)
  681                 len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
  682                     sizeof(int) + sizeof(int) + dp + RTLD_GAP +
  683                     szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
  684                     - argp;
  685         else
  686                 len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
  687                     sizeof(char *) + sizeof(int) + dp + RTLD_GAP +
  688                     szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
  689                     - argp;
  690 
  691 #ifdef PAX_ASLR
  692         if (pax_aslr_active(l))
  693                 len += (arc4random() % PAGE_SIZE);
  694 #endif /* PAX_ASLR */
  695 
  696 #ifdef STACKLALIGN      /* arm, etc. */
  697         len = STACKALIGN(len);  /* make the stack "safely" aligned */
  698 #else
  699         len = ALIGN(len);       /* make the stack "safely" aligned */
  700 #endif
  701 
  702         if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
  703                 DPRINTF(("execve: stack limit exceeded %zu\n", len));
  704                 error = ENOMEM;
  705                 goto bad;
  706         }
  707 
  708         /* Get rid of other LWPs. */
  709         if (p->p_sa || p->p_nlwps > 1) {
  710                 mutex_enter(p->p_lock);
  711                 exit_lwps(l);
  712                 mutex_exit(p->p_lock);
  713         }
  714         KDASSERT(p->p_nlwps == 1);
  715 
  716         /* Destroy any lwpctl info. */
  717         if (p->p_lwpctl != NULL)
  718                 lwp_ctl_exit();
  719 
  720         /* This is now LWP 1 */
  721         l->l_lid = 1;
  722         p->p_nlwpid = 1;
  723 
  724 #ifdef KERN_SA
  725         /* Release any SA state. */
  726         if (p->p_sa)
  727                 sa_release(p);
  728 #endif /* KERN_SA */
  729 
  730         /* Remove POSIX timers */
  731         timers_free(p, TIMERS_POSIX);
  732 
  733         /* adjust "active stack depth" for process VSZ */
  734         pack.ep_ssize = len;    /* maybe should go elsewhere, but... */
  735 
  736         /*
  737          * Do whatever is necessary to prepare the address space
  738          * for remapping.  Note that this might replace the current
  739          * vmspace with another!
  740          */
  741         uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);
  742 
  743         /* record proc's vnode, for use by procfs and others */
  744         if (p->p_textvp)
  745                 vrele(p->p_textvp);
  746         VREF(pack.ep_vp);
  747         p->p_textvp = pack.ep_vp;
  748 
  749         /* Now map address space */
  750         vm = p->p_vmspace;
  751         vm->vm_taddr = (void *)pack.ep_taddr;
  752         vm->vm_tsize = btoc(pack.ep_tsize);
  753         vm->vm_daddr = (void*)pack.ep_daddr;
  754         vm->vm_dsize = btoc(pack.ep_dsize);
  755         vm->vm_ssize = btoc(pack.ep_ssize);
  756         vm->vm_issize = 0;
  757         vm->vm_maxsaddr = (void *)pack.ep_maxsaddr;
  758         vm->vm_minsaddr = (void *)pack.ep_minsaddr;
  759 
  760 #ifdef PAX_ASLR
  761         pax_aslr_init(l, vm);
  762 #endif /* PAX_ASLR */
  763 
  764         /* create the new process's VM space by running the vmcmds */
  765 #ifdef DIAGNOSTIC
  766         if (pack.ep_vmcmds.evs_used == 0)
  767                 panic("execve: no vmcmds");
  768 #endif
  769         for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
  770                 struct exec_vmcmd *vcp;
  771 
  772                 vcp = &pack.ep_vmcmds.evs_cmds[i];
  773                 if (vcp->ev_flags & VMCMD_RELATIVE) {
  774 #ifdef DIAGNOSTIC
  775                         if (base_vcp == NULL)
  776                                 panic("execve: relative vmcmd with no base");
  777                         if (vcp->ev_flags & VMCMD_BASE)
  778                                 panic("execve: illegal base & relative vmcmd");
  779 #endif
  780                         vcp->ev_addr += base_vcp->ev_addr;
  781                 }
  782                 error = (*vcp->ev_proc)(l, vcp);
  783 #ifdef DEBUG_EXEC
  784                 if (error) {
  785                         size_t j;
  786                         struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
  787                         for (j = 0; j <= i; j++)
  788                                 uprintf(
  789                         "vmcmd[%zu] = %#lx/%#lx fd@%#lx prot=0%o flags=%d\n",
  790                                     j, vp[j].ev_addr, vp[j].ev_len,
  791                                     vp[j].ev_offset, vp[j].ev_prot,
  792                                     vp[j].ev_flags);
  793                 }
  794 #endif /* DEBUG_EXEC */
  795                 if (vcp->ev_flags & VMCMD_BASE)
  796                         base_vcp = vcp;
  797         }
  798 
  799         /* free the vmspace-creation commands, and release their references */
  800         kill_vmcmds(&pack.ep_vmcmds);
  801 
  802         vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
  803         VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
  804         vput(pack.ep_vp);
  805 
  806         /* if an error happened, deallocate and punt */
  807         if (error) {
  808                 DPRINTF(("execve: vmcmd %zu failed: %d\n", i - 1, error));
  809                 goto exec_abort;
  810         }
  811 
  812         /* remember information about the process */
  813         arginfo.ps_nargvstr = argc;
  814         arginfo.ps_nenvstr = envc;
  815 
  816         /* set command name & other accounting info */
  817         i = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
  818         (void)memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, i);
  819         p->p_comm[i] = '\0';
  820 
  821         dp = PNBUF_GET();
  822         /*
  823          * If the path starts with /, we don't need to do any work.
  824          * This handles the majority of the cases.
  825          * In the future perhaps we could canonicalize it?
  826          */
  827         if (pathbuf[0] == '/')
  828                 (void)strlcpy(pack.ep_path = dp, pathbuf, MAXPATHLEN);
  829 #ifdef notyet
  830         /*
  831          * Although this works most of the time [since the entry was just
  832          * entered in the cache] we don't use it because it theoretically
  833          * can fail and it is not the cleanest interface, because there
  834          * could be races. When the namei cache is re-written, this can
  835          * be changed to use the appropriate function.
  836          */
  837         else if (!(error = vnode_to_path(dp, MAXPATHLEN, p->p_textvp, l, p)))
  838                 pack.ep_path = dp;
  839 #endif
  840         else {
  841 #ifdef notyet
  842                 printf("Cannot get path for pid %d [%s] (error %d)",
  843                     (int)p->p_pid, p->p_comm, error);
  844 #endif
  845                 pack.ep_path = NULL;
  846                 PNBUF_PUT(dp);
  847         }
  848 
  849         stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
  850                 STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
  851                 len - (sizeof(struct ps_strings) + szsigcode));
  852 
  853 #ifdef __MACHINE_STACK_GROWS_UP
  854         /*
  855          * The copyargs call always copies into lower addresses
  856          * first, moving towards higher addresses, starting with
  857          * the stack pointer that we give.  When the stack grows
  858          * down, this puts argc/argv/envp very shallow on the
  859          * stack, right at the first user stack pointer.
  860          * When the stack grows up, the situation is reversed.
  861          *
  862          * Normally, this is no big deal.  But the ld_elf.so _rtld()
  863          * function expects to be called with a single pointer to
  864          * a region that has a few words it can stash values into,
  865          * followed by argc/argv/envp.  When the stack grows down,
  866          * it's easy to decrement the stack pointer a little bit to
  867          * allocate the space for these few words and pass the new
  868          * stack pointer to _rtld.  When the stack grows up, however,
  869          * a few words before argc is part of the signal trampoline, XXX
  870          * so we have a problem.
  871          *
  872          * Instead of changing how _rtld works, we take the easy way
  873          * out and steal 32 bytes before we call copyargs.
  874          * This extra space was allowed for when 'len' was calculated.
  875          */
  876         stack += RTLD_GAP;
  877 #endif /* __MACHINE_STACK_GROWS_UP */
  878 
  879         /* Now copy argc, args & environ to new stack */
  880         error = (*pack.ep_esch->es_copyargs)(l, &pack, &arginfo, &stack, argp);
  881         if (pack.ep_path) {
  882                 PNBUF_PUT(pack.ep_path);
  883                 pack.ep_path = NULL;
  884         }
  885         if (error) {
  886                 DPRINTF(("execve: copyargs failed %d\n", error));
  887                 goto exec_abort;
  888         }
  889         /* Move the stack back to original point */
  890         stack = (char *)STACK_GROW(vm->vm_minsaddr, len);
  891 
  892         /* fill process ps_strings info */
  893         p->p_psstr = (struct ps_strings *)
  894             STACK_ALLOC(STACK_GROW(vm->vm_minsaddr, STACK_PTHREADSPACE),
  895             sizeof(struct ps_strings));
  896         p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
  897         p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
  898         p->p_psenv = offsetof(struct ps_strings, ps_envstr);
  899         p->p_psnenv = offsetof(struct ps_strings, ps_nenvstr);
  900 
  901         /* copy out the process's ps_strings structure */
  902         if ((error = copyout(aip, (char *)p->p_psstr,
  903             sizeof(arginfo))) != 0) {
  904                 DPRINTF(("execve: ps_strings copyout %p->%p size %ld failed\n",
  905                        aip, (char *)p->p_psstr, (long)sizeof(arginfo)));
  906                 goto exec_abort;
  907         }
  908 
  909         fd_closeexec();         /* handle close on exec */
  910         execsigs(p);            /* reset catched signals */
  911 
  912         l->l_ctxlink = NULL;    /* reset ucontext link */
  913 
  914 
  915         p->p_acflag &= ~AFORK;
  916         mutex_enter(p->p_lock);
  917         p->p_flag |= PK_EXEC;
  918         mutex_exit(p->p_lock);
  919 
  920         /*
  921          * Stop profiling.
  922          */
  923         if ((p->p_stflag & PST_PROFIL) != 0) {
  924                 mutex_spin_enter(&p->p_stmutex);
  925                 stopprofclock(p);
  926                 mutex_spin_exit(&p->p_stmutex);
  927         }
  928 
  929         /*
  930          * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
  931          * exited and exec()/exit() are the only places it will be cleared.
  932          */
  933         if ((p->p_lflag & PL_PPWAIT) != 0) {
  934                 mutex_enter(proc_lock);
  935                 p->p_lflag &= ~PL_PPWAIT;
  936                 cv_broadcast(&p->p_pptr->p_waitcv);
  937                 mutex_exit(proc_lock);
  938         }
  939 
  940         /*
  941          * Deal with set[ug]id.  MNT_NOSUID has already been used to disable
  942          * s[ug]id.  It's OK to check for PSL_TRACED here as we have blocked
  943          * out additional references on the process for the moment.
  944          */
  945         if ((p->p_slflag & PSL_TRACED) == 0 &&
  946 
  947             (((attr.va_mode & S_ISUID) != 0 &&
  948               kauth_cred_geteuid(l->l_cred) != attr.va_uid) ||
  949 
  950              ((attr.va_mode & S_ISGID) != 0 &&
  951               kauth_cred_getegid(l->l_cred) != attr.va_gid))) {
  952                 /*
  953                  * Mark the process as SUGID before we do
  954                  * anything that might block.
  955                  */
  956                 proc_crmod_enter();
  957                 proc_crmod_leave(NULL, NULL, true);
  958 
  959                 /* Make sure file descriptors 0..2 are in use. */
  960                 if ((error = fd_checkstd()) != 0) {
  961                         DPRINTF(("execve: fdcheckstd failed %d\n", error));
  962                         goto exec_abort;
  963                 }
  964 
  965                 /*
  966                  * Copy the credential so other references don't see our
  967                  * changes.
  968                  */
  969                 l->l_cred = kauth_cred_copy(l->l_cred);
  970 #ifdef KTRACE
  971                 /*
  972                  * If the persistent trace flag isn't set, turn off.
  973                  */
  974                 if (p->p_tracep) {
  975                         mutex_enter(&ktrace_lock);
  976                         if (!(p->p_traceflag & KTRFAC_PERSISTENT))
  977                                 ktrderef(p);
  978                         mutex_exit(&ktrace_lock);
  979                 }
  980 #endif
  981                 if (attr.va_mode & S_ISUID)
  982                         kauth_cred_seteuid(l->l_cred, attr.va_uid);
  983                 if (attr.va_mode & S_ISGID)
  984                         kauth_cred_setegid(l->l_cred, attr.va_gid);
  985         } else {
  986                 if (kauth_cred_geteuid(l->l_cred) ==
  987                     kauth_cred_getuid(l->l_cred) &&
  988                     kauth_cred_getegid(l->l_cred) ==
  989                     kauth_cred_getgid(l->l_cred))
  990                         p->p_flag &= ~PK_SUGID;
  991         }
  992 
  993         /*
  994          * Copy the credential so other references don't see our changes.
  995          * Test to see if this is necessary first, since in the common case
  996          * we won't need a private reference.
  997          */
  998         if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
  999             kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
 1000                 l->l_cred = kauth_cred_copy(l->l_cred);
 1001                 kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
 1002                 kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
 1003         }
 1004 
 1005         /* Update the master credentials. */
 1006         if (l->l_cred != p->p_cred) {
 1007                 kauth_cred_t ocred;
 1008 
 1009                 kauth_cred_hold(l->l_cred);
 1010                 mutex_enter(p->p_lock);
 1011                 ocred = p->p_cred;
 1012                 p->p_cred = l->l_cred;
 1013                 mutex_exit(p->p_lock);
 1014                 kauth_cred_free(ocred);
 1015         }
 1016 
 1017 #if defined(__HAVE_RAS)
 1018         /*
 1019          * Remove all RASs from the address space.
 1020          */
 1021         ras_purgeall();
 1022 #endif
 1023 
 1024         doexechooks(p);
 1025 
 1026         /* setup new registers and do misc. setup. */
 1027         (*pack.ep_esch->es_emul->e_setregs)(l, &pack, (u_long) stack);
 1028         if (pack.ep_esch->es_setregs)
 1029                 (*pack.ep_esch->es_setregs)(l, &pack, (u_long) stack);
 1030 
 1031         /* map the process's signal trampoline code */
 1032         if (exec_sigcode_map(p, pack.ep_esch->es_emul)) {
 1033                 DPRINTF(("execve: map sigcode failed %d\n", error));
 1034                 goto exec_abort;
 1035         }
 1036 
 1037         pool_put(&exec_pool, argp);
 1038 
 1039         PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
 1040 
 1041         /* notify others that we exec'd */
 1042         KNOTE(&p->p_klist, NOTE_EXEC);
 1043 
 1044         kmem_free(pack.ep_hdr, pack.ep_hdrlen);
 1045 
 1046         /* The emulation root will usually have been found when we looked
 1047          * for the elf interpreter (or similar), if not look now. */
 1048         if (pack.ep_esch->es_emul->e_path != NULL && pack.ep_emul_root == NULL)
 1049                 emul_find_root(l, &pack);
 1050 
 1051         /* Any old emulation root got removed by fdcloseexec */
 1052         rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
 1053         p->p_cwdi->cwdi_edir = pack.ep_emul_root;
 1054         rw_exit(&p->p_cwdi->cwdi_lock);
 1055         pack.ep_emul_root = NULL;
 1056         if (pack.ep_interp != NULL)
 1057                 vrele(pack.ep_interp);
 1058 
 1059         /*
 1060          * Call emulation specific exec hook. This can setup per-process
 1061          * p->p_emuldata or do any other per-process stuff an emulation needs.
 1062          *
 1063          * If we are executing process of different emulation than the
 1064          * original forked process, call e_proc_exit() of the old emulation
 1065          * first, then e_proc_exec() of new emulation. If the emulation is
 1066          * same, the exec hook code should deallocate any old emulation
 1067          * resources held previously by this process.
 1068          */
 1069         if (p->p_emul && p->p_emul->e_proc_exit
 1070             && p->p_emul != pack.ep_esch->es_emul)
 1071                 (*p->p_emul->e_proc_exit)(p);
 1072 
 1073         /*
 1074          * Call exec hook. Emulation code may NOT store reference to anything
 1075          * from &pack.
 1076          */
 1077         if (pack.ep_esch->es_emul->e_proc_exec)
 1078                 (*pack.ep_esch->es_emul->e_proc_exec)(p, &pack);
 1079 
 1080         /* update p_emul, the old value is no longer needed */
 1081         p->p_emul = pack.ep_esch->es_emul;
 1082 
 1083         /* ...and the same for p_execsw */
 1084         p->p_execsw = pack.ep_esch;
 1085 
 1086 #ifdef __HAVE_SYSCALL_INTERN
 1087         (*p->p_emul->e_syscall_intern)(p);
 1088 #endif
 1089         ktremul();
 1090 
 1091         /* Allow new references from the debugger/procfs. */
 1092         rw_exit(&p->p_reflock);
 1093         rw_exit(&exec_lock);
 1094 
 1095         mutex_enter(proc_lock);
 1096 
 1097         if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
 1098                 KSI_INIT_EMPTY(&ksi);
 1099                 ksi.ksi_signo = SIGTRAP;
 1100                 ksi.ksi_lid = l->l_lid;
 1101                 kpsignal(p, &ksi, NULL);
 1102         }
 1103 
 1104         if (p->p_sflag & PS_STOPEXEC) {
 1105                 KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
 1106                 p->p_pptr->p_nstopchild++;
 1107                 p->p_pptr->p_waited = 0;
 1108                 mutex_enter(p->p_lock);
 1109                 ksiginfo_queue_init(&kq);
 1110                 sigclearall(p, &contsigmask, &kq);
 1111                 lwp_lock(l);
 1112                 l->l_stat = LSSTOP;
 1113                 p->p_stat = SSTOP;
 1114                 p->p_nrlwps--;
 1115                 mutex_exit(p->p_lock);
 1116                 mutex_exit(proc_lock);
 1117                 mi_switch(l);
 1118                 ksiginfo_queue_drain(&kq);
 1119                 KERNEL_LOCK(l->l_biglocks, l);
 1120         } else {
 1121                 mutex_exit(proc_lock);
 1122         }
 1123 
 1124         PNBUF_PUT(pathbuf);
 1125         return (EJUSTRETURN);
 1126 
 1127  bad:
 1128         /* free the vmspace-creation commands, and release their references */
 1129         kill_vmcmds(&pack.ep_vmcmds);
 1130         /* kill any opened file descriptor, if necessary */
 1131         if (pack.ep_flags & EXEC_HASFD) {
 1132                 pack.ep_flags &= ~EXEC_HASFD;
 1133                 fd_close(pack.ep_fd);
 1134         }
 1135         /* close and put the exec'd file */
 1136         vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
 1137         VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
 1138         vput(pack.ep_vp);
 1139         PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
 1140         pool_put(&exec_pool, argp);
 1141 
 1142  freehdr:
 1143         kmem_free(pack.ep_hdr, pack.ep_hdrlen);
 1144         if (pack.ep_emul_root != NULL)
 1145                 vrele(pack.ep_emul_root);
 1146         if (pack.ep_interp != NULL)
 1147                 vrele(pack.ep_interp);
 1148 
 1149         rw_exit(&exec_lock);
 1150 
 1151  clrflg:
 1152         lwp_lock(l);
 1153         l->l_flag |= oldlwpflags;
 1154         lwp_unlock(l);
 1155         PNBUF_PUT(pathbuf);
 1156         rw_exit(&p->p_reflock);
 1157 
 1158         return error;
 1159 
 1160  exec_abort:
 1161         PNBUF_PUT(pathbuf);
 1162         rw_exit(&p->p_reflock);
 1163         rw_exit(&exec_lock);
 1164 
 1165         /*
 1166          * the old process doesn't exist anymore.  exit gracefully.
 1167          * get rid of the (new) address space we have created, if any, get rid
 1168          * of our namei data and vnode, and exit noting failure
 1169          */
 1170         uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
 1171                 VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
 1172         if (pack.ep_emul_arg)
 1173                 FREE(pack.ep_emul_arg, M_TEMP);
 1174         PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
 1175         pool_put(&exec_pool, argp);
 1176         kmem_free(pack.ep_hdr, pack.ep_hdrlen);
 1177         if (pack.ep_emul_root != NULL)
 1178                 vrele(pack.ep_emul_root);
 1179         if (pack.ep_interp != NULL)
 1180                 vrele(pack.ep_interp);
 1181 
 1182         /* Acquire the sched-state mutex (exit1() will release it). */
 1183         mutex_enter(p->p_lock);
 1184         exit1(l, W_EXITCODE(error, SIGABRT));
 1185 
 1186         /* NOTREACHED */
 1187         return 0;
 1188 }
 1189 
 1190 
 1191 int
 1192 copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
 1193     char **stackp, void *argp)
 1194 {
 1195         char    **cpp, *dp, *sp;
 1196         size_t  len;
 1197         void    *nullp;
 1198         long    argc, envc;
 1199         int     error;
 1200 
 1201         cpp = (char **)*stackp;
 1202         nullp = NULL;
 1203         argc = arginfo->ps_nargvstr;
 1204         envc = arginfo->ps_nenvstr;
 1205         if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0)
 1206                 return error;
 1207 
 1208         dp = (char *) (cpp + argc + envc + 2 + pack->ep_esch->es_arglen);
 1209         sp = argp;
 1210 
 1211         /* XXX don't copy them out, remap them! */
 1212         arginfo->ps_argvstr = cpp; /* remember location of argv for later */
 1213 
 1214         for (; --argc >= 0; sp += len, dp += len)
 1215                 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
 1216                     (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
 1217                         return error;
 1218 
 1219         if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
 1220                 return error;
 1221 
 1222         arginfo->ps_envstr = cpp; /* remember location of envp for later */
 1223 
 1224         for (; --envc >= 0; sp += len, dp += len)
 1225                 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
 1226                     (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
 1227                         return error;
 1228 
 1229         if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
 1230                 return error;
 1231 
 1232         *stackp = (char *)cpp;
 1233         return 0;
 1234 }
 1235 
 1236 #ifdef LKM
 1237 /*
 1238  * Find an emulation of given name in list of emulations.
 1239  * Needs to be called with the exec_lock held.
 1240  */
 1241 const struct emul *
 1242 emul_search(const char *name)
 1243 {
 1244         struct emul_entry *it;
 1245 
 1246         LIST_FOREACH(it, &el_head, el_list) {
 1247                 if (strcmp(name, it->el_emul->e_name) == 0)
 1248                         return it->el_emul;
 1249         }
 1250 
 1251         return NULL;
 1252 }
 1253 
 1254 /*
 1255  * Add an emulation to list, if it's not there already.
 1256  */
 1257 int
 1258 emul_register(const struct emul *emul, int ro_entry)
 1259 {
 1260         struct emul_entry       *ee;
 1261         int                     error;
 1262 
 1263         error = 0;
 1264         rw_enter(&exec_lock, RW_WRITER);
 1265 
 1266         if (emul_search(emul->e_name)) {
 1267                 error = EEXIST;
 1268                 goto out;
 1269         }
 1270 
 1271         ee = kmem_alloc(sizeof(*ee), KM_SLEEP);
 1272         ee->el_emul = emul;
 1273         ee->ro_entry = ro_entry;
 1274         LIST_INSERT_HEAD(&el_head, ee, el_list);
 1275 
 1276  out:
 1277         rw_exit(&exec_lock);
 1278         return error;
 1279 }
 1280 
 1281 /*
 1282  * Remove emulation with name 'name' from list of supported emulations.
 1283  */
int
emul_unregister(const char *name)
{
	const struct proclist_desc *pd;
	struct emul_entry	*it;
	int			i, error;
	struct proc		*ptmp;

	error = 0;
	rw_enter(&exec_lock, RW_WRITER);

	/* Find the entry matching 'name'; 'it' is NULL if not found. */
	LIST_FOREACH(it, &el_head, el_list) {
		if (strcmp(it->el_emul->e_name, name) == 0)
			break;
	}

	if (!it) {
		error = ENOENT;
		goto out;
	}

	/* Built-in (read-only) entries may never be unregistered. */
	if (it->ro_entry) {
		error = EBUSY;
		goto out;
	}

	/* test if any execsw[] entry is still using this */
	for(i=0; i < nexecs; i++) {
		if (execsw[i]->es_emul == it->el_emul) {
			error = EBUSY;
			goto out;
		}
	}

	/*
	 * Test if any process is running under this emulation - since
	 * emul_unregister() runs quite seldom, it's better to do an
	 * expensive full-proclist scan here than to use any locking
	 * on the common exec path.
	 */
	mutex_enter(proc_lock);
	for (pd = proclists; pd->pd_list != NULL && !error; pd++) {
		PROCLIST_FOREACH(ptmp, pd->pd_list) {
			if (ptmp->p_emul == it->el_emul) {
				error = EBUSY;
				break;
			}
		}
	}
	mutex_exit(proc_lock);

	if (error)
		goto out;


	/* entry is not used, remove it */
	LIST_REMOVE(it, el_list);
	kmem_free(it, sizeof(*it));

 out:
	rw_exit(&exec_lock);
	return error;
}
 1346 
 1347 /*
 1348  * Add execsw[] entry.
 1349  */
 1350 int
 1351 exec_add(struct execsw *esp, const char *e_name)
 1352 {
 1353         struct exec_entry       *it;
 1354         int                     error;
 1355 
 1356         error = 0;
 1357         rw_enter(&exec_lock, RW_WRITER);
 1358 
 1359         if (!esp->es_emul) {
 1360                 esp->es_emul = emul_search(e_name);
 1361                 if (!esp->es_emul) {
 1362                         error = ENOENT;
 1363                         goto out;
 1364                 }
 1365         }
 1366 
 1367         LIST_FOREACH(it, &ex_head, ex_list) {
 1368                 /* assume tuple (makecmds, probe_func, emulation) is unique */
 1369                 if (it->es->es_makecmds == esp->es_makecmds
 1370                     && it->es->u.elf_probe_func == esp->u.elf_probe_func
 1371                     && it->es->es_emul == esp->es_emul) {
 1372                         error = EEXIST;
 1373                         goto out;
 1374                 }
 1375         }
 1376 
 1377         /* if we got here, the entry doesn't exist yet */
 1378         it = kmem_alloc(sizeof(*it), KM_SLEEP);
 1379         it->es = esp;
 1380         LIST_INSERT_HEAD(&ex_head, it, ex_list);
 1381 
 1382         /* update execsw[] */
 1383         exec_init(0);
 1384 
 1385  out:
 1386         rw_exit(&exec_lock);
 1387         return error;
 1388 }
 1389 
 1390 /*
 1391  * Remove execsw[] entry.
 1392  */
 1393 int
 1394 exec_remove(const struct execsw *esp)
 1395 {
 1396         struct exec_entry       *it;
 1397         int                     error;
 1398 
 1399         error = 0;
 1400         rw_enter(&exec_lock, RW_WRITER);
 1401 
 1402         LIST_FOREACH(it, &ex_head, ex_list) {
 1403                 /* assume tuple (makecmds, probe_func, emulation) is unique */
 1404                 if (it->es->es_makecmds == esp->es_makecmds
 1405                     && it->es->u.elf_probe_func == esp->u.elf_probe_func
 1406                     && it->es->es_emul == esp->es_emul)
 1407                         break;
 1408         }
 1409         if (!it) {
 1410                 error = ENOENT;
 1411                 goto out;
 1412         }
 1413 
 1414         /* remove item from list and free resources */
 1415         LIST_REMOVE(it, ex_list);
 1416         kmem_free(it, sizeof(*it));
 1417 
 1418         /* update execsw[] */
 1419         exec_init(0);
 1420 
 1421  out:
 1422         rw_exit(&exec_lock);
 1423         return error;
 1424 }
 1425 
 1426 static void
 1427 link_es(struct execsw_entry **listp, const struct execsw *esp)
 1428 {
 1429         struct execsw_entry *et, *e1;
 1430 
 1431         et = (struct execsw_entry *) malloc(sizeof(struct execsw_entry),
 1432                         M_TEMP, M_WAITOK);
 1433         et->next = NULL;
 1434         et->es = esp;
 1435         if (*listp == NULL) {
 1436                 *listp = et;
 1437                 return;
 1438         }
 1439 
 1440         switch(et->es->es_prio) {
 1441         case EXECSW_PRIO_FIRST:
 1442                 /* put new entry as the first */
 1443                 et->next = *listp;
 1444                 *listp = et;
 1445                 break;
 1446         case EXECSW_PRIO_ANY:
 1447                 /* put new entry after all *_FIRST and *_ANY entries */
 1448                 for(e1 = *listp; e1->next
 1449                         && e1->next->es->es_prio != EXECSW_PRIO_LAST;
 1450                         e1 = e1->next);
 1451                 et->next = e1->next;
 1452                 e1->next = et;
 1453                 break;
 1454         case EXECSW_PRIO_LAST:
 1455                 /* put new entry as the last one */
 1456                 for(e1 = *listp; e1->next; e1 = e1->next);
 1457                 e1->next = et;
 1458                 break;
 1459         default:
 1460 #ifdef DIAGNOSTIC
 1461                 panic("execw[] entry with unknown priority %d found",
 1462                         et->es->es_prio);
 1463 #else
 1464                 free(et, M_TEMP);
 1465 #endif
 1466                 break;
 1467         }
 1468 }
 1469 
 1470 /*
 1471  * Initialize exec structures. If init_boot is true, also does necessary
 1472  * one-time initialization (it's called from main() that way).
 1473  * Once system is multiuser, this should be called with exec_lock held,
 1474  * i.e. via exec_{add|remove}().
 1475  */
int
exec_init(int init_boot)
{
	const struct execsw	**new_es, * const *old_es;
	struct execsw_entry	*list, *e1;
	struct exec_entry	*e2;
	int			i, es_sz;

	if (init_boot) {
		/* do one-time initializations */
		rw_init(&exec_lock);
		mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
		pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
		    "execargs", &exec_palloc, IPL_NONE);
		pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);

		/* register compiled-in emulations */
		for(i=0; i < nexecs_builtin; i++) {
			if (execsw_builtin[i].es_emul)
				emul_register(execsw_builtin[i].es_emul, 1);
		}
#ifdef DIAGNOSTIC
		/* after the loop i == nexecs_builtin, so this catches
		 * an empty execsw_builtin[] table */
		if (i == 0)
			panic("no emulations found in execsw_builtin[]");
#endif
	}

	/*
	 * Build execsw[] array from builtin entries and entries added
	 * at runtime.  link_es() keeps the temporary list sorted by
	 * each entry's es_prio.
	 */
	list = NULL;
	for(i=0; i < nexecs_builtin; i++)
		link_es(&list, &execsw_builtin[i]);

	/* Add dynamically loaded entries */
	es_sz = nexecs_builtin;
	LIST_FOREACH(e2, &ex_head, ex_list) {
		link_es(&list, e2->es);
		es_sz++;
	}

	/*
	 * Now that we have sorted all execsw entries, create new execsw[]
	 * and free no longer needed memory in the process.  The sorted
	 * list nodes are consumed (and freed) as new_es is filled.
	 */
	new_es = kmem_alloc(es_sz * sizeof(struct execsw *), KM_SLEEP);
	for(i=0; list; i++) {
		new_es[i] = list->es;
		e1 = list->next;
		free(list, M_TEMP);
		list = e1;
	}

	/*
	 * New execsw[] array built, now replace old execsw[] and free
	 * used memory.  Note: 'nexecs' still holds the OLD array size
	 * here, which is the size the old array was allocated with.
	 */
	old_es = execsw;
	if (old_es)
		/*XXXUNCONST*/
		kmem_free(__UNCONST(old_es), nexecs * sizeof(struct execsw *));
	execsw = new_es;
	nexecs = es_sz;

	/*
	 * Figure out the maximum size of an exec header.
	 */
	exec_maxhdrsz = 0;
	for (i = 0; i < nexecs; i++) {
		if (execsw[i]->es_hdrsz > exec_maxhdrsz)
			exec_maxhdrsz = execsw[i]->es_hdrsz;
	}

	return 0;
}
 1552 #endif
 1553 
 1554 #ifndef LKM
 1555 /*
 1556  * Simplified exec_init() for kernels without LKMs. Only initialize
 1557  * exec_maxhdrsz and execsw[].
 1558  */
 1559 int
 1560 exec_init(int init_boot)
 1561 {
 1562         int i;
 1563 
 1564 #ifdef DIAGNOSTIC
 1565         if (!init_boot)
 1566                 panic("exec_init(): called with init_boot == 0");
 1567 #endif
 1568 
 1569         /* do one-time initializations */
 1570         nexecs = nexecs_builtin;
 1571         execsw = kmem_alloc(nexecs * sizeof(struct execsw *), KM_SLEEP);
 1572 
 1573         rw_init(&exec_lock);
 1574         pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
 1575             "execargs", &exec_palloc, IPL_NONE);
 1576         pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
 1577 
 1578         /*
 1579          * Fill in execsw[] and figure out the maximum size of an exec header.
 1580          */
 1581         exec_maxhdrsz = 0;
 1582         for(i=0; i < nexecs; i++) {
 1583                 execsw[i] = &execsw_builtin[i];
 1584                 if (execsw_builtin[i].es_hdrsz > exec_maxhdrsz)
 1585                         exec_maxhdrsz = execsw_builtin[i].es_hdrsz;
 1586         }
 1587 
 1588         return 0;
 1589 
 1590 }
 1591 #endif /* !LKM */
 1592 
static int
exec_sigcode_map(struct proc *p, const struct emul *e)
{
	vaddr_t va;
	vsize_t sz;
	int error;
	struct uvm_object *uobj;

	/* Size of the emulation's signal trampoline code. */
	sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;

	/* Nothing to map for emulations without a sigobject or sigcode. */
	if (e->e_sigobject == NULL || sz == 0) {
		return 0;
	}

	/*
	 * If we don't have a sigobject for this emulation, create one.
	 *
	 * sigobject is an anonymous memory object (just like SYSV shared
	 * memory) that we keep a permanent reference to and that we map
	 * in all processes that need this sigcode. The creation is simple,
	 * we create an object, add a permanent reference to it, map it in
	 * kernel space, copy out the sigcode to it and unmap it.
	 * We map it with PROT_READ|PROT_EXEC into the process just
	 * the way sys_mmap() would map it.
	 */

	uobj = *e->e_sigobject;
	if (uobj == NULL) {
		/* Re-check under sigobject_lock so that only one thread
		 * creates and publishes the shared object. */
		mutex_enter(&sigobject_lock);
		if ((uobj = *e->e_sigobject) == NULL) {
			uobj = uao_create(sz, 0);
			/* extra (permanent) reference kept via *e->e_sigobject */
			(*uobj->pgops->pgo_reference)(uobj);
			va = vm_map_min(kernel_map);
			if ((error = uvm_map(kernel_map, &va, round_page(sz),
			    uobj, 0, 0,
			    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
			    UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
				printf("kernel mapping failed %d\n", error);
				(*uobj->pgops->pgo_detach)(uobj);
				mutex_exit(&sigobject_lock);
				return (error);
			}
			/* Copy the trampoline into the object through the
			 * temporary kernel mapping, then tear it down. */
			memcpy((void *)va, e->e_sigcode, sz);
#ifdef PMAP_NEED_PROCWR
			/* keep I-cache coherent on archs that need it */
			pmap_procwr(&proc0, va, sz);
#endif
			uvm_unmap(kernel_map, va, va + round_page(sz));
			*e->e_sigobject = uobj;
		}
		mutex_exit(&sigobject_lock);
	}

	/* Just a hint to uvm_map where to put it. */
	va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
	    round_page(sz));

#ifdef __alpha__
	/*
	 * Tru64 puts /sbin/loader at the end of user virtual memory,
	 * which causes the above calculation to put the sigcode at
	 * an invalid address.  Put it just below the text instead.
	 */
	if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
		va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
	}
#endif

	/* Take a reference for this process's mapping; it is dropped
	 * again if the map fails. */
	(*uobj->pgops->pgo_reference)(uobj);
	error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
			uobj, 0, 0,
			UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
				    UVM_ADV_RANDOM, 0));
	if (error) {
		(*uobj->pgops->pgo_detach)(uobj);
		return (error);
	}
	/* Record where the trampoline lives in this process. */
	p->p_sigctx.ps_sigcode = (void *)va;
	return (0);
}

Cache object: ab9a9b47eb5ef49be007c9320c0bbb50


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.