kern_exec.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*      $NetBSD: kern_exec.c,v 1.518 2022/07/01 01:05:31 riastradh Exp $        */
    2 
    3 /*-
    4  * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Andrew Doran.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 /*-
   33  * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
   34  * Copyright (C) 1992 Wolfgang Solfrank.
   35  * Copyright (C) 1992 TooLs GmbH.
   36  * All rights reserved.
   37  *
   38  * Redistribution and use in source and binary forms, with or without
   39  * modification, are permitted provided that the following conditions
   40  * are met:
   41  * 1. Redistributions of source code must retain the above copyright
   42  *    notice, this list of conditions and the following disclaimer.
   43  * 2. Redistributions in binary form must reproduce the above copyright
   44  *    notice, this list of conditions and the following disclaimer in the
   45  *    documentation and/or other materials provided with the distribution.
   46  * 3. All advertising materials mentioning features or use of this software
   47  *    must display the following acknowledgement:
   48  *      This product includes software developed by TooLs GmbH.
   49  * 4. The name of TooLs GmbH may not be used to endorse or promote products
   50  *    derived from this software without specific prior written permission.
   51  *
   52  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
   53  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   54  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   55  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   56  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   57  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
   58  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
   59  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
   60  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
   61  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   62  */
   63 
   64 #include <sys/cdefs.h>
   65 __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.518 2022/07/01 01:05:31 riastradh Exp $");
   66 
   67 #include "opt_exec.h"
   68 #include "opt_execfmt.h"
   69 #include "opt_ktrace.h"
   70 #include "opt_modular.h"
   71 #include "opt_syscall_debug.h"
   72 #include "veriexec.h"
   73 #include "opt_pax.h"
   74 
   75 #include <sys/param.h>
   76 #include <sys/systm.h>
   77 #include <sys/filedesc.h>
   78 #include <sys/kernel.h>
   79 #include <sys/proc.h>
   80 #include <sys/ptrace.h>
   81 #include <sys/mount.h>
   82 #include <sys/kmem.h>
   83 #include <sys/namei.h>
   84 #include <sys/vnode.h>
   85 #include <sys/file.h>
   86 #include <sys/filedesc.h>
   87 #include <sys/acct.h>
   88 #include <sys/atomic.h>
   89 #include <sys/exec.h>
   90 #include <sys/futex.h>
   91 #include <sys/ktrace.h>
   92 #include <sys/uidinfo.h>
   93 #include <sys/wait.h>
   94 #include <sys/mman.h>
   95 #include <sys/ras.h>
   96 #include <sys/signalvar.h>
   97 #include <sys/stat.h>
   98 #include <sys/syscall.h>
   99 #include <sys/kauth.h>
  100 #include <sys/lwpctl.h>
  101 #include <sys/pax.h>
  102 #include <sys/cpu.h>
  103 #include <sys/module.h>
  104 #include <sys/syscallvar.h>
  105 #include <sys/syscallargs.h>
  106 #include <sys/vfs_syscalls.h>
  107 #if NVERIEXEC > 0
  108 #include <sys/verified_exec.h>
  109 #endif /* NVERIEXEC > 0 */
  110 #include <sys/sdt.h>
  111 #include <sys/spawn.h>
  112 #include <sys/prot.h>
  113 #include <sys/cprng.h>
  114 
  115 #include <uvm/uvm_extern.h>
  116 
  117 #include <machine/reg.h>
  118 
  119 #include <compat/common/compat_util.h>
  120 
  121 #ifndef MD_TOPDOWN_INIT
  122 #ifdef __USE_TOPDOWN_VM
  123 #define MD_TOPDOWN_INIT(epp)    (epp)->ep_flags |= EXEC_TOPDOWN_VM
  124 #else
  125 #define MD_TOPDOWN_INIT(epp)
  126 #endif
  127 #endif
  128 
  129 struct execve_data;
  130 
  131 extern int user_va0_disable;
  132 
  133 static size_t calcargs(struct execve_data * restrict, const size_t);
  134 static size_t calcstack(struct execve_data * restrict, const size_t);
  135 static int copyoutargs(struct execve_data * restrict, struct lwp *,
  136     char * const);
  137 static int copyoutpsstrs(struct execve_data * restrict, struct proc *);
  138 static int copyinargs(struct execve_data * restrict, char * const *,
  139     char * const *, execve_fetch_element_t, char **);
  140 static int copyinargstrs(struct execve_data * restrict, char * const *,
  141     execve_fetch_element_t, char **, size_t *, void (*)(const void *, size_t));
  142 static int exec_sigcode_map(struct proc *, const struct emul *);
  143 
  144 #if defined(DEBUG) && !defined(DEBUG_EXEC)
  145 #define DEBUG_EXEC
  146 #endif
  147 #ifdef DEBUG_EXEC
  148 #define DPRINTF(a) printf a
  149 #define COPYPRINTF(s, a, b) printf("%s, %d: copyout%s @%p %zu\n", __func__, \
  150     __LINE__, (s), (a), (b))
  151 static void dump_vmcmds(const struct exec_package * const, size_t, int);
  152 #define DUMPVMCMDS(p, x, e) do { dump_vmcmds((p), (x), (e)); } while (0)
  153 #else
  154 #define DPRINTF(a)
  155 #define COPYPRINTF(s, a, b)
  156 #define DUMPVMCMDS(p, x, e) do {} while (0)
  157 #endif /* DEBUG_EXEC */
  158 
  159 /*
  160  * DTrace SDT provider definitions
  161  */
  162 SDT_PROVIDER_DECLARE(proc);
  163 SDT_PROBE_DEFINE1(proc, kernel, , exec, "char *");
  164 SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *");
  165 SDT_PROBE_DEFINE1(proc, kernel, , exec__failure, "int");
  166 
  167 /*
  168  * Exec function switch:
  169  *
  170  * Note that each makecmds function is responsible for loading the
  171  * exec package with the necessary functions for any exec-type-specific
  172  * handling.
  173  *
  174  * Functions for specific exec types should be defined in their own
  175  * header file.
  176  */
  177 static const struct execsw      **execsw = NULL;
  178 static int                      nexecs;
  179 
  180 u_int   exec_maxhdrsz;   /* must not be static - used by netbsd32 */
  181 
  182 /* list of dynamically loaded execsw entries */
  183 static LIST_HEAD(execlist_head, exec_entry) ex_head =
  184     LIST_HEAD_INITIALIZER(ex_head);
  185 struct exec_entry {
  186         LIST_ENTRY(exec_entry)  ex_list;
  187         SLIST_ENTRY(exec_entry) ex_slist;
  188         const struct execsw     *ex_sw;
  189 };
  190 
  191 #ifndef __HAVE_SYSCALL_INTERN
  192 void    syscall(void);
  193 #endif
  194 
  195 /* NetBSD autoloadable syscalls */
  196 #ifdef MODULAR
  197 #include <kern/syscalls_autoload.c>
  198 #endif
  199 
  200 /* NetBSD emul struct */
  201 struct emul emul_netbsd = {
  202         .e_name =               "netbsd",
  203 #ifdef EMUL_NATIVEROOT
  204         .e_path =               EMUL_NATIVEROOT,
  205 #else
  206         .e_path =               NULL,
  207 #endif
  208 #ifndef __HAVE_MINIMAL_EMUL
  209         .e_flags =              EMUL_HAS_SYS___syscall,
  210         .e_errno =              NULL,
  211         .e_nosys =              SYS_syscall,
  212         .e_nsysent =            SYS_NSYSENT,
  213 #endif
  214 #ifdef MODULAR
  215         .e_sc_autoload =        netbsd_syscalls_autoload,
  216 #endif
  217         .e_sysent =             sysent,
  218         .e_nomodbits =          sysent_nomodbits,
  219 #ifdef SYSCALL_DEBUG
  220         .e_syscallnames =       syscallnames,
  221 #else
  222         .e_syscallnames =       NULL,
  223 #endif
  224         .e_sendsig =            sendsig,
  225         .e_trapsignal =         trapsignal,
  226         .e_sigcode =            NULL,
  227         .e_esigcode =           NULL,
  228         .e_sigobject =          NULL,
  229         .e_setregs =            setregs,
  230         .e_proc_exec =          NULL,
  231         .e_proc_fork =          NULL,
  232         .e_proc_exit =          NULL,
  233         .e_lwp_fork =           NULL,
  234         .e_lwp_exit =           NULL,
  235 #ifdef __HAVE_SYSCALL_INTERN
  236         .e_syscall_intern =     syscall_intern,
  237 #else
  238         .e_syscall =            syscall,
  239 #endif
  240         .e_sysctlovly =         NULL,
  241         .e_vm_default_addr =    uvm_default_mapaddr,
  242         .e_usertrap =           NULL,
  243         .e_ucsize =             sizeof(ucontext_t),
  244         .e_startlwp =           startlwp
  245 };
  246 
  247 /*
  248  * Exec lock. Used to control access to execsw[] structures.
  249  * This must not be static so that netbsd32 can access it, too.
  250  */
  251 krwlock_t exec_lock __cacheline_aligned;
  252 
  253 /*
  254  * Data used between a loadvm and execve part of an "exec" operation
  255  */
  256 struct execve_data {
  257         struct exec_package     ed_pack;
  258         struct pathbuf          *ed_pathbuf;
  259         struct vattr            ed_attr;
  260         struct ps_strings       ed_arginfo;
  261         char                    *ed_argp;
  262         const char              *ed_pathstring;
  263         char                    *ed_resolvedname;
  264         size_t                  ed_ps_strings_sz;
  265         int                     ed_szsigcode;
  266         size_t                  ed_argslen;
  267         long                    ed_argc;
  268         long                    ed_envc;
  269 };
  270 
  271 /*
  272  * data passed from parent lwp to child during a posix_spawn()
  273  */
  274 struct spawn_exec_data {
  275         struct execve_data      sed_exec;
  276         struct posix_spawn_file_actions
  277                                 *sed_actions;
  278         struct posix_spawnattr  *sed_attrs;
  279         struct proc             *sed_parent;
  280         kcondvar_t              sed_cv_child_ready;
  281         kmutex_t                sed_mtx_child;
  282         int                     sed_error;
  283         volatile uint32_t       sed_refcnt;
  284 };
  285 
  286 static struct vm_map *exec_map;
  287 static struct pool exec_pool;
  288 
  289 static void *
  290 exec_pool_alloc(struct pool *pp, int flags)
  291 {
  292 
  293         return (void *)uvm_km_alloc(exec_map, NCARGS, 0,
  294             UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
  295 }
  296 
  297 static void
  298 exec_pool_free(struct pool *pp, void *addr)
  299 {
  300 
  301         uvm_km_free(exec_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
  302 }
  303 
  304 static struct pool_allocator exec_palloc = {
  305         .pa_alloc = exec_pool_alloc,
  306         .pa_free = exec_pool_free,
  307         .pa_pagesz = NCARGS
  308 };
  309 
  310 static void
  311 exec_path_free(struct execve_data *data)
  312 {              
  313         pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
  314         pathbuf_destroy(data->ed_pathbuf);
  315         if (data->ed_resolvedname)
  316                 PNBUF_PUT(data->ed_resolvedname);
  317 }
  318 
  319 static int
  320 exec_resolvename(struct lwp *l, struct exec_package *epp, struct vnode *vp,
  321     char **rpath)
  322 {
  323         int error;
  324         char *p;
  325 
  326         KASSERT(rpath != NULL);
  327 
  328         *rpath = PNBUF_GET();
  329         error = vnode_to_path(*rpath, MAXPATHLEN, vp, l, l->l_proc);
  330         if (error) {
  331                 DPRINTF(("%s: can't resolve name for %s, error %d\n",
  332                     __func__, epp->ep_kname, error));
  333                 PNBUF_PUT(*rpath);
  334                 *rpath = NULL;
  335                 return error;
  336         }
  337         epp->ep_resolvedname = *rpath;
  338         if ((p = strrchr(*rpath, '/')) != NULL)
  339                 epp->ep_kname = p + 1;
  340         return 0;
  341 }
  342 
  343 
  344 /*
  345  * check exec:
  346  * given an "executable" described in the exec package's namei info,
  347  * see what we can do with it.
  348  *
  349  * ON ENTRY:
  350  *      exec package with appropriate namei info
  351  *      lwp pointer of exec'ing lwp
  352  *      NO SELF-LOCKED VNODES
  353  *
  354  * ON EXIT:
  355  *      error:  nothing held, etc.  exec header still allocated.
  356  *      ok:     filled exec package, executable's vnode (unlocked).
  357  *
  358  * EXEC SWITCH ENTRY:
  359  *      Locked vnode to check, exec package, proc.
  360  *
  361  * EXEC SWITCH EXIT:
  362  *      ok:     return 0, filled exec package, executable's vnode (unlocked).
  363  *      error:  destructive:
  364  *                      everything deallocated execept exec header.
  365  *              non-destructive:
  366  *                      error code, executable's vnode (unlocked),
  367  *                      exec header unmodified.
  368  */
  369 int
  370 /*ARGSUSED*/
  371 check_exec(struct lwp *l, struct exec_package *epp, struct pathbuf *pb,
  372     char **rpath)
  373 {
  374         int             error, i;
  375         struct vnode    *vp;
  376         size_t          resid;
  377 
  378         if (epp->ep_resolvedname) {
  379                 struct nameidata nd;
  380 
  381                 // grab the absolute pathbuf here before namei() trashes it.
  382                 pathbuf_copystring(pb, epp->ep_resolvedname, PATH_MAX);
  383                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
  384 
  385                 /* first get the vnode */
  386                 if ((error = namei(&nd)) != 0)
  387                         return error;
  388 
  389                 epp->ep_vp = vp = nd.ni_vp;
  390 #ifdef DIAGNOSTIC
  391                 /* paranoia (take this out once namei stuff stabilizes) */
  392                 memset(nd.ni_pnbuf, '~', PATH_MAX);
  393 #endif
  394         } else {
  395                 struct file *fp;
  396 
  397                 if ((error = fd_getvnode(epp->ep_xfd, &fp)) != 0)
  398                         return error;
  399                 epp->ep_vp = vp = fp->f_vnode;
  400                 vref(vp);
  401                 fd_putfile(epp->ep_xfd);
  402                 if ((error = exec_resolvename(l, epp, vp, rpath)) != 0)
  403                         return error;
  404                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  405         }
  406 
  407         /* check access and type */
  408         if (vp->v_type != VREG) {
  409                 error = EACCES;
  410                 goto bad1;
  411         }
  412         if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
  413                 goto bad1;
  414 
  415         /* get attributes */
  416         /* XXX VOP_GETATTR is the only thing that needs LK_EXCLUSIVE here */
  417         if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
  418                 goto bad1;
  419 
  420         /* Check mount point */
  421         if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
  422                 error = EACCES;
  423                 goto bad1;
  424         }
  425         if (vp->v_mount->mnt_flag & MNT_NOSUID)
  426                 epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
  427 
  428         /* try to open it */
  429         if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
  430                 goto bad1;
  431 
  432         /* now we have the file, get the exec header */
  433         error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
  434                         UIO_SYSSPACE, IO_NODELOCKED, l->l_cred, &resid, NULL);
  435         if (error)
  436                 goto bad1;
  437 
  438         /* unlock vp, since we need it unlocked from here on out. */
  439         VOP_UNLOCK(vp);
  440 
  441 #if NVERIEXEC > 0
  442         error = veriexec_verify(l, vp,
  443             epp->ep_resolvedname ? epp->ep_resolvedname : epp->ep_kname,
  444             epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
  445             NULL);
  446         if (error)
  447                 goto bad2;
  448 #endif /* NVERIEXEC > 0 */
  449 
  450 #ifdef PAX_SEGVGUARD
  451         error = pax_segvguard(l, vp, epp->ep_resolvedname, false);
  452         if (error)
  453                 goto bad2;
  454 #endif /* PAX_SEGVGUARD */
  455 
  456         epp->ep_hdrvalid = epp->ep_hdrlen - resid;
  457 
  458         /*
  459          * Set up default address space limits.  Can be overridden
  460          * by individual exec packages.
  461          */
  462         epp->ep_vm_minaddr = exec_vm_minaddr(VM_MIN_ADDRESS);
  463         epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
  464 
  465         /*
  466          * set up the vmcmds for creation of the process
  467          * address space
  468          */
  469         error = ENOEXEC;
  470         for (i = 0; i < nexecs; i++) {
  471                 int newerror;
  472 
  473                 epp->ep_esch = execsw[i];
  474                 newerror = (*execsw[i]->es_makecmds)(l, epp);
  475 
  476                 if (!newerror) {
  477                         /* Seems ok: check that entry point is not too high */
  478                         if (epp->ep_entry >= epp->ep_vm_maxaddr) {
  479 #ifdef DIAGNOSTIC
  480                                 printf("%s: rejecting %p due to "
  481                                     "too high entry address (>= %p)\n",
  482                                          __func__, (void *)epp->ep_entry,
  483                                          (void *)epp->ep_vm_maxaddr);
  484 #endif
  485                                 error = ENOEXEC;
  486                                 break;
  487                         }
  488                         /* Seems ok: check that entry point is not too low */
  489                         if (epp->ep_entry < epp->ep_vm_minaddr) {
  490 #ifdef DIAGNOSTIC
  491                                 printf("%s: rejecting %p due to "
  492                                     "too low entry address (< %p)\n",
  493                                      __func__, (void *)epp->ep_entry,
  494                                      (void *)epp->ep_vm_minaddr);
  495 #endif
  496                                 error = ENOEXEC;
  497                                 break;
  498                         }
  499 
  500                         /* check limits */
  501 #ifdef DIAGNOSTIC
  502 #define LMSG "%s: rejecting due to %s limit (%ju > %ju)\n"
  503 #endif
  504 #ifdef MAXTSIZ
  505                         if (epp->ep_tsize > MAXTSIZ) {
  506 #ifdef DIAGNOSTIC
  507                                 printf(LMSG, __func__, "text",
  508                                     (uintmax_t)epp->ep_tsize,
  509                                     (uintmax_t)MAXTSIZ);
  510 #endif
  511                                 error = ENOMEM;
  512                                 break;
  513                         }
  514 #endif
  515                         vsize_t dlimit =
  516                             (vsize_t)l->l_proc->p_rlimit[RLIMIT_DATA].rlim_cur;
  517                         if (epp->ep_dsize > dlimit) {
  518 #ifdef DIAGNOSTIC
  519                                 printf(LMSG, __func__, "data",
  520                                     (uintmax_t)epp->ep_dsize,
  521                                     (uintmax_t)dlimit);
  522 #endif
  523                                 error = ENOMEM;
  524                                 break;
  525                         }
  526                         return 0;
  527                 }
  528 
  529                 /*
  530                  * Reset all the fields that may have been modified by the
  531                  * loader.
  532                  */
  533                 KASSERT(epp->ep_emul_arg == NULL);
  534                 if (epp->ep_emul_root != NULL) {
  535                         vrele(epp->ep_emul_root);
  536                         epp->ep_emul_root = NULL;
  537                 }
  538                 if (epp->ep_interp != NULL) {
  539                         vrele(epp->ep_interp);
  540                         epp->ep_interp = NULL;
  541                 }
  542                 epp->ep_pax_flags = 0;
  543 
  544                 /* make sure the first "interesting" error code is saved. */
  545                 if (error == ENOEXEC)
  546                         error = newerror;
  547 
  548                 if (epp->ep_flags & EXEC_DESTR)
  549                         /* Error from "#!" code, tidied up by recursive call */
  550                         return error;
  551         }
  552 
  553         /* not found, error */
  554 
  555         /*
  556          * free any vmspace-creation commands,
  557          * and release their references
  558          */
  559         kill_vmcmds(&epp->ep_vmcmds);
  560 
  561 #if NVERIEXEC > 0 || defined(PAX_SEGVGUARD)
  562 bad2:
  563 #endif
  564         /*
  565          * close and release the vnode, restore the old one, free the
  566          * pathname buf, and punt.
  567          */
  568         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  569         VOP_CLOSE(vp, FREAD, l->l_cred);
  570         vput(vp);
  571         return error;
  572 
  573 bad1:
  574         /*
  575          * free the namei pathname buffer, and put the vnode
  576          * (which we don't yet have open).
  577          */
  578         vput(vp);                               /* was still locked */
  579         return error;
  580 }
  581 
  582 #ifdef __MACHINE_STACK_GROWS_UP
  583 #define STACK_PTHREADSPACE NBPG
  584 #else
  585 #define STACK_PTHREADSPACE 0
  586 #endif
  587 
  588 static int
  589 execve_fetch_element(char * const *array, size_t index, char **value)
  590 {
  591         return copyin(array + index, value, sizeof(*value));
  592 }
  593 
  594 /*
  595  * exec system call
  596  */
  597 int
  598 sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
  599 {
  600         /* {
  601                 syscallarg(const char *)        path;
  602                 syscallarg(char * const *)      argp;
  603                 syscallarg(char * const *)      envp;
  604         } */
  605 
  606         return execve1(l, true, SCARG(uap, path), -1, SCARG(uap, argp),
  607             SCARG(uap, envp), execve_fetch_element);
  608 }
  609 
  610 int
  611 sys_fexecve(struct lwp *l, const struct sys_fexecve_args *uap,
  612     register_t *retval)
  613 {
  614         /* {
  615                 syscallarg(int)                 fd;
  616                 syscallarg(char * const *)      argp;
  617                 syscallarg(char * const *)      envp;
  618         } */
  619 
  620         return execve1(l, false, NULL, SCARG(uap, fd), SCARG(uap, argp),
  621             SCARG(uap, envp), execve_fetch_element);
  622 }
  623 
  624 /*
  625  * Load modules to try and execute an image that we do not understand.
  626  * If no execsw entries are present, we load those likely to be needed
  627  * in order to run native images only.  Otherwise, we autoload all
  628  * possible modules that could let us run the binary.  XXX lame
  629  */
  630 static void
  631 exec_autoload(void)
  632 {
  633 #ifdef MODULAR
  634         static const char * const native[] = {
  635                 "exec_elf32",
  636                 "exec_elf64",
  637                 "exec_script",
  638                 NULL
  639         };
  640         static const char * const compat[] = {
  641                 "exec_elf32",
  642                 "exec_elf64",
  643                 "exec_script",
  644                 "exec_aout",
  645                 "exec_coff",
  646                 "exec_ecoff",
  647                 "compat_aoutm68k",
  648                 "compat_netbsd32",
  649 #if 0
  650                 "compat_linux",
  651                 "compat_linux32",
  652 #endif
  653                 "compat_sunos",
  654                 "compat_sunos32",
  655                 "compat_ultrix",
  656                 NULL
  657         };
  658         char const * const *list;
  659         int i;
  660 
  661         list = nexecs == 0 ? native : compat;
  662         for (i = 0; list[i] != NULL; i++) {
  663                 if (module_autoload(list[i], MODULE_CLASS_EXEC) != 0) {
  664                         continue;
  665                 }
  666                 yield();
  667         }
  668 #endif
  669 }
  670 
  671 /*
  672  * Copy the user or kernel supplied upath to the allocated pathbuffer pbp
  673  * making it absolute in the process, by prepending the current working
  674  * directory if it is not. If offs is supplied it will contain the offset
  675  * where the original supplied copy of upath starts.
  676  */
  677 int
  678 exec_makepathbuf(struct lwp *l, const char *upath, enum uio_seg seg,
  679     struct pathbuf **pbp, size_t *offs)
  680 {
  681         char *path, *bp;
  682         size_t len, tlen;
  683         int error;
  684         struct cwdinfo *cwdi;
  685 
  686         path = PNBUF_GET();
  687         if (seg == UIO_SYSSPACE) {
  688                 error = copystr(upath, path, MAXPATHLEN, &len);
  689         } else {
  690                 error = copyinstr(upath, path, MAXPATHLEN, &len);
  691         }
  692         if (error)
  693                 goto err;
  694 
  695         if (path[0] == '/') {
  696                 if (offs)
  697                         *offs = 0;
  698                 goto out;
  699         }
  700 
  701         len++;
  702         if (len + 1 >= MAXPATHLEN) {
  703                 error = ENAMETOOLONG;
  704                 goto err;
  705         }
  706         bp = path + MAXPATHLEN - len;
  707         memmove(bp, path, len);
  708         *(--bp) = '/';
  709 
  710         cwdi = l->l_proc->p_cwdi;
  711         rw_enter(&cwdi->cwdi_lock, RW_READER);
  712         error = getcwd_common(cwdi->cwdi_cdir, NULL, &bp, path, MAXPATHLEN / 2,
  713             GETCWD_CHECK_ACCESS, l);
  714         rw_exit(&cwdi->cwdi_lock);
  715 
  716         if (error)
  717                 goto err;
  718         tlen = path + MAXPATHLEN - bp;
  719 
  720         memmove(path, bp, tlen);
  721         path[tlen - 1] = '\0';
  722         if (offs)
  723                 *offs = tlen - len;
  724 out:
  725         *pbp = pathbuf_assimilate(path);
  726         return 0;
  727 err:
  728         PNBUF_PUT(path);
  729         return error;
  730 }
  731 
  732 vaddr_t
  733 exec_vm_minaddr(vaddr_t va_min)
  734 {
  735         /*
  736          * Increase va_min if we don't want NULL to be mappable by the
  737          * process.
  738          */
  739 #define VM_MIN_GUARD    PAGE_SIZE
  740         if (user_va0_disable && (va_min < VM_MIN_GUARD))
  741                 return VM_MIN_GUARD;
  742         return va_min;
  743 }
  744 
  745 static int
  746 execve_loadvm(struct lwp *l, bool has_path, const char *path, int fd,
  747         char * const *args, char * const *envs,
  748         execve_fetch_element_t fetch_element,
  749         struct execve_data * restrict data)
  750 {
  751         struct exec_package     * const epp = &data->ed_pack;
  752         int                     error;
  753         struct proc             *p;
  754         char                    *dp;
  755         u_int                   modgen;
  756 
  757         KASSERT(data != NULL);
  758 
  759         p = l->l_proc;
  760         modgen = 0;
  761 
  762         SDT_PROBE(proc, kernel, , exec, path, 0, 0, 0, 0);
  763 
  764         /*
  765          * Check if we have exceeded our number of processes limit.
  766          * This is so that we handle the case where a root daemon
  767          * forked, ran setuid to become the desired user and is trying
  768          * to exec. The obvious place to do the reference counting check
  769          * is setuid(), but we don't do the reference counting check there
  770          * like other OS's do because then all the programs that use setuid()
  771          * must be modified to check the return code of setuid() and exit().
  772          * It is dangerous to make setuid() fail, because it fails open and
  773          * the program will continue to run as root. If we make it succeed
  774          * and return an error code, again we are not enforcing the limit.
  775          * The best place to enforce the limit is here, when the process tries
  776          * to execute a new image, because eventually the process will need
  777          * to call exec in order to do something useful.
  778          */
  779  retry:
  780         if (p->p_flag & PK_SUGID) {
  781                 if (kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
  782                      p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
  783                      &p->p_rlimit[RLIMIT_NPROC],
  784                      KAUTH_ARG(RLIMIT_NPROC)) != 0 &&
  785                     chgproccnt(kauth_cred_getuid(l->l_cred), 0) >
  786                      p->p_rlimit[RLIMIT_NPROC].rlim_cur)
  787                 return EAGAIN;
  788         }
  789 
  790         /*
  791          * Drain existing references and forbid new ones.  The process
  792          * should be left alone until we're done here.  This is necessary
  793          * to avoid race conditions - e.g. in ptrace() - that might allow
  794          * a local user to illicitly obtain elevated privileges.
  795          */
  796         rw_enter(&p->p_reflock, RW_WRITER);
  797 
  798         if (has_path) {
  799                 size_t  offs;
  800                 /*
  801                  * Init the namei data to point the file user's program name.
  802                  * This is done here rather than in check_exec(), so that it's
  803                  * possible to override this settings if any of makecmd/probe
  804                  * functions call check_exec() recursively - for example,
  805                  * see exec_script_makecmds().
  806                  */
  807                 if ((error = exec_makepathbuf(l, path, UIO_USERSPACE,
  808                     &data->ed_pathbuf, &offs)) != 0)
  809                         goto clrflg;
  810                 data->ed_pathstring = pathbuf_stringcopy_get(data->ed_pathbuf);
  811                 epp->ep_kname = data->ed_pathstring + offs;
  812                 data->ed_resolvedname = PNBUF_GET();
  813                 epp->ep_resolvedname = data->ed_resolvedname;
  814                 epp->ep_xfd = -1;
  815         } else {
  816                 data->ed_pathbuf = pathbuf_assimilate(strcpy(PNBUF_GET(), "/"));
  817                 data->ed_pathstring = pathbuf_stringcopy_get(data->ed_pathbuf);
  818                 epp->ep_kname = "*fexecve*";
  819                 data->ed_resolvedname = NULL;
  820                 epp->ep_resolvedname = NULL;
  821                 epp->ep_xfd = fd;
  822         }
  823 
  824 
  825         /*
  826          * initialize the fields of the exec package.
  827          */
  828         epp->ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
  829         epp->ep_hdrlen = exec_maxhdrsz;
  830         epp->ep_hdrvalid = 0;
  831         epp->ep_emul_arg = NULL;
  832         epp->ep_emul_arg_free = NULL;
  833         memset(&epp->ep_vmcmds, 0, sizeof(epp->ep_vmcmds));
  834         epp->ep_vap = &data->ed_attr;
  835         epp->ep_flags = (p->p_flag & PK_32) ? EXEC_FROM32 : 0;
  836         MD_TOPDOWN_INIT(epp);
  837         epp->ep_emul_root = NULL;
  838         epp->ep_interp = NULL;
  839         epp->ep_esch = NULL;
  840         epp->ep_pax_flags = 0;
  841         memset(epp->ep_machine_arch, 0, sizeof(epp->ep_machine_arch));
  842 
  843         rw_enter(&exec_lock, RW_READER);
  844 
  845         /* see if we can run it. */
  846         if ((error = check_exec(l, epp, data->ed_pathbuf,
  847             &data->ed_resolvedname)) != 0) {
  848                 if (error != ENOENT && error != EACCES && error != ENOEXEC) {
  849                         DPRINTF(("%s: check exec failed for %s, error %d\n",
  850                             __func__, epp->ep_kname, error));
  851                 }
  852                 goto freehdr;
  853         }
  854 
  855         /* allocate an argument buffer */
  856         data->ed_argp = pool_get(&exec_pool, PR_WAITOK);
  857         KASSERT(data->ed_argp != NULL);
  858         dp = data->ed_argp;
  859 
  860         if ((error = copyinargs(data, args, envs, fetch_element, &dp)) != 0) {
  861                 goto bad;
  862         }
  863 
  864         /*
  865          * Calculate the new stack size.
  866          */
  867 
  868 #ifdef __MACHINE_STACK_GROWS_UP
  869 /*
  870  * copyargs() fills argc/argv/envp from the lower address even on
  871  * __MACHINE_STACK_GROWS_UP machines.  Reserve a few words just below the SP
  872  * so that _rtld() use it.
  873  */
  874 #define RTLD_GAP        32
  875 #else
  876 #define RTLD_GAP        0
  877 #endif
  878 
  879         const size_t argenvstrlen = (char *)ALIGN(dp) - data->ed_argp;
  880 
  881         data->ed_argslen = calcargs(data, argenvstrlen);
  882 
  883         const size_t len = calcstack(data, pax_aslr_stack_gap(epp) + RTLD_GAP);
  884 
  885         if (len > epp->ep_ssize) {
  886                 /* in effect, compare to initial limit */
  887                 DPRINTF(("%s: stack limit exceeded %zu\n", __func__, len));
  888                 error = ENOMEM;
  889                 goto bad;
  890         }
  891         /* adjust "active stack depth" for process VSZ */
  892         epp->ep_ssize = len;
  893 
  894         return 0;
  895 
  896  bad:
  897         /* free the vmspace-creation commands, and release their references */
  898         kill_vmcmds(&epp->ep_vmcmds);
  899         /* kill any opened file descriptor, if necessary */
  900         if (epp->ep_flags & EXEC_HASFD) {
  901                 epp->ep_flags &= ~EXEC_HASFD;
  902                 fd_close(epp->ep_fd);
  903         }
  904         /* close and put the exec'd file */
  905         vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
  906         VOP_CLOSE(epp->ep_vp, FREAD, l->l_cred);
  907         vput(epp->ep_vp);
  908         pool_put(&exec_pool, data->ed_argp);
  909 
  910  freehdr:
  911         kmem_free(epp->ep_hdr, epp->ep_hdrlen);
  912         if (epp->ep_emul_root != NULL)
  913                 vrele(epp->ep_emul_root);
  914         if (epp->ep_interp != NULL)
  915                 vrele(epp->ep_interp);
  916 
  917         rw_exit(&exec_lock);
  918 
  919         exec_path_free(data);
  920 
  921  clrflg:
  922         rw_exit(&p->p_reflock);
  923 
  924         if (modgen != module_gen && error == ENOEXEC) {
  925                 modgen = module_gen;
  926                 exec_autoload();
  927                 goto retry;
  928         }
  929 
  930         SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0);
  931         return error;
  932 }
  933 
  934 static int
  935 execve_dovmcmds(struct lwp *l, struct execve_data * restrict data)
  936 {
  937         struct exec_package     * const epp = &data->ed_pack;
  938         struct proc             *p = l->l_proc;
  939         struct exec_vmcmd       *base_vcp;
  940         int                     error = 0;
  941         size_t                  i;
  942 
  943         /* record proc's vnode, for use by procfs and others */
  944         if (p->p_textvp)
  945                 vrele(p->p_textvp);
  946         vref(epp->ep_vp);
  947         p->p_textvp = epp->ep_vp;
  948 
  949         /* create the new process's VM space by running the vmcmds */
  950         KASSERTMSG(epp->ep_vmcmds.evs_used != 0, "%s: no vmcmds", __func__);
  951 
  952 #ifdef TRACE_EXEC
  953         DUMPVMCMDS(epp, 0, 0);
  954 #endif
  955 
  956         base_vcp = NULL;
  957 
  958         for (i = 0; i < epp->ep_vmcmds.evs_used && !error; i++) {
  959                 struct exec_vmcmd *vcp;
  960 
  961                 vcp = &epp->ep_vmcmds.evs_cmds[i];
  962                 if (vcp->ev_flags & VMCMD_RELATIVE) {
  963                         KASSERTMSG(base_vcp != NULL,
  964                             "%s: relative vmcmd with no base", __func__);
  965                         KASSERTMSG((vcp->ev_flags & VMCMD_BASE) == 0,
  966                             "%s: illegal base & relative vmcmd", __func__);
  967                         vcp->ev_addr += base_vcp->ev_addr;
  968                 }
  969                 error = (*vcp->ev_proc)(l, vcp);
  970                 if (error)
  971                         DUMPVMCMDS(epp, i, error);
  972                 if (vcp->ev_flags & VMCMD_BASE)
  973                         base_vcp = vcp;
  974         }
  975 
  976         /* free the vmspace-creation commands, and release their references */
  977         kill_vmcmds(&epp->ep_vmcmds);
  978 
  979         vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
  980         VOP_CLOSE(epp->ep_vp, FREAD, l->l_cred);
  981         vput(epp->ep_vp);
  982 
  983         /* if an error happened, deallocate and punt */
  984         if (error != 0) {
  985                 DPRINTF(("%s: vmcmd %zu failed: %d\n", __func__, i - 1, error));
  986         }
  987         return error;
  988 }
  989 
  990 static void
  991 execve_free_data(struct execve_data *data)
  992 {
  993         struct exec_package     * const epp = &data->ed_pack;
  994 
  995         /* free the vmspace-creation commands, and release their references */
  996         kill_vmcmds(&epp->ep_vmcmds);
  997         /* kill any opened file descriptor, if necessary */
  998         if (epp->ep_flags & EXEC_HASFD) {
  999                 epp->ep_flags &= ~EXEC_HASFD;
 1000                 fd_close(epp->ep_fd);
 1001         }
 1002 
 1003         /* close and put the exec'd file */
 1004         vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
 1005         VOP_CLOSE(epp->ep_vp, FREAD, curlwp->l_cred);
 1006         vput(epp->ep_vp);
 1007         pool_put(&exec_pool, data->ed_argp);
 1008 
 1009         kmem_free(epp->ep_hdr, epp->ep_hdrlen);
 1010         if (epp->ep_emul_root != NULL)
 1011                 vrele(epp->ep_emul_root);
 1012         if (epp->ep_interp != NULL)
 1013                 vrele(epp->ep_interp);
 1014 
 1015         exec_path_free(data);
 1016 }
 1017 
 1018 static void
 1019 pathexec(struct proc *p, const char *resolvedname)
 1020 {
 1021         /* set command name & other accounting info */
 1022         const char *cmdname;
 1023 
 1024         if (resolvedname == NULL) {
 1025                 cmdname = "*fexecve*";
 1026                 resolvedname = "/";
 1027         } else {
 1028                 cmdname = strrchr(resolvedname, '/') + 1;
 1029         }
 1030         KASSERTMSG(resolvedname[0] == '/', "bad resolvedname `%s'",
 1031             resolvedname);
 1032 
 1033         strlcpy(p->p_comm, cmdname, sizeof(p->p_comm));
 1034 
 1035         kmem_strfree(p->p_path);
 1036         p->p_path = kmem_strdupsize(resolvedname, NULL, KM_SLEEP);
 1037 }
 1038 
 1039 /* XXX elsewhere */
 1040 static int
 1041 credexec(struct lwp *l, struct execve_data *data)
 1042 {
 1043         struct proc *p = l->l_proc;
 1044         struct vattr *attr = &data->ed_attr;
 1045         int error;
 1046 
 1047         /*
 1048          * Deal with set[ug]id.  MNT_NOSUID has already been used to disable
 1049          * s[ug]id.  It's OK to check for PSL_TRACED here as we have blocked
 1050          * out additional references on the process for the moment.
 1051          */
 1052         if ((p->p_slflag & PSL_TRACED) == 0 &&
 1053 
 1054             (((attr->va_mode & S_ISUID) != 0 &&
 1055               kauth_cred_geteuid(l->l_cred) != attr->va_uid) ||
 1056 
 1057              ((attr->va_mode & S_ISGID) != 0 &&
 1058               kauth_cred_getegid(l->l_cred) != attr->va_gid))) {
 1059                 /*
 1060                  * Mark the process as SUGID before we do
 1061                  * anything that might block.
 1062                  */
 1063                 proc_crmod_enter();
 1064                 proc_crmod_leave(NULL, NULL, true);
 1065                 if (data->ed_argc == 0) {
 1066                         DPRINTF((
 1067                             "%s: not executing set[ug]id binary with no args\n",
 1068                             __func__));
 1069                         return EINVAL;
 1070                 }
 1071 
 1072                 /* Make sure file descriptors 0..2 are in use. */
 1073                 if ((error = fd_checkstd()) != 0) {
 1074                         DPRINTF(("%s: fdcheckstd failed %d\n",
 1075                             __func__, error));
 1076                         return error;
 1077                 }
 1078 
 1079                 /*
 1080                  * Copy the credential so other references don't see our
 1081                  * changes.
 1082                  */
 1083                 l->l_cred = kauth_cred_copy(l->l_cred);
 1084 #ifdef KTRACE
 1085                 /*
 1086                  * If the persistent trace flag isn't set, turn off.
 1087                  */
 1088                 if (p->p_tracep) {
 1089                         mutex_enter(&ktrace_lock);
 1090                         if (!(p->p_traceflag & KTRFAC_PERSISTENT))
 1091                                 ktrderef(p);
 1092                         mutex_exit(&ktrace_lock);
 1093                 }
 1094 #endif
 1095                 if (attr->va_mode & S_ISUID)
 1096                         kauth_cred_seteuid(l->l_cred, attr->va_uid);
 1097                 if (attr->va_mode & S_ISGID)
 1098                         kauth_cred_setegid(l->l_cred, attr->va_gid);
 1099         } else {
 1100                 if (kauth_cred_geteuid(l->l_cred) ==
 1101                     kauth_cred_getuid(l->l_cred) &&
 1102                     kauth_cred_getegid(l->l_cred) ==
 1103                     kauth_cred_getgid(l->l_cred))
 1104                         p->p_flag &= ~PK_SUGID;
 1105         }
 1106 
 1107         /*
 1108          * Copy the credential so other references don't see our changes.
 1109          * Test to see if this is necessary first, since in the common case
 1110          * we won't need a private reference.
 1111          */
 1112         if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
 1113             kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
 1114                 l->l_cred = kauth_cred_copy(l->l_cred);
 1115                 kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
 1116                 kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
 1117         }
 1118 
 1119         /* Update the master credentials. */
 1120         if (l->l_cred != p->p_cred) {
 1121                 kauth_cred_t ocred;
 1122 
 1123                 kauth_cred_hold(l->l_cred);
 1124                 mutex_enter(p->p_lock);
 1125                 ocred = p->p_cred;
 1126                 p->p_cred = l->l_cred;
 1127                 mutex_exit(p->p_lock);
 1128                 kauth_cred_free(ocred);
 1129         }
 1130 
 1131         return 0;
 1132 }
 1133 
 1134 static void
 1135 emulexec(struct lwp *l, struct exec_package *epp)
 1136 {
 1137         struct proc             *p = l->l_proc;
 1138 
 1139         /* The emulation root will usually have been found when we looked
 1140          * for the elf interpreter (or similar), if not look now. */
 1141         if (epp->ep_esch->es_emul->e_path != NULL &&
 1142             epp->ep_emul_root == NULL)
 1143                 emul_find_root(l, epp);
 1144 
 1145         /* Any old emulation root got removed by fdcloseexec */
 1146         rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
 1147         p->p_cwdi->cwdi_edir = epp->ep_emul_root;
 1148         rw_exit(&p->p_cwdi->cwdi_lock);
 1149         epp->ep_emul_root = NULL;
 1150         if (epp->ep_interp != NULL)
 1151                 vrele(epp->ep_interp);
 1152 
 1153         /*
 1154          * Call emulation specific exec hook. This can setup per-process
 1155          * p->p_emuldata or do any other per-process stuff an emulation needs.
 1156          *
 1157          * If we are executing process of different emulation than the
 1158          * original forked process, call e_proc_exit() of the old emulation
 1159          * first, then e_proc_exec() of new emulation. If the emulation is
 1160          * same, the exec hook code should deallocate any old emulation
 1161          * resources held previously by this process.
 1162          */
 1163         if (p->p_emul && p->p_emul->e_proc_exit
 1164             && p->p_emul != epp->ep_esch->es_emul)
 1165                 (*p->p_emul->e_proc_exit)(p);
 1166 
 1167         /*
 1168          * Call exec hook. Emulation code may NOT store reference to anything
 1169          * from &pack.
 1170          */
 1171         if (epp->ep_esch->es_emul->e_proc_exec)
 1172                 (*epp->ep_esch->es_emul->e_proc_exec)(p, epp);
 1173 
 1174         /* update p_emul, the old value is no longer needed */
 1175         p->p_emul = epp->ep_esch->es_emul;
 1176 
 1177         /* ...and the same for p_execsw */
 1178         p->p_execsw = epp->ep_esch;
 1179 
 1180 #ifdef __HAVE_SYSCALL_INTERN
 1181         (*p->p_emul->e_syscall_intern)(p);
 1182 #endif
 1183         ktremul();
 1184 }
 1185 
 1186 static int
 1187 execve_runproc(struct lwp *l, struct execve_data * restrict data,
 1188         bool no_local_exec_lock, bool is_spawn)
 1189 {
              /*
               * execve_runproc: replace the image of l's process using the
               * exec package already prepared in `data'.
               *
               * l                   LWP performing the exec.
               * data                prepared exec state; must not be NULL
               *                     (asserted below).
               * no_local_exec_lock  true when this LWP does not itself hold
               *                     exec_lock; only allowed together with
               *                     is_spawn (asserted below).  When false,
               *                     exec_lock is released here on both the
               *                     success and the failure path.
               * is_spawn            selects uvmspace_spawn() instead of
               *                     uvmspace_exec(), suppresses the
               *                     TRAP_EXEC event, and makes the failure
               *                     path return instead of calling exit1().
               *
               * Returns EJUSTRETURN on success.  On failure control reaches
               * exec_abort, which releases p_reflock (and exec_lock, see
               * above), frees the exec resources and then calls
               * exit1(l, error, SIGABRT) unless is_spawn, in which case the
               * error is returned.
               */
 1190         struct exec_package     * const epp = &data->ed_pack;
 1191         int error = 0;
 1192         struct proc             *p;
 1193         struct vmspace          *vm;
 1194 
 1195         /*
 1196          * In case of a posix_spawn operation, the child doing the exec
 1197          * might not hold the reader lock on exec_lock, but the parent
 1198          * will do this instead.
 1199          */
 1200         KASSERT(no_local_exec_lock || rw_lock_held(&exec_lock));
 1201         KASSERT(!no_local_exec_lock || is_spawn);
 1202         KASSERT(data != NULL);
 1203 
 1204         p = l->l_proc;
 1205 
 1206         /* Get rid of other LWPs. */
 1207         if (p->p_nlwps > 1) {
 1208                 mutex_enter(p->p_lock);
 1209                 exit_lwps(l);
 1210                 mutex_exit(p->p_lock);
 1211         }
 1212         KDASSERT(p->p_nlwps == 1);
 1213 
 1214         /*
 1215          * All of the other LWPs got rid of their robust futexes
 1216          * when they exited above, but we might still have some
 1217          * to dispose of.  Do that now.
 1218          */
 1219         if (__predict_false(l->l_robust_head != 0)) {
 1220                 futex_release_all_lwp(l);
 1221                 /*
 1222                  * Since this LWP will live on with a different
 1223                  * program image, we need to clear the robust
 1224                  * futex list pointer here.
 1225                  */
 1226                 l->l_robust_head = 0;
 1227         }
 1228 
 1229         /* Destroy any lwpctl info. */
 1230         if (p->p_lwpctl != NULL)
 1231                 lwp_ctl_exit();
 1232 
 1233         /* Remove POSIX timers */
 1234         ptimers_free(p, TIMERS_POSIX);
 1235 
 1236         /* Set the PaX flags. */
 1237         pax_set_flags(epp, p);
 1238 
 1239         /*
 1240          * Do whatever is necessary to prepare the address space
 1241          * for remapping.  Note that this might replace the current
 1242          * vmspace with another!
 1243          *
 1244          * vfork(): do not touch any user space data in the new child
 1245          * until we have awoken the parent below, or it will defeat
 1246          * lazy pmap switching (on x86).
 1247          */
 1248         if (is_spawn)
 1249                 uvmspace_spawn(l, epp->ep_vm_minaddr,
 1250                     epp->ep_vm_maxaddr,
 1251                     epp->ep_flags & EXEC_TOPDOWN_VM);
 1252         else
 1253                 uvmspace_exec(l, epp->ep_vm_minaddr,
 1254                     epp->ep_vm_maxaddr,
 1255                     epp->ep_flags & EXEC_TOPDOWN_VM);
              /* Re-read the vmspace: the call above may have replaced it. */
 1256         vm = p->p_vmspace;
 1257 
              /* Record the new image's segment layout from the exec package. */
 1258         vm->vm_taddr = (void *)epp->ep_taddr;
 1259         vm->vm_tsize = btoc(epp->ep_tsize);
 1260         vm->vm_daddr = (void*)epp->ep_daddr;
 1261         vm->vm_dsize = btoc(epp->ep_dsize);
 1262         vm->vm_ssize = btoc(epp->ep_ssize);
 1263         vm->vm_issize = 0;
 1264         vm->vm_maxsaddr = (void *)epp->ep_maxsaddr;
 1265         vm->vm_minsaddr = (void *)epp->ep_minsaddr;
 1266 
 1267         pax_aslr_init_vm(l, vm, epp);
 1268 
 1269         cwdexec(p);
 1270         fd_closeexec();         /* handle close on exec */
 1271 
 1272         if (__predict_false(ktrace_on))
 1273                 fd_ktrexecfd();
 1274 
 1275         execsigs(p);            /* reset caught signals */
 1276 
 1277         mutex_enter(p->p_lock);
 1278         l->l_ctxlink = NULL;    /* reset ucontext link */
 1279         p->p_acflag &= ~AFORK;
 1280         p->p_flag |= PK_EXEC;
 1281         mutex_exit(p->p_lock);
 1282 
              /*
               * From here on every failure goes to exec_abort: the old image
               * is already gone, so the error cannot simply be returned to it.
               */
 1283         error = credexec(l, data);
 1284         if (error)
 1285                 goto exec_abort;
 1286 
 1287 #if defined(__HAVE_RAS)
 1288         /*
 1289          * Remove all RASs from the address space.
 1290          */
 1291         ras_purgeall();
 1292 #endif
 1293 
 1294         /*
 1295          * Stop profiling.
 1296          */
 1297         if ((p->p_stflag & PST_PROFIL) != 0) {
 1298                 mutex_spin_enter(&p->p_stmutex);
 1299                 stopprofclock(p);
 1300                 mutex_spin_exit(&p->p_stmutex);
 1301         }
 1302 
 1303         /*
 1304          * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
 1305          * exited and exec()/exit() are the only places it will be cleared.
 1306          *
 1307          * Once the parent has been awoken, curlwp may teleport to a new CPU
 1308          * in sched_vforkexec(), and it's then OK to start messing with user
 1309          * data.  See comment above.
 1310          */
 1311         if ((p->p_lflag & PL_PPWAIT) != 0) {
 1312                 bool samecpu;
 1313                 lwp_t *lp;
 1314 
 1315                 mutex_enter(&proc_lock);
 1316                 lp = p->p_vforklwp;
 1317                 p->p_vforklwp = NULL;
 1318                 l->l_lwpctl = NULL; /* was on loan from blocked parent */
 1319                 cv_broadcast(&lp->l_waitcv);
 1320 
 1321                 /* Clear flags after cv_broadcast() (scheduler needs them). */
 1322                 p->p_lflag &= ~PL_PPWAIT;
 1323                 lp->l_vforkwaiting = false;
 1324 
 1325                 /* If parent is still on same CPU, teleport curlwp elsewhere. */
 1326                 samecpu = (lp->l_cpu == curlwp->l_cpu);
 1327                 mutex_exit(&proc_lock);
 1328 
 1329                 /* Give the parent its CPU back - find a new home. */
 1330                 KASSERT(!is_spawn);
 1331                 sched_vforkexec(l, samecpu);
 1332         }
 1333 
 1334         /* Now map address space. */
 1335         error = execve_dovmcmds(l, data);
 1336         if (error != 0)
 1337                 goto exec_abort;
 1338 
 1339         pathexec(p, epp->ep_resolvedname);
 1340 
 1341         char * const newstack = STACK_GROW(vm->vm_minsaddr, epp->ep_ssize);
 1342 
 1343         error = copyoutargs(data, l, newstack);
 1344         if (error != 0)
 1345                 goto exec_abort;
 1346 
 1347         doexechooks(p);
 1348 
 1349         /*
 1350          * Set initial SP at the top of the stack.
 1351          *
 1352          * Note that on machines where stack grows up (e.g. hppa), SP points to
 1353          * the end of arg/env strings.  Userland guesses the address of argc
 1354          * via ps_strings::ps_argvstr.
 1355          */
 1356 
 1357         /* Setup new registers and do misc. setup. */
 1358         (*epp->ep_esch->es_emul->e_setregs)(l, epp, (vaddr_t)newstack);
 1359         if (epp->ep_esch->es_setregs)
 1360                 (*epp->ep_esch->es_setregs)(l, epp, (vaddr_t)newstack);
 1361 
 1362         /* Provide a consistent LWP private setting */
 1363         (void)lwp_setprivate(l, NULL);
 1364 
 1365         /* Discard all PCU state; need to start fresh */
 1366         pcu_discard_all(l);
 1367 
 1368         /* map the process's signal trampoline code */
 1369         if ((error = exec_sigcode_map(p, epp->ep_esch->es_emul)) != 0) {
 1370                 DPRINTF(("%s: map sigcode failed %d\n", __func__, error));
 1371                 goto exec_abort;
 1372         }
 1373 
              /*
               * Last use of the argument buffer was copyoutargs() above; no
               * path below this point jumps to exec_abort, which would put
               * the buffer a second time.
               */
 1374         pool_put(&exec_pool, data->ed_argp);
 1375 
 1376         /*
 1377          * Notify anyone who might care that we've exec'd.
 1378          *
 1379          * This is slightly racy; someone could sneak in and
 1380          * attach a knote after we've decided not to notify,
 1381          * or vice-versa, but that's not particularly bothersome.
 1382          * knote_proc_exec() will acquire p->p_lock as needed.
 1383          */
 1384         if (!SLIST_EMPTY(&p->p_klist)) {
 1385                 knote_proc_exec(p);
 1386         }
 1387 
 1388         kmem_free(epp->ep_hdr, epp->ep_hdrlen);
 1389 
 1390         SDT_PROBE(proc, kernel, , exec__success, epp->ep_kname, 0, 0, 0, 0);
 1391 
 1392         emulexec(l, epp);
 1393 
 1394         /* Allow new references from the debugger/procfs. */
 1395         rw_exit(&p->p_reflock);
 1396         if (!no_local_exec_lock)
 1397                 rw_exit(&exec_lock);
 1398 
 1399         mutex_enter(&proc_lock);
 1400 
 1401         /* posix_spawn(3) reports a single event with implied exec(3) */
 1402         if ((p->p_slflag & PSL_TRACED) && !is_spawn) {
 1403                 mutex_enter(p->p_lock);
 1404                 eventswitch(TRAP_EXEC, 0, 0);
                      /*
                       * NOTE(review): proc_lock is entered again here although
                       * it was taken just above; presumably eventswitch()
                       * returns with both proc_lock and p->p_lock released --
                       * confirm against its definition.
                       */
 1405                 mutex_enter(&proc_lock);
 1406         }
 1407 
 1408         if (p->p_sflag & PS_STOPEXEC) {
 1409                 ksiginfoq_t kq;
 1410 
                      /*
                       * PS_STOPEXEC is set: account the stop with the parent
                       * (p_nstopchild), clear the signals in contsigmask, mark
                       * this LWP and the process stopped, and call mi_switch().
                       * The ksiginfo entries collected by sigclearall() are
                       * drained once mi_switch() returns.
                       */
 1411                 KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
 1412                 p->p_pptr->p_nstopchild++;
 1413                 p->p_waited = 0;
 1414                 mutex_enter(p->p_lock);
 1415                 ksiginfo_queue_init(&kq);
 1416                 sigclearall(p, &contsigmask, &kq);
 1417                 lwp_lock(l);
 1418                 l->l_stat = LSSTOP;
 1419                 p->p_stat = SSTOP;
 1420                 p->p_nrlwps--;
 1421                 lwp_unlock(l);
 1422                 mutex_exit(p->p_lock);
 1423                 mutex_exit(&proc_lock);
 1424                 lwp_lock(l);
 1425                 spc_lock(l->l_cpu);
 1426                 mi_switch(l);
 1427                 ksiginfo_queue_drain(&kq);
 1428         } else {
 1429                 mutex_exit(&proc_lock);
 1430         }
 1431 
 1432         exec_path_free(data);
 1433 #ifdef TRACE_EXEC
 1434         DPRINTF(("%s finished\n", __func__));
 1435 #endif
 1436         return EJUSTRETURN;
 1437 
 1438  exec_abort:
 1439         SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0);
 1440         rw_exit(&p->p_reflock);
 1441         if (!no_local_exec_lock)
 1442                 rw_exit(&exec_lock);
 1443 
 1444         exec_path_free(data);
 1445 
 1446         /*
 1447          * the old process doesn't exist anymore.  exit gracefully.
 1448          * get rid of the (new) address space we have created, if any, get rid
 1449          * of our namei data and vnode, and exit noting failure
 1450          */
 1451         if (vm != NULL) {
 1452                 uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
 1453                         VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
 1454         }
 1455 
 1456         exec_free_emul_arg(epp);
 1457         pool_put(&exec_pool, data->ed_argp);
 1458         kmem_free(epp->ep_hdr, epp->ep_hdrlen);
 1459         if (epp->ep_emul_root != NULL)
 1460                 vrele(epp->ep_emul_root);
 1461         if (epp->ep_interp != NULL)
 1462                 vrele(epp->ep_interp);
 1463 
 1464         /* Acquire the sched-state mutex (exit1() will release it). */
 1465         if (!is_spawn) {
 1466                 mutex_enter(p->p_lock);
 1467                 exit1(l, error, SIGABRT);
 1468         }
 1469 
              /* Reached for is_spawn: the error is handed back to the caller. */
 1470         return error;
 1471 }
 1472 
 1473 int
 1474 execve1(struct lwp *l, bool has_path, const char *path, int fd,
 1475     char * const *args, char * const *envs,
 1476     execve_fetch_element_t fetch_element)
 1477 {
 1478         struct execve_data data;
 1479         int error;
 1480 
 1481         error = execve_loadvm(l, has_path, path, fd, args, envs, fetch_element,
 1482             &data);
 1483         if (error)
 1484                 return error;
 1485         error = execve_runproc(l, &data, false, false);
 1486         return error;
 1487 }
 1488 
 1489 static size_t
 1490 fromptrsz(const struct exec_package *epp)
 1491 {
 1492         return (epp->ep_flags & EXEC_FROM32) ? sizeof(int) : sizeof(char *);
 1493 }
 1494 
 1495 static size_t
 1496 ptrsz(const struct exec_package *epp)
 1497 {
 1498         return (epp->ep_flags & EXEC_32) ? sizeof(int) : sizeof(char *);
 1499 }
 1500 
 1501 static size_t
 1502 calcargs(struct execve_data * restrict data, const size_t argenvstrlen)
 1503 {
 1504         struct exec_package     * const epp = &data->ed_pack;
 1505 
 1506         const size_t nargenvptrs =
 1507             1 +                         /* long argc */
 1508             data->ed_argc +             /* char *argv[] */
 1509             1 +                         /* \0 */
 1510             data->ed_envc +             /* char *env[] */
 1511             1;                          /* \0 */
 1512 
 1513         return (nargenvptrs * ptrsz(epp))       /* pointers */
 1514             + argenvstrlen                      /* strings */
 1515             + epp->ep_esch->es_arglen;          /* auxinfo */
 1516 }
 1517 
 1518 static size_t
 1519 calcstack(struct execve_data * restrict data, const size_t gaplen)
 1520 {
 1521         struct exec_package     * const epp = &data->ed_pack;
 1522 
 1523         data->ed_szsigcode = epp->ep_esch->es_emul->e_esigcode -
 1524             epp->ep_esch->es_emul->e_sigcode;
 1525 
 1526         data->ed_ps_strings_sz = (epp->ep_flags & EXEC_32) ?
 1527             sizeof(struct ps_strings32) : sizeof(struct ps_strings);
 1528 
 1529         const size_t sigcode_psstr_sz =
 1530             data->ed_szsigcode +        /* sigcode */
 1531             data->ed_ps_strings_sz +    /* ps_strings */
 1532             STACK_PTHREADSPACE;         /* pthread space */
 1533 
 1534         const size_t stacklen =
 1535             data->ed_argslen +
 1536             gaplen +
 1537             sigcode_psstr_sz;
 1538 
 1539         /* make the stack "safely" aligned */
 1540         return STACK_LEN_ALIGN(stacklen, STACK_ALIGNBYTES);
 1541 }
 1542 
 1543 static int
 1544 copyoutargs(struct execve_data * restrict data, struct lwp *l,
 1545     char * const newstack)
 1546 {
 1547         struct exec_package     * const epp = &data->ed_pack;
 1548         struct proc             *p = l->l_proc;
 1549         int                     error;
 1550 
 1551         memset(&data->ed_arginfo, 0, sizeof(data->ed_arginfo));
 1552 
 1553         /* remember information about the process */
 1554         data->ed_arginfo.ps_nargvstr = data->ed_argc;
 1555         data->ed_arginfo.ps_nenvstr = data->ed_envc;
 1556 
 1557         /*
 1558          * Allocate the stack address passed to the newly execve()'ed process.
 1559          *
 1560          * The new stack address will be set to the SP (stack pointer) register
 1561          * in setregs().
 1562          */
 1563 
 1564         char *newargs = STACK_ALLOC(
 1565             STACK_SHRINK(newstack, data->ed_argslen), data->ed_argslen);
 1566 
 1567         error = (*epp->ep_esch->es_copyargs)(l, epp,
 1568             &data->ed_arginfo, &newargs, data->ed_argp);
 1569 
 1570         if (error) {
 1571                 DPRINTF(("%s: copyargs failed %d\n", __func__, error));
 1572                 return error;
 1573         }
 1574 
 1575         error = copyoutpsstrs(data, p);
 1576         if (error != 0)
 1577                 return error;
 1578 
 1579         return 0;
 1580 }
 1581 
 1582 static int
 1583 copyoutpsstrs(struct execve_data * restrict data, struct proc *p)
 1584 {
 1585         struct exec_package     * const epp = &data->ed_pack;
 1586         struct ps_strings32     arginfo32;
 1587         void                    *aip;
 1588         int                     error;
 1589 
 1590         /* fill process ps_strings info */
 1591         p->p_psstrp = (vaddr_t)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
 1592             STACK_PTHREADSPACE), data->ed_ps_strings_sz);
 1593 
 1594         if (epp->ep_flags & EXEC_32) {
 1595                 aip = &arginfo32;
 1596                 arginfo32.ps_argvstr = (vaddr_t)data->ed_arginfo.ps_argvstr;
 1597                 arginfo32.ps_nargvstr = data->ed_arginfo.ps_nargvstr;
 1598                 arginfo32.ps_envstr = (vaddr_t)data->ed_arginfo.ps_envstr;
 1599                 arginfo32.ps_nenvstr = data->ed_arginfo.ps_nenvstr;
 1600         } else
 1601                 aip = &data->ed_arginfo;
 1602 
 1603         /* copy out the process's ps_strings structure */
 1604         if ((error = copyout(aip, (void *)p->p_psstrp, data->ed_ps_strings_sz))
 1605             != 0) {
 1606                 DPRINTF(("%s: ps_strings copyout %p->%p size %zu failed\n",
 1607                     __func__, aip, (void *)p->p_psstrp, data->ed_ps_strings_sz));
 1608                 return error;
 1609         }
 1610 
 1611         return 0;
 1612 }
 1613 
 1614 static int
 1615 copyinargs(struct execve_data * restrict data, char * const *args,
 1616     char * const *envs, execve_fetch_element_t fetch_element, char **dpp)
 1617 {
 1618         struct exec_package     * const epp = &data->ed_pack;
 1619         char                    *dp;
 1620         size_t                  i;
 1621         int                     error;
 1622 
 1623         dp = *dpp;
 1624 
 1625         data->ed_argc = 0;
 1626 
 1627         /* copy the fake args list, if there's one, freeing it as we go */
 1628         if (epp->ep_flags & EXEC_HASARGL) {
 1629                 struct exec_fakearg     *fa = epp->ep_fa;
 1630 
 1631                 while (fa->fa_arg != NULL) {
 1632                         const size_t maxlen = ARG_MAX - (dp - data->ed_argp);
 1633                         size_t len;
 1634 
 1635                         len = strlcpy(dp, fa->fa_arg, maxlen);
 1636                         /* Count NUL into len. */
 1637                         if (len < maxlen)
 1638                                 len++;
 1639                         else {
 1640                                 while (fa->fa_arg != NULL) {
 1641                                         kmem_free(fa->fa_arg, fa->fa_len);
 1642                                         fa++;
 1643                                 }
 1644                                 kmem_free(epp->ep_fa, epp->ep_fa_len);
 1645                                 epp->ep_flags &= ~EXEC_HASARGL;
 1646                                 return E2BIG;
 1647                         }
 1648                         ktrexecarg(fa->fa_arg, len - 1);
 1649                         dp += len;
 1650 
 1651                         kmem_free(fa->fa_arg, fa->fa_len);
 1652                         fa++;
 1653                         data->ed_argc++;
 1654                 }
 1655                 kmem_free(epp->ep_fa, epp->ep_fa_len);
 1656                 epp->ep_flags &= ~EXEC_HASARGL;
 1657         }
 1658 
 1659         /*
 1660          * Read and count argument strings from user.
 1661          */
 1662 
 1663         if (args == NULL) {
 1664                 DPRINTF(("%s: null args\n", __func__));
 1665                 return EINVAL;
 1666         }
 1667         if (epp->ep_flags & EXEC_SKIPARG)
 1668                 args = (const void *)((const char *)args + fromptrsz(epp));
 1669         i = 0;
 1670         error = copyinargstrs(data, args, fetch_element, &dp, &i, ktr_execarg);
 1671         if (error != 0) {
 1672                 DPRINTF(("%s: copyin arg %d\n", __func__, error));
 1673                 return error;
 1674         }
 1675         data->ed_argc += i;
 1676 
 1677         /*
 1678          * Read and count environment strings from user.
 1679          */
 1680 
 1681         data->ed_envc = 0;
 1682         /* environment need not be there */
 1683         if (envs == NULL)
 1684                 goto done;
 1685         i = 0;
 1686         error = copyinargstrs(data, envs, fetch_element, &dp, &i, ktr_execenv);
 1687         if (error != 0) {
 1688                 DPRINTF(("%s: copyin env %d\n", __func__, error));
 1689                 return error;
 1690         }
 1691         data->ed_envc += i;
 1692 
 1693 done:
 1694         *dpp = dp;
 1695 
 1696         return 0;
 1697 }
 1698 
 1699 static int
 1700 copyinargstrs(struct execve_data * restrict data, char * const *strs,
 1701     execve_fetch_element_t fetch_element, char **dpp, size_t *ip,
 1702     void (*ktr)(const void *, size_t))
 1703 {
 1704         char                    *dp, *sp;
 1705         size_t                  i;
 1706         int                     error;
 1707 
 1708         dp = *dpp;
 1709 
 1710         i = 0;
 1711         while (1) {
 1712                 const size_t maxlen = ARG_MAX - (dp - data->ed_argp);
 1713                 size_t len;
 1714 
 1715                 if ((error = (*fetch_element)(strs, i, &sp)) != 0) {
 1716                         return error;
 1717                 }
 1718                 if (!sp)
 1719                         break;
 1720                 if ((error = copyinstr(sp, dp, maxlen, &len)) != 0) {
 1721                         if (error == ENAMETOOLONG)
 1722                                 error = E2BIG;
 1723                         return error;
 1724                 }
 1725                 if (__predict_false(ktrace_on))
 1726                         (*ktr)(dp, len - 1);
 1727                 dp += len;
 1728                 i++;
 1729         }
 1730 
 1731         *dpp = dp;
 1732         *ip = i;
 1733 
 1734         return 0;
 1735 }
 1736 
 1737 /*
 1738  * Copy argv and env strings from kernel buffer (argp) to the new stack.
 1739  * Those strings are located just after auxinfo.
 1740  */
 1741 int
 1742 copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
 1743     char **stackp, void *argp)
 1744 {
 1745         char    **cpp, *dp, *sp;
 1746         size_t  len;
 1747         void    *nullp;
 1748         long    argc, envc;
 1749         int     error;
 1750 
 1751         cpp = (char **)*stackp;
 1752         nullp = NULL;
 1753         argc = arginfo->ps_nargvstr;
 1754         envc = arginfo->ps_nenvstr;
 1755 
 1756         /* argc on stack is long */
 1757         CTASSERT(sizeof(*cpp) == sizeof(argc));
 1758 
 1759         dp = (char *)(cpp +
 1760             1 +                         /* long argc */
 1761             argc +                      /* char *argv[] */
 1762             1 +                         /* \0 */
 1763             envc +                      /* char *env[] */
 1764             1) +                        /* \0 */
 1765             pack->ep_esch->es_arglen;   /* auxinfo */
 1766         sp = argp;
 1767 
 1768         if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0) {
 1769                 COPYPRINTF("", cpp - 1, sizeof(argc));
 1770                 return error;
 1771         }
 1772 
 1773         /* XXX don't copy them out, remap them! */
 1774         arginfo->ps_argvstr = cpp; /* remember location of argv for later */
 1775 
 1776         for (; --argc >= 0; sp += len, dp += len) {
 1777                 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
 1778                         COPYPRINTF("", cpp - 1, sizeof(dp));
 1779                         return error;
 1780                 }
 1781                 if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
 1782                         COPYPRINTF("str", dp, (size_t)ARG_MAX);
 1783                         return error;
 1784                 }
 1785         }
 1786 
 1787         if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
 1788                 COPYPRINTF("", cpp - 1, sizeof(nullp));
 1789                 return error;
 1790         }
 1791 
 1792         arginfo->ps_envstr = cpp; /* remember location of envp for later */
 1793 
 1794         for (; --envc >= 0; sp += len, dp += len) {
 1795                 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
 1796                         COPYPRINTF("", cpp - 1, sizeof(dp));
 1797                         return error;
 1798                 }
 1799                 if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
 1800                         COPYPRINTF("str", dp, (size_t)ARG_MAX);
 1801                         return error;
 1802                 }
 1803 
 1804         }
 1805 
 1806         if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
 1807                 COPYPRINTF("", cpp - 1, sizeof(nullp));
 1808                 return error;
 1809         }
 1810 
 1811         *stackp = (char *)cpp;
 1812         return 0;
 1813 }
 1814 
 1815 
 1816 /*
 1817  * Add execsw[] entries.
 1818  */
 1819 int
 1820 exec_add(struct execsw *esp, int count)
 1821 {
 1822         struct exec_entry       *it;
 1823         int                     i, error = 0;
 1824 
 1825         if (count == 0) {
 1826                 return 0;
 1827         }
 1828 
 1829         /* Check for duplicates. */
 1830         rw_enter(&exec_lock, RW_WRITER);
 1831         for (i = 0; i < count; i++) {
 1832                 LIST_FOREACH(it, &ex_head, ex_list) {
 1833                         /* assume unique (makecmds, probe_func, emulation) */
 1834                         if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
 1835                             it->ex_sw->u.elf_probe_func ==
 1836                             esp[i].u.elf_probe_func &&
 1837                             it->ex_sw->es_emul == esp[i].es_emul) {
 1838                                 rw_exit(&exec_lock);
 1839                                 return EEXIST;
 1840                         }
 1841                 }
 1842         }
 1843 
 1844         /* Allocate new entries. */
 1845         for (i = 0; i < count; i++) {
 1846                 it = kmem_alloc(sizeof(*it), KM_SLEEP);
 1847                 it->ex_sw = &esp[i];
 1848                 error = exec_sigcode_alloc(it->ex_sw->es_emul);
 1849                 if (error != 0) {
 1850                         kmem_free(it, sizeof(*it));
 1851                         break;
 1852                 }
 1853                 LIST_INSERT_HEAD(&ex_head, it, ex_list);
 1854         }
 1855         /* If even one fails, remove them all back. */
 1856         if (error != 0) {
 1857                 for (i--; i >= 0; i--) {
 1858                         it = LIST_FIRST(&ex_head);
 1859                         LIST_REMOVE(it, ex_list);
 1860                         exec_sigcode_free(it->ex_sw->es_emul);
 1861                         kmem_free(it, sizeof(*it));
 1862                 }
 1863                 return error;
 1864         }
 1865 
 1866         /* update execsw[] */
 1867         exec_init(0);
 1868         rw_exit(&exec_lock);
 1869         return 0;
 1870 }
 1871 
 1872 /*
 1873  * Remove execsw[] entry.
 1874  */
 1875 int
 1876 exec_remove(struct execsw *esp, int count)
 1877 {
 1878         struct exec_entry       *it, *next;
 1879         int                     i;
 1880         const struct proclist_desc *pd;
 1881         proc_t                  *p;
 1882 
 1883         if (count == 0) {
 1884                 return 0;
 1885         }
 1886 
 1887         /* Abort if any are busy. */
 1888         rw_enter(&exec_lock, RW_WRITER);
 1889         for (i = 0; i < count; i++) {
 1890                 mutex_enter(&proc_lock);
 1891                 for (pd = proclists; pd->pd_list != NULL; pd++) {
 1892                         PROCLIST_FOREACH(p, pd->pd_list) {
 1893                                 if (p->p_execsw == &esp[i]) {
 1894                                         mutex_exit(&proc_lock);
 1895                                         rw_exit(&exec_lock);
 1896                                         return EBUSY;
 1897                                 }
 1898                         }
 1899                 }
 1900                 mutex_exit(&proc_lock);
 1901         }
 1902 
 1903         /* None are busy, so remove them all. */
 1904         for (i = 0; i < count; i++) {
 1905                 for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
 1906                         next = LIST_NEXT(it, ex_list);
 1907                         if (it->ex_sw == &esp[i]) {
 1908                                 LIST_REMOVE(it, ex_list);
 1909                                 exec_sigcode_free(it->ex_sw->es_emul);
 1910                                 kmem_free(it, sizeof(*it));
 1911                                 break;
 1912                         }
 1913                 }
 1914         }
 1915 
 1916         /* update execsw[] */
 1917         exec_init(0);
 1918         rw_exit(&exec_lock);
 1919         return 0;
 1920 }
 1921 
 1922 /*
 1923  * Initialize exec structures. If init_boot is true, also does necessary
 1924  * one-time initialization (it's called from main() that way).
 1925  * Once system is multiuser, this should be called with exec_lock held,
 1926  * i.e. via exec_{add|remove}().
 1927  */
 1928 int
 1929 exec_init(int init_boot)
 1930 {
 1931         const struct execsw     **sw;
 1932         struct exec_entry       *ex;
 1933         SLIST_HEAD(,exec_entry) first;
 1934         SLIST_HEAD(,exec_entry) any;
 1935         SLIST_HEAD(,exec_entry) last;
 1936         int                     i, sz;
 1937 
 1938         if (init_boot) {
 1939                 /* do one-time initializations */
 1940                 vaddr_t vmin = 0, vmax;
 1941 
 1942                 rw_init(&exec_lock);
 1943                 exec_map = uvm_km_suballoc(kernel_map, &vmin, &vmax,
 1944                     maxexec*NCARGS, VM_MAP_PAGEABLE, false, NULL);
 1945                 pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
 1946                     "execargs", &exec_palloc, IPL_NONE);
 1947                 pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
 1948         } else {
 1949                 KASSERT(rw_write_held(&exec_lock));
 1950         }
 1951 
 1952         /* Sort each entry onto the appropriate queue. */
 1953         SLIST_INIT(&first);
 1954         SLIST_INIT(&any);
 1955         SLIST_INIT(&last);
 1956         sz = 0;
 1957         LIST_FOREACH(ex, &ex_head, ex_list) {
 1958                 switch(ex->ex_sw->es_prio) {
 1959                 case EXECSW_PRIO_FIRST:
 1960                         SLIST_INSERT_HEAD(&first, ex, ex_slist);
 1961                         break;
 1962                 case EXECSW_PRIO_ANY:
 1963                         SLIST_INSERT_HEAD(&any, ex, ex_slist);
 1964                         break;
 1965                 case EXECSW_PRIO_LAST:
 1966                         SLIST_INSERT_HEAD(&last, ex, ex_slist);
 1967                         break;
 1968                 default:
 1969                         panic("%s", __func__);
 1970                         break;
 1971                 }
 1972                 sz++;
 1973         }
 1974 
 1975         /*
 1976          * Create new execsw[].  Ensure we do not try a zero-sized
 1977          * allocation.
 1978          */
 1979         sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
 1980         i = 0;
 1981         SLIST_FOREACH(ex, &first, ex_slist) {
 1982                 sw[i++] = ex->ex_sw;
 1983         }
 1984         SLIST_FOREACH(ex, &any, ex_slist) {
 1985                 sw[i++] = ex->ex_sw;
 1986         }
 1987         SLIST_FOREACH(ex, &last, ex_slist) {
 1988                 sw[i++] = ex->ex_sw;
 1989         }
 1990 
 1991         /* Replace old execsw[] and free used memory. */
 1992         if (execsw != NULL) {
 1993                 kmem_free(__UNCONST(execsw),
 1994                     nexecs * sizeof(struct execsw *) + 1);
 1995         }
 1996         execsw = sw;
 1997         nexecs = sz;
 1998 
 1999         /* Figure out the maximum size of an exec header. */
 2000         exec_maxhdrsz = sizeof(int);
 2001         for (i = 0; i < nexecs; i++) {
 2002                 if (execsw[i]->es_hdrsz > exec_maxhdrsz)
 2003                         exec_maxhdrsz = execsw[i]->es_hdrsz;
 2004         }
 2005 
 2006         return 0;
 2007 }
 2008 
 2009 int
 2010 exec_sigcode_alloc(const struct emul *e)
 2011 {
 2012         vaddr_t va;
 2013         vsize_t sz;
 2014         int error;
 2015         struct uvm_object *uobj;
 2016 
 2017         KASSERT(rw_lock_held(&exec_lock));
 2018 
 2019         if (e == NULL || e->e_sigobject == NULL)
 2020                 return 0;
 2021 
 2022         sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
 2023         if (sz == 0)
 2024                 return 0;
 2025 
 2026         /*
 2027          * Create a sigobject for this emulation.
 2028          *
 2029          * sigobject is an anonymous memory object (just like SYSV shared
 2030          * memory) that we keep a permanent reference to and that we map
 2031          * in all processes that need this sigcode. The creation is simple,
 2032          * we create an object, add a permanent reference to it, map it in
 2033          * kernel space, copy out the sigcode to it and unmap it.
 2034          * We map it with PROT_READ|PROT_EXEC into the process just
 2035          * the way sys_mmap() would map it.
 2036          */
 2037         if (*e->e_sigobject == NULL) {
 2038                 uobj = uao_create(sz, 0);
 2039                 (*uobj->pgops->pgo_reference)(uobj);
 2040                 va = vm_map_min(kernel_map);
 2041                 if ((error = uvm_map(kernel_map, &va, round_page(sz),
 2042                     uobj, 0, 0,
 2043                     UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
 2044                     UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
 2045                         printf("sigcode kernel mapping failed %d\n", error);
 2046                         (*uobj->pgops->pgo_detach)(uobj);
 2047                         return error;
 2048                 }
 2049                 memcpy((void *)va, e->e_sigcode, sz);
 2050 #ifdef PMAP_NEED_PROCWR
 2051                 pmap_procwr(&proc0, va, sz);
 2052 #endif
 2053                 uvm_unmap(kernel_map, va, va + round_page(sz));
 2054                 *e->e_sigobject = uobj;
 2055                 KASSERT(uobj->uo_refs == 1);
 2056         } else {
 2057                 /* if already created, reference++ */
 2058                 uobj = *e->e_sigobject;
 2059                 (*uobj->pgops->pgo_reference)(uobj);
 2060         }
 2061 
 2062         return 0;
 2063 }
 2064 
 2065 void
 2066 exec_sigcode_free(const struct emul *e)
 2067 {
 2068         struct uvm_object *uobj;
 2069 
 2070         KASSERT(rw_lock_held(&exec_lock));
 2071 
 2072         if (e == NULL || e->e_sigobject == NULL)
 2073                 return;
 2074 
 2075         uobj = *e->e_sigobject;
 2076         if (uobj == NULL)
 2077                 return;
 2078 
 2079         if (uobj->uo_refs == 1)
 2080                 *e->e_sigobject = NULL; /* I'm the last person to reference. */
 2081         (*uobj->pgops->pgo_detach)(uobj);
 2082 }
 2083 
 2084 static int
 2085 exec_sigcode_map(struct proc *p, const struct emul *e)
 2086 {
 2087         vaddr_t va;
 2088         vsize_t sz;
 2089         int error;
 2090         struct uvm_object *uobj;
 2091 
 2092         sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
 2093         if (e->e_sigobject == NULL || sz == 0)
 2094                 return 0;
 2095 
 2096         uobj = *e->e_sigobject;
 2097         if (uobj == NULL)
 2098                 return 0;
 2099 
 2100         /* Just a hint to uvm_map where to put it. */
 2101         va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
 2102             round_page(sz), p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
 2103 
 2104 #ifdef __alpha__
 2105         /*
 2106          * Tru64 puts /sbin/loader at the end of user virtual memory,
 2107          * which causes the above calculation to put the sigcode at
 2108          * an invalid address.  Put it just below the text instead.
 2109          */
 2110         if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
 2111                 va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
 2112         }
 2113 #endif
 2114 
 2115         (*uobj->pgops->pgo_reference)(uobj);
 2116         error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
 2117                         uobj, 0, 0,
 2118                         UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
 2119                                     UVM_ADV_RANDOM, 0));
 2120         if (error) {
 2121                 DPRINTF(("%s, %d: map %p "
 2122                     "uvm_map %#"PRIxVSIZE"@%#"PRIxVADDR" failed %d\n",
 2123                     __func__, __LINE__, &p->p_vmspace->vm_map, round_page(sz),
 2124                     va, error));
 2125                 (*uobj->pgops->pgo_detach)(uobj);
 2126                 return error;
 2127         }
 2128         p->p_sigctx.ps_sigcode = (void *)va;
 2129         return 0;
 2130 }
 2131 
 2132 /*
 2133  * Release a refcount on spawn_exec_data and destroy memory, if this
 2134  * was the last one.
 2135  */
 2136 static void
 2137 spawn_exec_data_release(struct spawn_exec_data *data)
 2138 {
 2139 
 2140         membar_release();
 2141         if (atomic_dec_32_nv(&data->sed_refcnt) != 0)
 2142                 return;
 2143         membar_acquire();
 2144 
 2145         cv_destroy(&data->sed_cv_child_ready);
 2146         mutex_destroy(&data->sed_mtx_child);
 2147 
 2148         if (data->sed_actions)
 2149                 posix_spawn_fa_free(data->sed_actions,
 2150                     data->sed_actions->len);
 2151         if (data->sed_attrs)
 2152                 kmem_free(data->sed_attrs,
 2153                     sizeof(*data->sed_attrs));
 2154         kmem_free(data, sizeof(*data));
 2155 }
 2156 
 2157 static int
 2158 handle_posix_spawn_file_actions(struct posix_spawn_file_actions *actions)
 2159 {
 2160         struct lwp *l = curlwp;
 2161         register_t retval;
 2162         int error, newfd;
 2163 
 2164         if (actions == NULL)
 2165                 return 0;
 2166 
 2167         for (size_t i = 0; i < actions->len; i++) {
 2168                 const struct posix_spawn_file_actions_entry *fae =
 2169                     &actions->fae[i];
 2170                 switch (fae->fae_action) {
 2171                 case FAE_OPEN:
 2172                         if (fd_getfile(fae->fae_fildes) != NULL) {
 2173                                 error = fd_close(fae->fae_fildes);
 2174                                 if (error)
 2175                                         return error;
 2176                         }
 2177                         error = fd_open(fae->fae_path, fae->fae_oflag,
 2178                             fae->fae_mode, &newfd);
 2179                         if (error)
 2180                                 return error;
 2181                         if (newfd != fae->fae_fildes) {
 2182                                 error = dodup(l, newfd,
 2183                                     fae->fae_fildes, 0, &retval);
 2184                                 if (fd_getfile(newfd) != NULL)
 2185                                         fd_close(newfd);
 2186                         }
 2187                         break;
 2188                 case FAE_DUP2:
 2189                         error = dodup(l, fae->fae_fildes,
 2190                             fae->fae_newfildes, 0, &retval);
 2191                         break;
 2192                 case FAE_CLOSE:
 2193                         if (fd_getfile(fae->fae_fildes) == NULL) {
 2194                                 return EBADF;
 2195                         }
 2196                         error = fd_close(fae->fae_fildes);
 2197                         break;
 2198                 case FAE_CHDIR:
 2199                         error = do_sys_chdir(l, fae->fae_chdir_path,
 2200                             UIO_SYSSPACE, &retval);
 2201                         break;
 2202                 case FAE_FCHDIR:
 2203                         error = do_sys_fchdir(l, fae->fae_fildes, &retval);
 2204                         break;
 2205                 }
 2206                 if (error)
 2207                         return error;
 2208         }
 2209         return 0;
 2210 }
 2211 
 2212 static int
 2213 handle_posix_spawn_attrs(struct posix_spawnattr *attrs, struct proc *parent)
 2214 {
 2215         struct sigaction sigact;
 2216         int error;
 2217         struct proc *p = curproc;
 2218         struct lwp *l = curlwp;
 2219 
 2220         if (attrs == NULL)
 2221                 return 0;
 2222 
 2223         memset(&sigact, 0, sizeof(sigact));
 2224         sigact._sa_u._sa_handler = SIG_DFL;
 2225         sigact.sa_flags = 0;
 2226 
 2227         /* 
 2228          * set state to SSTOP so that this proc can be found by pid.
 2229          * see proc_enterprp, do_sched_setparam below
 2230          */
 2231         mutex_enter(&proc_lock);
 2232         /*
 2233          * p_stat should be SACTIVE, so we need to adjust the
 2234          * parent's p_nstopchild here.  For safety, just make
 2235          * we're on the good side of SDEAD before we adjust.
 2236          */
 2237         int ostat = p->p_stat;
 2238         KASSERT(ostat < SSTOP);
 2239         p->p_stat = SSTOP;
 2240         p->p_waited = 0;
 2241         p->p_pptr->p_nstopchild++;
 2242         mutex_exit(&proc_lock);
 2243 
 2244         /* Set process group */
 2245         if (attrs->sa_flags & POSIX_SPAWN_SETPGROUP) {
 2246                 pid_t mypid = p->p_pid;
 2247                 pid_t pgrp = attrs->sa_pgroup;
 2248 
 2249                 if (pgrp == 0)
 2250                         pgrp = mypid;
 2251 
 2252                 error = proc_enterpgrp(parent, mypid, pgrp, false);
 2253                 if (error)
 2254                         goto out;
 2255         }
 2256 
 2257         /* Set scheduler policy */
 2258         if (attrs->sa_flags & POSIX_SPAWN_SETSCHEDULER)
 2259                 error = do_sched_setparam(p->p_pid, 0, attrs->sa_schedpolicy,
 2260                     &attrs->sa_schedparam);
 2261         else if (attrs->sa_flags & POSIX_SPAWN_SETSCHEDPARAM) {
 2262                 error = do_sched_setparam(parent->p_pid, 0,
 2263                     SCHED_NONE, &attrs->sa_schedparam);
 2264         }
 2265         if (error)
 2266                 goto out;
 2267 
 2268         /* Reset user ID's */
 2269         if (attrs->sa_flags & POSIX_SPAWN_RESETIDS) {
 2270                 error = do_setresgid(l, -1, kauth_cred_getgid(l->l_cred), -1,
 2271                      ID_E_EQ_R | ID_E_EQ_S);
 2272                 if (error)
 2273                         return error;
 2274                 error = do_setresuid(l, -1, kauth_cred_getuid(l->l_cred), -1,
 2275                     ID_E_EQ_R | ID_E_EQ_S);
 2276                 if (error)
 2277                         goto out;
 2278         }
 2279 
 2280         /* Set signal masks/defaults */
 2281         if (attrs->sa_flags & POSIX_SPAWN_SETSIGMASK) {
 2282                 mutex_enter(p->p_lock);
 2283                 error = sigprocmask1(l, SIG_SETMASK, &attrs->sa_sigmask, NULL);
 2284                 mutex_exit(p->p_lock);
 2285                 if (error)
 2286                         goto out;
 2287         }
 2288 
 2289         if (attrs->sa_flags & POSIX_SPAWN_SETSIGDEF) {
 2290                 /*
 2291                  * The following sigaction call is using a sigaction
 2292                  * version 0 trampoline which is in the compatibility
 2293                  * code only. This is not a problem because for SIG_DFL
 2294                  * and SIG_IGN, the trampolines are now ignored. If they
 2295                  * were not, this would be a problem because we are
 2296                  * holding the exec_lock, and the compat code needs
 2297                  * to do the same in order to replace the trampoline
 2298                  * code of the process.
 2299                  */
 2300                 for (int i = 1; i <= NSIG; i++) {
 2301                         if (sigismember(&attrs->sa_sigdefault, i))
 2302                                 sigaction1(l, i, &sigact, NULL, NULL, 0);
 2303                 }
 2304         }
 2305         error = 0;
 2306 out:
 2307         mutex_enter(&proc_lock);
 2308         p->p_stat = ostat;
 2309         p->p_pptr->p_nstopchild--;
 2310         mutex_exit(&proc_lock);
 2311         return error;
 2312 }
 2313 
 2314 /*
 2315  * A child lwp of a posix_spawn operation starts here and ends up in
 2316  * cpu_spawn_return, dealing with all filedescriptor and scheduler
 2317  * manipulations in between.
 2318  * The parent waits for the child, as it is not clear whether the child
 2319  * will be able to acquire its own exec_lock. If it can, the parent can
 2320  * be released early and continue running in parallel. If not (or if the
 2321  * magic debug flag is passed in the scheduler attribute struct), the
 2322  * child rides on the parent's exec lock until it is ready to return to
 2323  * to userland - and only then releases the parent. This method loses
 2324  * concurrency, but improves error reporting.
 2325  */
 2326 static void
 2327 spawn_return(void *arg)
 2328 {
 2329         struct spawn_exec_data *spawn_data = arg;
 2330         struct lwp *l = curlwp;
 2331         struct proc *p = l->l_proc;
 2332         int error;
 2333         bool have_reflock;
 2334         bool parent_is_waiting = true;
 2335 
 2336         /*
 2337          * Check if we can release parent early.
 2338          * We either need to have no sed_attrs, or sed_attrs does not
 2339          * have POSIX_SPAWN_RETURNERROR or one of the flags, that require
 2340          * safe access to the parent proc (passed in sed_parent).
 2341          * We then try to get the exec_lock, and only if that works, we can
 2342          * release the parent here already.
 2343          */
 2344         struct posix_spawnattr *attrs = spawn_data->sed_attrs;
 2345         if ((!attrs || (attrs->sa_flags
 2346                 & (POSIX_SPAWN_RETURNERROR|POSIX_SPAWN_SETPGROUP)) == 0)
 2347             && rw_tryenter(&exec_lock, RW_READER)) {
 2348                 parent_is_waiting = false;
 2349                 mutex_enter(&spawn_data->sed_mtx_child);
 2350                 cv_signal(&spawn_data->sed_cv_child_ready);
 2351                 mutex_exit(&spawn_data->sed_mtx_child);
 2352         }
 2353 
 2354         /* don't allow debugger access yet */
 2355         rw_enter(&p->p_reflock, RW_WRITER);
 2356         have_reflock = true;
 2357 
 2358         /* handle posix_spawnattr */
 2359         error = handle_posix_spawn_attrs(attrs, spawn_data->sed_parent);
 2360         if (error)
 2361                 goto report_error;
 2362 
 2363         /* handle posix_spawn_file_actions */
 2364         error = handle_posix_spawn_file_actions(spawn_data->sed_actions);
 2365         if (error)
 2366                 goto report_error;
 2367 
 2368         /* now do the real exec */
 2369         error = execve_runproc(l, &spawn_data->sed_exec, parent_is_waiting,
 2370             true);
 2371         have_reflock = false;
 2372         if (error == EJUSTRETURN)
 2373                 error = 0;
 2374         else if (error)
 2375                 goto report_error;
 2376 
 2377         if (parent_is_waiting) {
 2378                 mutex_enter(&spawn_data->sed_mtx_child);
 2379                 cv_signal(&spawn_data->sed_cv_child_ready);
 2380                 mutex_exit(&spawn_data->sed_mtx_child);
 2381         }
 2382 
 2383         /* release our refcount on the data */
 2384         spawn_exec_data_release(spawn_data);
 2385 
 2386         if ((p->p_slflag & (PSL_TRACED|PSL_TRACEDCHILD)) ==
 2387             (PSL_TRACED|PSL_TRACEDCHILD)) {
 2388                 eventswitchchild(p, TRAP_CHLD, PTRACE_POSIX_SPAWN);
 2389         }
 2390 
 2391         /* and finally: leave to userland for the first time */
 2392         cpu_spawn_return(l);
 2393 
 2394         /* NOTREACHED */
 2395         return;
 2396 
 2397  report_error:
 2398         if (have_reflock) {
 2399                 /*
 2400                  * We have not passed through execve_runproc(),
 2401                  * which would have released the p_reflock and also
 2402                  * taken ownership of the sed_exec part of spawn_data,
 2403                  * so release/free both here.
 2404                  */
 2405                 rw_exit(&p->p_reflock);
 2406                 execve_free_data(&spawn_data->sed_exec);
 2407         }
 2408 
 2409         if (parent_is_waiting) {
 2410                 /* pass error to parent */
 2411                 mutex_enter(&spawn_data->sed_mtx_child);
 2412                 spawn_data->sed_error = error;
 2413                 cv_signal(&spawn_data->sed_cv_child_ready);
 2414                 mutex_exit(&spawn_data->sed_mtx_child);
 2415         } else {
 2416                 rw_exit(&exec_lock);
 2417         }
 2418 
 2419         /* release our refcount on the data */
 2420         spawn_exec_data_release(spawn_data);
 2421 
 2422         /* done, exit */
 2423         mutex_enter(p->p_lock);
 2424         /*
 2425          * Posix explicitly asks for an exit code of 127 if we report
 2426          * errors from the child process - so, unfortunately, there
 2427          * is no way to report a more exact error code.
 2428          * A NetBSD specific workaround is POSIX_SPAWN_RETURNERROR as
 2429          * flag bit in the attrp argument to posix_spawn(2), see above.
 2430          */
 2431         exit1(l, 127, 0);
 2432 }
 2433 
 2434 static __inline char **
 2435 posix_spawn_fae_path(struct posix_spawn_file_actions_entry *fae)
 2436 {
 2437         switch (fae->fae_action) {
 2438         case FAE_OPEN:
 2439                 return &fae->fae_path;
 2440         case FAE_CHDIR:
 2441                 return &fae->fae_chdir_path;
 2442         default:
 2443                 return NULL;
 2444         }
 2445 }
 2446     
 2447 void
 2448 posix_spawn_fa_free(struct posix_spawn_file_actions *fa, size_t len)
 2449 {
 2450 
 2451         for (size_t i = 0; i < len; i++) {
 2452                 char **pathp = posix_spawn_fae_path(&fa->fae[i]);
 2453                 if (pathp)
 2454                         kmem_strfree(*pathp);
 2455         }
 2456         if (fa->len > 0)
 2457                 kmem_free(fa->fae, sizeof(*fa->fae) * fa->len);
 2458         kmem_free(fa, sizeof(*fa));
 2459 }
 2460 
 2461 static int
 2462 posix_spawn_fa_alloc(struct posix_spawn_file_actions **fap,
 2463     const struct posix_spawn_file_actions *ufa, rlim_t lim)
 2464 {
 2465         struct posix_spawn_file_actions *fa;
 2466         struct posix_spawn_file_actions_entry *fae;
 2467         char *pbuf = NULL;
 2468         int error;
 2469         size_t i = 0;
 2470 
 2471         fa = kmem_alloc(sizeof(*fa), KM_SLEEP);
 2472         error = copyin(ufa, fa, sizeof(*fa));
 2473         if (error || fa->len == 0) {
 2474                 kmem_free(fa, sizeof(*fa));
 2475                 return error;   /* 0 if not an error, and len == 0 */
 2476         }
 2477 
 2478         if (fa->len > lim) {
 2479                 kmem_free(fa, sizeof(*fa));
 2480                 return EINVAL;
 2481         }
 2482 
 2483         fa->size = fa->len;
 2484         size_t fal = fa->len * sizeof(*fae);
 2485         fae = fa->fae;
 2486         fa->fae = kmem_alloc(fal, KM_SLEEP);
 2487         error = copyin(fae, fa->fae, fal);
 2488         if (error)
 2489                 goto out;
 2490 
 2491         pbuf = PNBUF_GET();
 2492         for (; i < fa->len; i++) {
 2493                 char **pathp = posix_spawn_fae_path(&fa->fae[i]);
 2494                 if (pathp == NULL)
 2495                         continue;
 2496                 error = copyinstr(*pathp, pbuf, MAXPATHLEN, &fal);
 2497                 if (error)
 2498                         goto out;
 2499                 *pathp = kmem_alloc(fal, KM_SLEEP);
 2500                 memcpy(*pathp, pbuf, fal);
 2501         }
 2502         PNBUF_PUT(pbuf);
 2503 
 2504         *fap = fa;
 2505         return 0;
 2506 out:
 2507         if (pbuf)
 2508                 PNBUF_PUT(pbuf);
 2509         posix_spawn_fa_free(fa, i);
 2510         return error;
 2511 }
 2512 
 2513 /*
 2514  * N.B. increments nprocs upon success.  Callers need to drop nprocs if
 2515  * they fail for some other reason.
 2516  */
 2517 int
 2518 check_posix_spawn(struct lwp *l1)
 2519 {
 2520         int error, tnprocs, count;
 2521         uid_t uid;
 2522         struct proc *p1;
 2523 
 2524         p1 = l1->l_proc;
 2525         uid = kauth_cred_getuid(l1->l_cred);
 2526         tnprocs = atomic_inc_uint_nv(&nprocs);
 2527 
 2528         /*
 2529          * Although process entries are dynamically created, we still keep
 2530          * a global limit on the maximum number we will create.
 2531          */
 2532         if (__predict_false(tnprocs >= maxproc))
 2533                 error = -1;
 2534         else
 2535                 error = kauth_authorize_process(l1->l_cred,
 2536                     KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);
 2537 
 2538         if (error) {
 2539                 atomic_dec_uint(&nprocs);
 2540                 return EAGAIN;
 2541         }
 2542 
 2543         /*
 2544          * Enforce limits.
 2545          */
 2546         count = chgproccnt(uid, 1);
 2547         if (kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_RLIMIT,
 2548              p1, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
 2549              &p1->p_rlimit[RLIMIT_NPROC], KAUTH_ARG(RLIMIT_NPROC)) != 0 &&
 2550             __predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
 2551                 (void)chgproccnt(uid, -1);
 2552                 atomic_dec_uint(&nprocs);
 2553                 return EAGAIN;
 2554         }
 2555 
 2556         return 0;
 2557 }
 2558 
 2559 int
 2560 do_posix_spawn(struct lwp *l1, pid_t *pid_res, bool *child_ok, const char *path,
 2561         struct posix_spawn_file_actions *fa,
 2562         struct posix_spawnattr *sa,
 2563         char *const *argv, char *const *envp,
 2564         execve_fetch_element_t fetch)
 2565 {
              /*
               * Create a new process p2 as a child of l1's process p1 and
               * let it execute `path'.  The first half of the exec is done
               * here through execve_loadvm(); then a proc and one LWP are
               * built (spawn_return and spawn_data are passed to
               * lwp_create()), the LWP is made runnable, and this function
               * waits on sed_cv_child_ready for the child's status.
               *
               * l1        calling LWP
               * pid_res   out: pid of the new process; written only after
               *           the child has been created, including the case
               *           where the child then reports an error
               * child_ok  out: set to true once the child LWP exists; not
               *           written on the early error_exit path
               * path, argv, envp, fetch
               *           passed unchanged to execve_loadvm()
               * fa, sa    stored in spawn_data (sed_actions / sed_attrs)
               *
               * Returns 0, the error from execve_loadvm(), ENOMEM (no
               * uarea), EAGAIN (no proc), or the sed_error value found in
               * spawn_data after the wait.
               */
 2566 
 2567         struct proc *p1, *p2;
 2568         struct lwp *l2;
 2569         int error;
 2570         struct spawn_exec_data *spawn_data;
 2571         vaddr_t uaddr = 0;
 2572         pid_t pid;
 2573         bool have_exec_lock = false;
 2574 
 2575         p1 = l1->l_proc;
 2576 
 2577         /* Allocate and init spawn_data */
 2578         spawn_data = kmem_zalloc(sizeof(*spawn_data), KM_SLEEP);
 2579         spawn_data->sed_refcnt = 1; /* only parent so far */
 2580         cv_init(&spawn_data->sed_cv_child_ready, "pspawn");
 2581         mutex_init(&spawn_data->sed_mtx_child, MUTEX_DEFAULT, IPL_NONE);
              /*
               * sed_mtx_child stays held until the cv_wait() near the end
               * (or until error_exit); the child takes the same mutex
               * before it signals sed_cv_child_ready.
               */
 2582         mutex_enter(&spawn_data->sed_mtx_child);
 2583 
 2584         /*
 2585          * Do the first part of the exec now, collect state
 2586          * in spawn_data.
 2587          */
 2588         error = execve_loadvm(l1, true, path, -1, argv,
 2589             envp, fetch, &spawn_data->sed_exec);
 2590         if (error == EJUSTRETURN)
 2591                 error = 0;
 2592         else if (error)
 2593                 goto error_exit;
 2594 
              /*
               * From here on the error path has to undo execve_loadvm():
               * free sed_exec and drop p1->p_reflock and exec_lock (see
               * error_exit).
               */
 2595         have_exec_lock = true;
 2596 
 2597         /*
 2598          * Allocate virtual address space for the U-area now, while it
 2599          * is still easy to abort the fork operation if we're out of
 2600          * kernel virtual address space.
 2601          */
 2602         uaddr = uvm_uarea_alloc();
 2603         if (__predict_false(uaddr == 0)) {
 2604                 error = ENOMEM;
 2605                 goto error_exit;
 2606         }
 2607         
 2608         /*
 2609          * Allocate new proc. Borrow proc0 vmspace for it, we will
 2610          * replace it with its own before returning to userland
 2611          * in the child.
 2612          */
 2613         p2 = proc_alloc();
 2614         if (p2 == NULL) {
 2615                 /* We were unable to allocate a process ID. */
 2616                 error = EAGAIN;
 2617                 goto error_exit;
 2618         }
 2619 
 2620         /*
 2621          * This is a point of no return, we will have to go through
 2622          * the child proc to properly clean it up past this point.
 2623          */
 2624         pid = p2->p_pid;
 2625 
 2626         /*
 2627          * Make a proc table entry for the new process.
 2628          * Start by zeroing the section of proc that is zero-initialized,
 2629          * then copy the section that is copied directly from the parent.
 2630          */
 2631         memset(&p2->p_startzero, 0,
 2632             (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
 2633         memcpy(&p2->p_startcopy, &p1->p_startcopy,
 2634             (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));
 2635         p2->p_vmspace = proc0.p_vmspace;
 2636 
 2637         TAILQ_INIT(&p2->p_sigpend.sp_info);
 2638 
 2639         LIST_INIT(&p2->p_lwps);
 2640         LIST_INIT(&p2->p_sigwaiters);
 2641 
 2642         /*
 2643          * Duplicate sub-structures as needed.
 2644          * Increase reference counts on shared objects.
 2645          * Inherit flags we want to keep.  The flags related to SIGCHLD
 2646          * handling are important in order to keep a consistent behaviour
 2647          * for the child after the fork.  If we are a 32-bit process, the
 2648          * child will be too.
 2649          */
 2650         p2->p_flag =
 2651             p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
 2652         p2->p_emul = p1->p_emul;
 2653         p2->p_execsw = p1->p_execsw;
 2654 
 2655         mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
 2656         mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
 2657         rw_init(&p2->p_reflock);
 2658         cv_init(&p2->p_waitcv, "wait");
 2659         cv_init(&p2->p_lwpcv, "lwpwait");
 2660 
 2661         p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 2662 
 2663         kauth_proc_fork(p1, p2);
 2664 
 2665         p2->p_raslist = NULL;
 2666         p2->p_fd = fd_copy();
 2667 
 2668         /* XXX racy */
 2669         p2->p_mqueue_cnt = p1->p_mqueue_cnt;
 2670 
 2671         p2->p_cwdi = cwdinit();
 2672 
 2673         /*
 2674          * Note: p_limit (rlimit stuff) is copy-on-write, so normally
 2675          * we just need increase pl_refcnt.
 2676          */
 2677         if (!p1->p_limit->pl_writeable) {
 2678                 lim_addref(p1->p_limit);
 2679                 p2->p_limit = p1->p_limit;
 2680         } else {
 2681                 p2->p_limit = lim_copy(p1->p_limit);
 2682         }
 2683 
 2684         p2->p_lflag = 0;
 2685         l1->l_vforkwaiting = false;
 2686         p2->p_sflag = 0;
 2687         p2->p_slflag = 0;
 2688         p2->p_pptr = p1;
 2689         p2->p_ppid = p1->p_pid;
 2690         LIST_INIT(&p2->p_children);
 2691 
 2692         p2->p_aio = NULL;
 2693 
 2694 #ifdef KTRACE
 2695         /*
 2696          * Copy traceflag and tracefile if enabled.
 2697          * If not inherited, these were zeroed above.
 2698          */
 2699         if (p1->p_traceflag & KTRFAC_INHERIT) {
 2700                 mutex_enter(&ktrace_lock);
 2701                 p2->p_traceflag = p1->p_traceflag;
 2702                 if ((p2->p_tracep = p1->p_tracep) != NULL)
 2703                         ktradref(p2);
 2704                 mutex_exit(&ktrace_lock);
 2705         }
 2706 #endif
 2707 
 2708         /*
 2709          * Create signal actions for the child process.
 2710          */
 2711         p2->p_sigacts = sigactsinit(p1, 0);
 2712         mutex_enter(p1->p_lock);
 2713         p2->p_sflag |=
 2714             (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
 2715         sched_proc_fork(p1, p2);
 2716         mutex_exit(p1->p_lock);
 2717 
 2718         p2->p_stflag = p1->p_stflag;
 2719 
 2720         /*
 2721          * p_stats.
 2722          * Copy parts of p_stats, and zero out the rest.
 2723          */
 2724         p2->p_stats = pstatscopy(p1->p_stats);
 2725 
 2726         /* copy over machdep flags to the new proc */
 2727         cpu_proc_fork(p1, p2);
 2728 
 2729         /*
 2730          * Prepare remaining parts of spawn data
 2731          */
 2732         spawn_data->sed_actions = fa;
 2733         spawn_data->sed_attrs = sa;
 2734 
 2735         spawn_data->sed_parent = p1;
 2736 
 2737         /* create LWP */
 2738         lwp_create(l1, p2, uaddr, 0, NULL, 0, spawn_return, spawn_data,
 2739             &l2, l1->l_class, &l1->l_sigmask, &l1->l_sigstk);
 2740         l2->l_ctxlink = NULL;   /* reset ucontext link */
 2741 
 2742         /*
 2743          * Copy the credential so other references don't see our changes.
 2744          * Test to see if this is necessary first, since in the common case
 2745          * we won't need a private reference.
 2746          */
 2747         if (kauth_cred_geteuid(l2->l_cred) != kauth_cred_getsvuid(l2->l_cred) ||
 2748             kauth_cred_getegid(l2->l_cred) != kauth_cred_getsvgid(l2->l_cred)) {
 2749                 l2->l_cred = kauth_cred_copy(l2->l_cred);
 2750                 kauth_cred_setsvuid(l2->l_cred, kauth_cred_geteuid(l2->l_cred));
 2751                 kauth_cred_setsvgid(l2->l_cred, kauth_cred_getegid(l2->l_cred));
 2752         }
 2753 
 2754         /* Update the master credentials. */
 2755         if (l2->l_cred != p2->p_cred) {
 2756                 kauth_cred_t ocred;
 2757 
 2758                 kauth_cred_hold(l2->l_cred);
 2759                 mutex_enter(p2->p_lock);
 2760                 ocred = p2->p_cred;
 2761                 p2->p_cred = l2->l_cred;
 2762                 mutex_exit(p2->p_lock);
 2763                 kauth_cred_free(ocred);
 2764         }
 2765 
              /*
               * Tell the caller that a child now exists; sys_posix_spawn()
               * skips its own cleanup of fa/sa and of the process counts
               * when this flag is set.
               */
 2766         *child_ok = true;
 2767         spawn_data->sed_refcnt = 2;     /* child gets it as well */
 2768 #if 0
 2769         l2->l_nopreempt = 1; /* start it non-preemptable */
 2770 #endif
 2771 
 2772         /*
 2773          * It's now safe for the scheduler and other processes to see the
 2774          * child process.
 2775          */
 2776         mutex_enter(&proc_lock);
 2777 
 2778         if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
 2779                 p2->p_lflag |= PL_CONTROLT;
 2780 
 2781         LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
 2782         p2->p_exitsig = SIGCHLD;        /* signal for parent on exit */
 2783 
              /*
               * If p1 is traced with PSL_TRACEPOSIX_SPAWN set, hand the
               * child to p1's own parent and mark it PSL_TRACEDCHILD.
               */
 2784         if ((p1->p_slflag & (PSL_TRACEPOSIX_SPAWN|PSL_TRACED)) ==
 2785             (PSL_TRACEPOSIX_SPAWN|PSL_TRACED)) {
 2786                 proc_changeparent(p2, p1->p_pptr);
 2787                 SET(p2->p_slflag, PSL_TRACEDCHILD);
 2788         }
 2789 
 2790         p2->p_oppid = p1->p_pid;  /* Remember the original parent id. */
 2791 
 2792         LIST_INSERT_AFTER(p1, p2, p_pglist);
 2793         LIST_INSERT_HEAD(&allproc, p2, p_list);
 2794 
 2795         p2->p_trace_enabled = trace_is_enabled(p2);
 2796 #ifdef __HAVE_SYSCALL_INTERN
 2797         (*p2->p_emul->e_syscall_intern)(p2);
 2798 #endif
 2799 
 2800         /*
 2801          * Make child runnable, set start time, and add to run queue except
 2802          * if the parent requested the child to start in SSTOP state.
 2803          */
 2804         mutex_enter(p2->p_lock);
 2805 
 2806         getmicrotime(&p2->p_stats->p_start);
 2807 
 2808         lwp_lock(l2);
 2809         KASSERT(p2->p_nrlwps == 1);
 2810         KASSERT(l2->l_stat == LSIDL);
              /*
               * NOTE(review): p_nrlwps was asserted to be 1 two lines up, so
               * this store looks redundant; presumably it only matters when
               * KASSERT is compiled out -- confirm.
               */
 2811         p2->p_nrlwps = 1;
 2812         p2->p_stat = SACTIVE;
 2813         setrunnable(l2);
 2814         /* LWP now unlocked */
 2815 
 2816         mutex_exit(p2->p_lock);
 2817         mutex_exit(&proc_lock);
 2818 
              /*
               * Sleep until sed_cv_child_ready is signalled, then pick up
               * the status left in sed_error and drop the parent's
               * reference on spawn_data.
               * NOTE(review): a single cv_wait() with no predicate
               * re-check -- confirm that an early or spurious wakeup
               * cannot happen here.
               */
 2819         cv_wait(&spawn_data->sed_cv_child_ready, &spawn_data->sed_mtx_child);
 2820         error = spawn_data->sed_error;
 2821         mutex_exit(&spawn_data->sed_mtx_child);
 2822         spawn_exec_data_release(spawn_data);
 2823 
              /* presumably both locks were taken inside execve_loadvm() */
 2824         rw_exit(&p1->p_reflock);
 2825         rw_exit(&exec_lock);
 2826         have_exec_lock = false;
 2827 
              /* the pid is handed back even if the child reported an error */
 2828         *pid_res = pid;
 2829 
 2830         if (error)
 2831                 return error;
 2832 
 2833         if (p1->p_slflag & PSL_TRACED) {
 2834                 /* Paranoid check */
 2835                 mutex_enter(&proc_lock);
 2836                 if ((p1->p_slflag & (PSL_TRACEPOSIX_SPAWN|PSL_TRACED)) !=
 2837                     (PSL_TRACEPOSIX_SPAWN|PSL_TRACED)) {
 2838                         mutex_exit(&proc_lock);
 2839                         return 0;
 2840                 }
 2841 
                      /*
                       * NOTE(review): proc_lock and p1->p_lock are still
                       * held when eventswitch() is called and are never
                       * released in this function -- presumably
                       * eventswitch() drops them; confirm.
                       */
 2842                 mutex_enter(p1->p_lock);
 2843                 eventswitch(TRAP_CHLD, PTRACE_POSIX_SPAWN, pid);
 2844         }
 2845         return 0;
 2846 
              /*
               * Only reached through the gotos above, all of which come
               * before any child state has been set up, so just the
               * exec data, the locks, spawn_data and the uarea need undoing.
               */
 2847  error_exit:
 2848         if (have_exec_lock) {
 2849                 execve_free_data(&spawn_data->sed_exec);
 2850                 rw_exit(&p1->p_reflock);
 2851                 rw_exit(&exec_lock);
 2852         }
              /* sed_mtx_child has been held since spawn_data was set up */
 2853         mutex_exit(&spawn_data->sed_mtx_child);
 2854         spawn_exec_data_release(spawn_data);
 2855         if (uaddr != 0)
 2856                 uvm_uarea_free(uaddr);
 2857 
 2858         return error;
 2859 }
 2860 
 2861 int
 2862 sys_posix_spawn(struct lwp *l1, const struct sys_posix_spawn_args *uap,
 2863     register_t *retval)
 2864 {
 2865         /* {
 2866                 syscallarg(pid_t *) pid;
 2867                 syscallarg(const char *) path;
 2868                 syscallarg(const struct posix_spawn_file_actions *) file_actions;
 2869                 syscallarg(const struct posix_spawnattr *) attrp;
 2870                 syscallarg(char *const *) argv;
 2871                 syscallarg(char *const *) envp;
 2872         } */    
 2873 
 2874         int error;
 2875         struct posix_spawn_file_actions *fa = NULL;
 2876         struct posix_spawnattr *sa = NULL;
 2877         pid_t pid;
 2878         bool child_ok = false;
 2879         rlim_t max_fileactions;
 2880         proc_t *p = l1->l_proc;
 2881 
 2882         /* check_posix_spawn() increments nprocs for us. */
 2883         error = check_posix_spawn(l1);
 2884         if (error) {
 2885                 *retval = error;
 2886                 return 0;
 2887         }
 2888 
 2889         /* copy in file_actions struct */
 2890         if (SCARG(uap, file_actions) != NULL) {
 2891                 max_fileactions = 2 * uimin(p->p_rlimit[RLIMIT_NOFILE].rlim_cur,
 2892                     maxfiles);
 2893                 error = posix_spawn_fa_alloc(&fa, SCARG(uap, file_actions),
 2894                     max_fileactions);
 2895                 if (error)
 2896                         goto error_exit;
 2897         }
 2898 
 2899         /* copyin posix_spawnattr struct */
 2900         if (SCARG(uap, attrp) != NULL) {
 2901                 sa = kmem_alloc(sizeof(*sa), KM_SLEEP);
 2902                 error = copyin(SCARG(uap, attrp), sa, sizeof(*sa));
 2903                 if (error)
 2904                         goto error_exit;
 2905         }
 2906 
 2907         /*
 2908          * Do the spawn
 2909          */
 2910         error = do_posix_spawn(l1, &pid, &child_ok, SCARG(uap, path), fa, sa,
 2911             SCARG(uap, argv), SCARG(uap, envp), execve_fetch_element);
 2912         if (error)
 2913                 goto error_exit;
 2914 
 2915         if (error == 0 && SCARG(uap, pid) != NULL)
 2916                 error = copyout(&pid, SCARG(uap, pid), sizeof(pid));
 2917 
 2918         *retval = error;
 2919         return 0;
 2920 
 2921  error_exit:
 2922         if (!child_ok) {
 2923                 (void)chgproccnt(kauth_cred_getuid(l1->l_cred), -1);
 2924                 atomic_dec_uint(&nprocs);
 2925 
 2926                 if (sa)
 2927                         kmem_free(sa, sizeof(*sa));
 2928                 if (fa)
 2929                         posix_spawn_fa_free(fa, fa->len);
 2930         }
 2931 
 2932         *retval = error;
 2933         return 0;
 2934 }
 2935 
 2936 void
 2937 exec_free_emul_arg(struct exec_package *epp)
 2938 {
 2939         if (epp->ep_emul_arg_free != NULL) {
 2940                 KASSERT(epp->ep_emul_arg != NULL);
 2941                 (*epp->ep_emul_arg_free)(epp->ep_emul_arg);
 2942                 epp->ep_emul_arg_free = NULL;
 2943                 epp->ep_emul_arg = NULL;
 2944         } else {
 2945                 KASSERT(epp->ep_emul_arg == NULL);
 2946         }
 2947 }
 2948 
 2949 #ifdef DEBUG_EXEC
 2950 static void
 2951 dump_vmcmds(const struct exec_package * const epp, size_t x, int error)
 2952 {
 2953         struct exec_vmcmd *vp = &epp->ep_vmcmds.evs_cmds[0];
 2954         size_t j;
 2955 
 2956         if (error == 0)
 2957                 DPRINTF(("vmcmds %u\n", epp->ep_vmcmds.evs_used));
 2958         else
 2959                 DPRINTF(("vmcmds %zu/%u, error %d\n", x, 
 2960                     epp->ep_vmcmds.evs_used, error));
 2961 
 2962         for (j = 0; j < epp->ep_vmcmds.evs_used; j++) {
 2963                 DPRINTF(("vmcmd[%zu] = vmcmd_map_%s %#"
 2964                     PRIxVADDR"/%#"PRIxVSIZE" fd@%#"
 2965                     PRIxVSIZE" prot=0%o flags=%d\n", j,
 2966                     vp[j].ev_proc == vmcmd_map_pagedvn ?
 2967                     "pagedvn" :
 2968                     vp[j].ev_proc == vmcmd_map_readvn ?
 2969                     "readvn" :
 2970                     vp[j].ev_proc == vmcmd_map_zero ?
 2971                     "zero" : "*unknown*",
 2972                     vp[j].ev_addr, vp[j].ev_len,
 2973                     vp[j].ev_offset, vp[j].ev_prot,
 2974                     vp[j].ev_flags));
 2975                 if (error != 0 && j == x)
 2976                         DPRINTF(("     ^--- failed\n"));
 2977         }
 2978 }
 2979 #endif
Cache object: 2c132a5e7166b86b685ca05bcc52697e
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/kern_exec.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_exec.c