kern_exec.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*      $NetBSD: kern_exec.c,v 1.232.2.4 2011/03/20 20:51:52 bouyer Exp $       */
    2 
    3 /*-
    4  * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
    5  * Copyright (C) 1992 Wolfgang Solfrank.
    6  * Copyright (C) 1992 TooLs GmbH.
    7  * All rights reserved.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *      This product includes software developed by TooLs GmbH.
   20  * 4. The name of TooLs GmbH may not be used to endorse or promote products
   21  *    derived from this software without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
   24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   26  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   28  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
   29  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
   30  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
   31  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
   32  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.232.2.4 2011/03/20 20:51:52 bouyer Exp $");
   37 
   38 #include "opt_ktrace.h"
   39 #include "opt_syscall_debug.h"
   40 #include "opt_compat_netbsd.h"
   41 #include "veriexec.h"
   42 #include "opt_pax.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/filedesc.h>
   47 #include <sys/kernel.h>
   48 #include <sys/proc.h>
   49 #include <sys/mount.h>
   50 #include <sys/malloc.h>
   51 #include <sys/namei.h>
   52 #include <sys/vnode.h>
   53 #include <sys/file.h>
   54 #include <sys/acct.h>
   55 #include <sys/exec.h>
   56 #include <sys/ktrace.h>
   57 #include <sys/resourcevar.h>
   58 #include <sys/wait.h>
   59 #include <sys/mman.h>
   60 #include <sys/ras.h>
   61 #include <sys/signalvar.h>
   62 #include <sys/stat.h>
   63 #include <sys/syscall.h>
   64 #include <sys/kauth.h>
   65 
   66 #include <sys/sa.h>
   67 #include <sys/savar.h>
   68 #include <sys/syscallargs.h>
   69 #if NVERIEXEC > 0
   70 #include <sys/verified_exec.h>
   71 #endif /* NVERIEXEC > 0 */
   72 
   73 #ifdef SYSTRACE
   74 #include <sys/systrace.h>
   75 #endif /* SYSTRACE */
   76 
   77 #ifdef PAX_SEGVGUARD
   78 #include <sys/pax.h>
   79 #endif /* PAX_SEGVGUARD */
   80 
   81 #include <uvm/uvm_extern.h>
   82 
   83 #include <machine/cpu.h>
   84 #include <machine/reg.h>
   85 
   86 static int exec_sigcode_map(struct proc *, const struct emul *);
   87 
      /*
       * DPRINTF takes a single, parenthesized uprintf() argument list, so
       * call sites use double parentheses: DPRINTF(("fmt\n", arg)).
       * It expands to a uprintf call only when DEBUG_EXEC is defined and
       * to nothing otherwise.
       */
   88 #ifdef DEBUG_EXEC
   89 #define DPRINTF(a) uprintf a
   90 #else
   91 #define DPRINTF(a)
   92 #endif /* DEBUG_EXEC */
   93 
      /* malloc type used for exec allocations, e.g. the exec header buffer */
   94 MALLOC_DEFINE(M_EXEC, "exec", "argument lists & other mem used by exec");
   95 
   96 /*
   97  * Exec function switch:
   98  *
   99  * Note that each makecmds function is responsible for loading the
  100  * exec package with the necessary functions for any exec-type-specific
  101  * handling.
  102  *
  103  * Functions for specific exec types should be defined in their own
  104  * header file.
       *
       * execsw_builtin[] and nexecs_builtin are only declared here (extern).
       * execsw[] holds nexecs entries; check_exec() walks it in order and
       * calls each entry's es_makecmds until one accepts the image.
  105  */
  106 extern const struct execsw      execsw_builtin[];
  107 extern int                      nexecs_builtin;
  108 static const struct execsw      **execsw = NULL;
  109 static int                      nexecs;
  110 
      /*
       * Size of the exec header buffer: execve1() allocates this many bytes
       * for ep_hdr and passes the same value as ep_hdrlen.
       */
  111 u_int   exec_maxhdrsz;          /* must not be static - netbsd32 needs it */
  112 
  113 #ifdef LKM
  114 /* list of supported emulations (only built when LKM is defined) */
  115 static
  116 LIST_HEAD(emlist_head, emul_entry) el_head = LIST_HEAD_INITIALIZER(el_head);
  117 struct emul_entry {
  118         LIST_ENTRY(emul_entry)  el_list;        /* linkage on el_head */
  119         const struct emul       *el_emul;
  120         int                     ro_entry;       /* NOTE(review): meaning not visible in this part of the file */
  121 };
  122 
  123 /* list of dynamically loaded execsw entries */
  124 static
  125 LIST_HEAD(execlist_head, exec_entry) ex_head = LIST_HEAD_INITIALIZER(ex_head);
  126 struct exec_entry {
  127         LIST_ENTRY(exec_entry)  ex_list;        /* linkage on ex_head */
  128         const struct execsw     *es;
  129 };
  130 
  131 /* singly linked node used while building execsw[] */
  132 struct execsw_entry {
  133         struct execsw_entry     *next;
  134         const struct execsw     *es;
  135 };
  136 #endif /* LKM */
  137 
  138 #ifdef SYSCALL_DEBUG
      /* syscall name table; placed in emul_netbsd only with SYSCALL_DEBUG */
  139 extern const char * const syscallnames[];
  140 #endif
  141 
  142 #ifdef COMPAT_16
      /*
       * sigcode, esigcode and the address of emul_netbsd_object are placed
       * in emul_netbsd when COMPAT_16 is defined; otherwise those three
       * slots are NULL.
       */
  143 extern char     sigcode[], esigcode[];
  144 struct uvm_object *emul_netbsd_object;
  145 #endif
  146 
  147 #ifndef __HAVE_SYSCALL_INTERN
      /* used in emul_netbsd in place of syscall_intern */
  148 void    syscall(void);
  149 #endif
  150 
  151 static const struct sa_emul saemul_netbsd = {
              /*
               * Positional initializer: the order of these values must match
               * the member order of struct sa_emul, which is declared
               * elsewhere.  The address of this object is the final
               * initializer of emul_netbsd.
               */
  152         sizeof(ucontext_t),
  153         sizeof(struct sa_t),
  154         sizeof(struct sa_t *),
  155         NULL,
  156         NULL,
  157         cpu_upcall,
  158         (void (*)(struct lwp *, void *))getucontext,    /* cast to the hook's signature */
  159         sa_ucsp
  160 };
  161 
  162 /*
       * NetBSD emul struct.
       *
       * This is a positional initializer, so the values below must stay in
       * the member order of struct emul (declared elsewhere).  The four
       * values inside the __HAVE_MINIMAL_EMUL conditional are omitted
       * entirely when that macro is defined.  Every other conditional
       * supplies the same number of values in both arms, so later
       * positions do not shift.
       */
  163 const struct emul emul_netbsd = {
  164         "netbsd",
  165         NULL,           /* emulation path */
  166 #ifndef __HAVE_MINIMAL_EMUL
  167         EMUL_HAS_SYS___syscall,
  168         NULL,
  169         SYS_syscall,
  170         SYS_NSYSENT,
  171 #endif
  172         sysent,
  173 #ifdef SYSCALL_DEBUG
  174         syscallnames,   /* name table only with SYSCALL_DEBUG */
  175 #else
  176         NULL,
  177 #endif
  178         sendsig,
  179         trapsignal,
  180         NULL,
  181 #ifdef COMPAT_16
              /* presumably the e_sigcode/e_esigcode pair execve1() subtracts - TODO confirm */
  182         sigcode,
  183         esigcode,
  184         &emul_netbsd_object,
  185 #else
  186         NULL,
  187         NULL,
  188         NULL,
  189 #endif
  190         setregs,
  191         NULL,
  192         NULL,
  193         NULL,
  194         NULL,
  195         NULL,
  196 #ifdef __HAVE_SYSCALL_INTERN
  197         syscall_intern,
  198 #else
  199         syscall,
  200 #endif
  201         NULL,
  202         NULL,
  203 
  204         uvm_default_mapaddr,
  205         NULL,
  206         &saemul_netbsd, /* the struct sa_emul defined just above */
  207 };
  208 
  209 #ifdef LKM
  210 /*
  211  * Exec lock. Used to control access to execsw[] structures.
  212  * This must not be static so that netbsd32 can access it, too.
       * execve1() acquires it shared (LK_SHARED) before calling check_exec().
  213  */
  214 struct lock exec_lock;
  215 
      /* forward declaration; only present when LKM is defined */
  216 static void link_es(struct execsw_entry **, const struct execsw *);
  217 #endif /* LKM */
  218 
  219 /*
  220  * check exec:
  221  * given an "executable" described in the exec package's namei info,
  222  * see what we can do with it.
  223  *
  224  * ON ENTRY:
  225  *      exec package with appropriate namei info
  226  *      lwp pointer of exec'ing lwp
  227  *      NO SELF-LOCKED VNODES
  228  *
  229  * ON EXIT:
  230  *      error:  nothing held, etc.  exec header still allocated.
  231  *      ok:     filled exec package, executable's vnode (unlocked).
  232  *
  233  * EXEC SWITCH ENTRY:
  234  *      Locked vnode to check, exec package, proc.
  235  *
  236  * EXEC SWITCH EXIT:
  237  *      ok:     return 0, filled exec package, executable's vnode (unlocked).
  238  *      error:  destructive:
  239  *                      everything deallocated execept exec header.
  240  *              non-destructive:
  241  *                      error code, executable's vnode (unlocked),
  242  *                      exec header unmodified.
  243  */
  244 int
  245 /*ARGSUSED*/
  246 check_exec(struct lwp *l, struct exec_package *epp)
  247 {
  248         int             error, i;
  249         struct vnode    *vp;
  250         struct nameidata *ndp;
  251         size_t          resid;
  252 
  253         ndp = epp->ep_ndp;
  254         ndp->ni_cnd.cn_nameiop = LOOKUP;
  255         ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | SAVENAME;
  256         /* first get the vnode */
  257         if ((error = namei(ndp)) != 0)
  258                 return error;
  259         epp->ep_vp = vp = ndp->ni_vp;
  260 
  261         /* check access and type */
  262         if (vp->v_type != VREG) {
  263                 error = EACCES;
  264                 goto bad1;
  265         }
  266         if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred, l)) != 0)
  267                 goto bad1;
  268 
  269         /* get attributes */
  270         if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred, l)) != 0)
  271                 goto bad1;
  272 
  273         /* Check mount point */
  274         if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
  275                 error = EACCES;
  276                 goto bad1;
  277         }
  278         if (vp->v_mount->mnt_flag & MNT_NOSUID)
  279                 epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
  280 
  281         /* try to open it */
  282         if ((error = VOP_OPEN(vp, FREAD, l->l_cred, l)) != 0)
  283                 goto bad1;
  284 
  285         /* unlock vp, since we need it unlocked from here on out. */
  286         VOP_UNLOCK(vp, 0);
  287 
  288 #if NVERIEXEC > 0
  289         if ((error = veriexec_verify(l, vp, ndp->ni_cnd.cn_pnbuf,
  290             epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
  291             NULL)) != 0)
  292                 goto bad2;
  293 #endif /* NVERIEXEC > 0 */
  294 
  295 #ifdef PAX_SEGVGUARD
  296         error = pax_segvguard(l, vp, ndp->ni_cnd.cn_pnbuf, FALSE);
  297         if (error)
  298                 goto bad2;
  299 #endif /* PAX_SEGVGUARD */
  300 
  301         /* now we have the file, get the exec header */
  302         uvn_attach(vp, VM_PROT_READ);
  303         error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
  304                         UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
  305         if (error)
  306                 goto bad2;
  307         epp->ep_hdrvalid = epp->ep_hdrlen - resid;
  308 
  309         /*
  310          * Set up default address space limits.  Can be overridden
  311          * by individual exec packages.
  312          *
  313          * XXX probably should be all done in the exec pakages.
  314          */
  315         epp->ep_vm_minaddr = VM_MIN_ADDRESS;
  316         epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
  317         /*
  318          * set up the vmcmds for creation of the process
  319          * address space
  320          */
  321         error = ENOEXEC;
  322         for (i = 0; i < nexecs && error != 0; i++) {
  323                 int newerror;
  324 
  325                 epp->ep_esch = execsw[i];
  326                 newerror = (*execsw[i]->es_makecmds)(l, epp);
  327                 /* make sure the first "interesting" error code is saved. */
  328                 if (!newerror || error == ENOEXEC)
  329                         error = newerror;
  330 
  331                 /* if es_makecmds call was successful, update epp->ep_es */
  332                 if (!newerror && (epp->ep_flags & EXEC_HASES) == 0)
  333                         epp->ep_es = execsw[i];
  334 
  335                 if (epp->ep_flags & EXEC_DESTR && error != 0)
  336                         return error;
  337         }
  338         if (!error) {
  339                 /* check that entry point is sane */
  340                 if (epp->ep_entry > VM_MAXUSER_ADDRESS)
  341                         error = ENOEXEC;
  342 
  343                 /* check limits */
  344                 if ((epp->ep_tsize > MAXTSIZ) ||
  345                     (epp->ep_dsize >
  346                      (u_quad_t)l->l_proc->p_rlimit[RLIMIT_DATA].rlim_cur))
  347                         error = ENOMEM;
  348 
  349                 if (!error)
  350                         return (0);
  351         }
  352 
  353         /*
  354          * free any vmspace-creation commands,
  355          * and release their references
  356          */
  357         kill_vmcmds(&epp->ep_vmcmds);
  358 
  359 bad2:
  360         /*
  361          * close and release the vnode, restore the old one, free the
  362          * pathname buf, and punt.
  363          */
  364         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  365         VOP_CLOSE(vp, FREAD, l->l_cred, l);
  366         vput(vp);
  367         PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
  368         return error;
  369 
  370 bad1:
  371         /*
  372          * free the namei pathname buffer, and put the vnode
  373          * (which we don't yet have open).
  374          */
  375         vput(vp);                               /* was still locked */
  376         PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
  377         return error;
  378 }
  379 
  380 #ifdef __MACHINE_STACK_GROWS_UP
  381 #define STACK_PTHREADSPACE NBPG     /* NBPG bytes counted into the execve1() stack layout */
  382 #else
  383 #define STACK_PTHREADSPACE 0        /* no extra space when the stack does not grow up */
  384 #endif /* __MACHINE_STACK_GROWS_UP */
  385 
  386 static int
  387 execve_fetch_element(char * const *array, size_t index, char **value)
  388 {
  389         return copyin(array + index, value, sizeof(*value));
  390 }
  391 
  392 /*
  393  * exec system call
  394  */
  395 /* ARGSUSED */
  396 int
  397 sys_execve(struct lwp *l, void *v, register_t *retval)
  398 {
  399         struct sys_execve_args /* {
  400                 syscallarg(const char *)        path;
  401                 syscallarg(char * const *)      argp;
  402                 syscallarg(char * const *)      envp;
  403         } */ *uap = v;
  404 
  405         return execve1(l, SCARG(uap, path), SCARG(uap, argp),
  406             SCARG(uap, envp), execve_fetch_element);
  407 }
  408 
  409 int
  410 execve1(struct lwp *l, const char *path, char * const *args,
  411     char * const *envs, execve_fetch_element_t fetch_element)
  412 {
  413         int                     error;
  414         u_int                   i;
  415         struct exec_package     pack;
  416         struct nameidata        nid;
  417         struct vattr            attr;
  418         struct proc             *p;
  419         char                    *argp;
  420         char                    *dp, *sp;
  421         long                    argc, envc;
  422         size_t                  len;
  423         char                    *stack;
  424         struct ps_strings       arginfo;
  425         struct ps_strings       *aip = &arginfo;
  426         struct vmspace          *vm;
  427         char                    **tmpfap;
  428         int                     szsigcode;
  429         struct exec_vmcmd       *base_vcp;
  430         int                     oldlwpflags;
  431         uid_t                   uid;
  432 #ifdef SYSTRACE
  433         int                     wassugid = ISSET(p->p_flag, P_SUGID);
  434         char                    pathbuf[MAXPATHLEN];
  435         size_t                  pathbuflen;
  436 #endif /* SYSTRACE */
  437 
  438         p = l->l_proc;
  439 
  440         /*
  441          * Check if we have exceeded our number of processes limit.
  442          * This is so that we handle the case where a root daemon
  443          * forked, ran setuid to become the desired user and is trying
  444          * to exec. The obvious place to do the reference counting check
  445          * is setuid(), but we don't do the reference counting check there
  446          * like other OS's do because then all the programs that use setuid()
  447          * must be modified to check the return code of setuid() and exit().
  448          * It is dangerous to make setuid() fail, because it fails open and
  449          * the program will continue to run as root. If we make it succeed
  450          * and return an error code, again we are not enforcing the limit.
  451          * The best place to enforce the limit is here, when the process tries
  452          * to execute a new image, because eventually the process will need
  453          * to call exec in order to do something useful.
  454          */
  455 
  456         if ((p->p_flag & P_SUGID) && (uid = kauth_cred_getuid(l->l_cred)) != 0
  457             && chgproccnt(uid, 0) > p->p_rlimit[RLIMIT_NPROC].rlim_cur)
  458                 return EAGAIN;
  459 
  460         /* Disable scheduler activation upcalls. */
  461         oldlwpflags = l->l_flag & (L_SA | L_SA_UPCALL);
  462         if (l->l_flag & L_SA)
  463                 l->l_flag &= ~(L_SA | L_SA_UPCALL);
  464 
  465         /*
  466          * Lock the process and set the P_INEXEC flag to indicate that
  467          * it should be left alone until we're done here.  This is
  468          * necessary to avoid race conditions - e.g. in ptrace() -
  469          * that might allow a local user to illicitly obtain elevated
  470          * privileges.
  471          */
  472         p->p_flag |= P_INEXEC;
  473 
  474         base_vcp = NULL;
  475         /*
  476          * Init the namei data to point to the user's program file name.
  477          * This is done here rather than in check_exec(), so that it's
  478          * possible to override these settings if any of the makecmds/probe
  479          * functions call check_exec() recursively - for example,
  480          * see exec_script_makecmds().
  481          */
  482 #ifdef SYSTRACE
  483         if (ISSET(p->p_flag, P_SYSTRACE))
  484                 systrace_execve0(p);
  485 
  486         error = copyinstr(path, pathbuf, sizeof(pathbuf),
  487                           &pathbuflen);
  488         if (error)
  489                 goto clrflg;
  490 
  491         NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pathbuf, l);
  492 #else
  493         NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_USERSPACE, path, l);
  494 #endif /* SYSTRACE */
  495 
  496         /*
  497          * initialize the fields of the exec package.
  498          */
  499 #ifdef SYSTRACE
  500         pack.ep_name = pathbuf;
  501 #else
  502         pack.ep_name = path;
  503 #endif /* SYSTRACE */
  504         pack.ep_hdr = malloc(exec_maxhdrsz, M_EXEC, M_WAITOK);
  505         pack.ep_hdrlen = exec_maxhdrsz;
  506         pack.ep_hdrvalid = 0;
  507         pack.ep_ndp = &nid;
  508         pack.ep_emul_arg = NULL;
  509         pack.ep_vmcmds.evs_cnt = 0;
  510         pack.ep_vmcmds.evs_used = 0;
  511         pack.ep_vap = &attr;
  512         pack.ep_flags = 0;
  513 
  514 #ifdef LKM
  515         lockmgr(&exec_lock, LK_SHARED, NULL);
  516 #endif
  517 
  518         /* see if we can run it. */
  519         if ((error = check_exec(l, &pack)) != 0)
  520                 goto freehdr;
  521 
  522         /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
  523 
  524         /* allocate an argument buffer */
  525         argp = (char *) uvm_km_alloc(exec_map, NCARGS, 0,
  526             UVM_KMF_PAGEABLE|UVM_KMF_WAITVA);
  527 #ifdef DIAGNOSTIC
  528         if (argp == NULL)
  529                 panic("execve: argp == NULL");
  530 #endif
  531         dp = argp;
  532         argc = 0;
  533 
  534         /* copy the fake args list, if there's one, freeing it as we go */
  535         if (pack.ep_flags & EXEC_HASARGL) {
  536                 tmpfap = pack.ep_fa;
  537                 while (*tmpfap != NULL) {
  538                         char *cp;
  539 
  540                         cp = *tmpfap;
  541                         while (*cp)
  542                                 *dp++ = *cp++;
  543                         *dp++ = 0;
  544 #ifdef KTRACE
  545                         if (KTRPOINT(p, KTR_EXEC_ARG))
  546                                 ktrkmem(l, KTR_EXEC_ARG, *tmpfap, cp - *tmpfap);
  547 #endif
  548 
  549                         FREE(*tmpfap, M_EXEC);
  550                         tmpfap++; argc++;
  551                 }
  552                 FREE(pack.ep_fa, M_EXEC);
  553                 pack.ep_flags &= ~EXEC_HASARGL;
  554         }
  555 
  556         /* Now get argv & environment */
  557         if (args == NULL) {
  558                 error = EINVAL;
  559                 goto bad;
  560         }
  561         /* 'i' will index the argp/envp element to be retrieved */
  562         i = 0;
  563         if (pack.ep_flags & EXEC_SKIPARG)
  564                 i++;
  565 
  566         while (1) {
  567                 len = argp + ARG_MAX - dp;
  568                 if ((error = (*fetch_element)(args, i, &sp)) != 0)
  569                         goto bad;
  570                 if (!sp)
  571                         break;
  572                 if ((error = copyinstr(sp, dp, len, &len)) != 0) {
  573                         if (error == ENAMETOOLONG)
  574                                 error = E2BIG;
  575                         goto bad;
  576                 }
  577 #ifdef KTRACE
  578                 if (KTRPOINT(p, KTR_EXEC_ARG))
  579                         ktrkmem(l, KTR_EXEC_ARG, dp, len - 1);
  580 #endif
  581                 dp += len;
  582                 i++;
  583                 argc++;
  584         }
  585 
  586         envc = 0;
  587         /* environment need not be there */
  588         if (envs != NULL) {
  589                 i = 0;
  590                 while (1) {
  591                         len = argp + ARG_MAX - dp;
  592                         if ((error = (*fetch_element)(envs, i, &sp)) != 0)
  593                                 goto bad;
  594                         if (!sp)
  595                                 break;
  596                         if ((error = copyinstr(sp, dp, len, &len)) != 0) {
  597                                 if (error == ENAMETOOLONG)
  598                                         error = E2BIG;
  599                                 goto bad;
  600                         }
  601 #ifdef KTRACE
  602                         if (KTRPOINT(p, KTR_EXEC_ENV))
  603                                 ktrkmem(l, KTR_EXEC_ENV, dp, len - 1);
  604 #endif
  605                         dp += len;
  606                         i++;
  607                         envc++;
  608                 }
  609         }
  610 
  611         dp = (char *) ALIGN(dp);
  612 
  613         szsigcode = pack.ep_es->es_emul->e_esigcode -
  614             pack.ep_es->es_emul->e_sigcode;
  615 
  616         /* Now check if args & environ fit into new stack */
  617         if (pack.ep_flags & EXEC_32)
  618                 len = ((argc + envc + 2 + pack.ep_es->es_arglen) *
  619                     sizeof(int) + sizeof(int) + dp + STACKGAPLEN +
  620                     szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
  621                     - argp;
  622         else
  623                 len = ((argc + envc + 2 + pack.ep_es->es_arglen) *
  624                     sizeof(char *) + sizeof(int) + dp + STACKGAPLEN +
  625                     szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
  626                     - argp;
  627 
  628         len = ALIGN(len);       /* make the stack "safely" aligned */
  629 
  630         if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
  631                 error = ENOMEM;
  632                 goto bad;
  633         }
  634 
  635         /* Get rid of other LWPs. */
  636         p->p_flag |= P_WEXIT; /* XXX hack. lwp-exit stuff wants to see it. */
  637         exit_lwps(l);
  638         p->p_flag &= ~P_WEXIT;
  639         KDASSERT(p->p_nlwps == 1);
  640 
  641         /* This is now LWP 1 */
  642         l->l_lid = 1;
  643         p->p_nlwpid = 1;
  644 
  645         /* Release any SA state. */
  646         if (p->p_sa)
  647                 sa_release(p);
  648 
  649         /* Remove POSIX timers */
  650         timers_free(p, TIMERS_POSIX);
  651 
  652         /* adjust "active stack depth" for process VSZ */
  653         pack.ep_ssize = len;    /* maybe should go elsewhere, but... */
  654 
  655         /*
  656          * Do whatever is necessary to prepare the address space
  657          * for remapping.  Note that this might replace the current
  658          * vmspace with another!
  659          */
  660         uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);
  661 
  662         /* record proc's vnode, for use by procfs and others */
  663         if (p->p_textvp)
  664                 vrele(p->p_textvp);
  665         VREF(pack.ep_vp);
  666         p->p_textvp = pack.ep_vp;
  667 
  668         /* Now map address space */
  669         vm = p->p_vmspace;
  670         vm->vm_taddr = (caddr_t) pack.ep_taddr;
  671         vm->vm_tsize = btoc(pack.ep_tsize);
  672         vm->vm_daddr = (caddr_t) pack.ep_daddr;
  673         vm->vm_dsize = btoc(pack.ep_dsize);
  674         vm->vm_ssize = btoc(pack.ep_ssize);
  675         vm->vm_maxsaddr = (caddr_t) pack.ep_maxsaddr;
  676         vm->vm_minsaddr = (caddr_t) pack.ep_minsaddr;
  677 
  678         /* create the new process's VM space by running the vmcmds */
  679 #ifdef DIAGNOSTIC
  680         if (pack.ep_vmcmds.evs_used == 0)
  681                 panic("execve: no vmcmds");
  682 #endif
  683         for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
  684                 struct exec_vmcmd *vcp;
  685 
  686                 vcp = &pack.ep_vmcmds.evs_cmds[i];
  687                 if (vcp->ev_flags & VMCMD_RELATIVE) {
  688 #ifdef DIAGNOSTIC
  689                         if (base_vcp == NULL)
  690                                 panic("execve: relative vmcmd with no base");
  691                         if (vcp->ev_flags & VMCMD_BASE)
  692                                 panic("execve: illegal base & relative vmcmd");
  693 #endif
  694                         vcp->ev_addr += base_vcp->ev_addr;
  695                 }
  696                 error = (*vcp->ev_proc)(l, vcp);
  697 #ifdef DEBUG_EXEC
  698                 if (error) {
  699                         int j;
  700                         struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
  701                         for (j = 0; j <= i; j++)
  702                                 uprintf(
  703                             "vmcmd[%d] = %#lx/%#lx fd@%#lx prot=0%o flags=%d\n",
  704                                     j, vp[j].ev_addr, vp[j].ev_len,
  705                                     vp[j].ev_offset, vp[j].ev_prot,
  706                                     vp[j].ev_flags);
  707                 }
  708 #endif /* DEBUG_EXEC */
  709                 if (vcp->ev_flags & VMCMD_BASE)
  710                         base_vcp = vcp;
  711         }
  712 
  713         /* free the vmspace-creation commands, and release their references */
  714         kill_vmcmds(&pack.ep_vmcmds);
  715 
  716         vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
  717         VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred, l);
  718         vput(pack.ep_vp);
  719 
  720         /* if an error happened, deallocate and punt */
  721         if (error) {
  722                 DPRINTF(("execve: vmcmd %i failed: %d\n", i - 1, error));
  723                 goto exec_abort;
  724         }
  725 
  726         /* remember information about the process */
  727         arginfo.ps_nargvstr = argc;
  728         arginfo.ps_nenvstr = envc;
  729 
  730         stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
  731                 STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
  732                 len - (sizeof(struct ps_strings) + szsigcode));
  733 #ifdef __MACHINE_STACK_GROWS_UP
  734         /*
  735          * The copyargs call always copies into lower addresses
  736          * first, moving towards higher addresses, starting with
  737          * the stack pointer that we give.  When the stack grows
  738          * down, this puts argc/argv/envp very shallow on the
  739          * stack, right at the first user stack pointer, and puts
  740          * STACKGAPLEN very deep in the stack.  When the stack
  741          * grows up, the situation is reversed.
  742          *
  743          * Normally, this is no big deal.  But the ld_elf.so _rtld()
  744          * function expects to be called with a single pointer to
  745          * a region that has a few words it can stash values into,
  746          * followed by argc/argv/envp.  When the stack grows down,
  747          * it's easy to decrement the stack pointer a little bit to
  748          * allocate the space for these few words and pass the new
  749          * stack pointer to _rtld.  When the stack grows up, however,
  750          * a few words before argc is part of the signal trampoline, XXX
  751          * so we have a problem.
  752          *
  753          * Instead of changing how _rtld works, we take the easy way
  754          * out and steal 32 bytes before we call copyargs.  This
  755          * space is effectively stolen from STACKGAPLEN.
  756          */
  757         stack += 32;
  758 #endif /* __MACHINE_STACK_GROWS_UP */
  759 
  760         /* Now copy argc, args & environ to new stack */
  761         error = (*pack.ep_es->es_copyargs)(l, &pack, &arginfo, &stack, argp);
  762         if (error) {
  763                 DPRINTF(("execve: copyargs failed %d\n", error));
  764                 goto exec_abort;
  765         }
  766         /* Move the stack back to original point */
  767         stack = (char *)STACK_GROW(vm->vm_minsaddr, len);
  768 
  769         /* fill process ps_strings info */
  770         p->p_psstr = (struct ps_strings *)
  771             STACK_ALLOC(STACK_GROW(vm->vm_minsaddr, STACK_PTHREADSPACE),
  772             sizeof(struct ps_strings));
  773         p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
  774         p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
  775         p->p_psenv = offsetof(struct ps_strings, ps_envstr);
  776         p->p_psnenv = offsetof(struct ps_strings, ps_nenvstr);
  777 
  778         /* copy out the process's ps_strings structure */
  779         if ((error = copyout(aip, (char *)p->p_psstr,
  780             sizeof(arginfo))) != 0) {
  781                 DPRINTF(("execve: ps_strings copyout %p->%p size %ld failed\n",
  782                        aip, (char *)p->p_psstr, (long)sizeof(arginfo)));
  783                 goto exec_abort;
  784         }
  785 
  786         stopprofclock(p);       /* stop profiling */
  787         fdcloseexec(l);         /* handle close on exec */
  788         execsigs(p);            /* reset catched signals */
  789 
  790         l->l_ctxlink = NULL;    /* reset ucontext link */
  791 
  792         /* set command name & other accounting info */
  793         len = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
  794         memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, len);
  795         p->p_comm[len] = 0;
  796         p->p_acflag &= ~AFORK;
  797 
  798         p->p_flag |= P_EXEC;
  799         if (p->p_flag & P_PPWAIT) {
  800                 p->p_flag &= ~P_PPWAIT;
  801                 wakeup((caddr_t) p->p_pptr);
  802         }
  803 
  804         /*
  805          * deal with set[ug]id.
  806          * MNT_NOSUID has already been used to disable s[ug]id.
  807          */
  808         if ((p->p_flag & P_TRACED) == 0 &&
  809 
  810             (((attr.va_mode & S_ISUID) != 0 &&
  811               kauth_cred_geteuid(l->l_cred) != attr.va_uid) ||
  812 
  813              ((attr.va_mode & S_ISGID) != 0 &&
  814               kauth_cred_getegid(l->l_cred) != attr.va_gid))) {
  815                 /*
  816                  * Mark the process as SUGID before we do
  817                  * anything that might block.
  818                  */
  819                 p_sugid(p);
  820 
  821                 /* Make sure file descriptors 0..2 are in use. */
  822                 if ((error = fdcheckstd(l)) != 0) {
  823                         DPRINTF(("execve: fdcheckstd failed %d\n", error));
  824                         goto exec_abort;
  825                 }
  826 
  827                 /*
  828                  * Copy the credential so other references don't see our
  829                  * changes.
  830                  */
  831                 l->l_cred = kauth_cred_copy(l->l_cred);
  832 #ifdef KTRACE
  833                 /*
  834                  * If process is being ktraced, turn off - unless
  835                  * root set it.
  836                  */
  837                 if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT))
  838                         ktrderef(p);
  839 #endif
  840                 if (attr.va_mode & S_ISUID)
  841                         kauth_cred_seteuid(l->l_cred, attr.va_uid);
  842                 if (attr.va_mode & S_ISGID)
  843                         kauth_cred_setegid(l->l_cred, attr.va_gid);
  844         } else {
  845                 if (kauth_cred_geteuid(l->l_cred) ==
  846                     kauth_cred_getuid(l->l_cred) &&
  847                     kauth_cred_getegid(l->l_cred) ==
  848                     kauth_cred_getgid(l->l_cred))
  849                         p->p_flag &= ~P_SUGID;
  850         }
  851 
  852         /*
  853          * Copy the credential so other references don't see our changes.
  854          * Test to see if this is necessary first, since in the common case
  855          * we won't need a private reference.
  856          */
  857         if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
  858             kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
  859                 l->l_cred = kauth_cred_copy(l->l_cred);
  860                 kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
  861                 kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
  862         }
  863 
  864         /* Update the master credentials. */
  865         if (l->l_cred != p->p_cred) {
  866                 kauth_cred_t ocred;
  867 
  868                 kauth_cred_hold(l->l_cred);
  869                 simple_lock(&p->p_lock);
  870                 ocred = p->p_cred;
  871                 p->p_cred = l->l_cred;
  872                 simple_unlock(&p->p_lock);
  873                 kauth_cred_free(ocred);
  874         }
  875 
  876 #if defined(__HAVE_RAS)
  877         /*
  878          * Remove all RASs from the address space.
  879          */
  880         ras_purgeall(p);
  881 #endif
  882 
  883         doexechooks(p);
  884 
  885         uvm_km_free(exec_map, (vaddr_t) argp, NCARGS, UVM_KMF_PAGEABLE);
  886 
  887         PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
  888 
  889         /* notify others that we exec'd */
  890         KNOTE(&p->p_klist, NOTE_EXEC);
  891 
  892         /* setup new registers and do misc. setup. */
  893         (*pack.ep_es->es_emul->e_setregs)(l, &pack, (u_long) stack);
  894         if (pack.ep_es->es_setregs)
  895                 (*pack.ep_es->es_setregs)(l, &pack, (u_long) stack);
  896 
  897         /* map the process's signal trampoline code */
  898         if (exec_sigcode_map(p, pack.ep_es->es_emul)) {
  899                 DPRINTF(("execve: map sigcode failed %d\n", error));
  900                 goto exec_abort;
  901         }
  902 
  903         if ((p->p_flag & (P_TRACED|P_SYSCALL)) == P_TRACED)
  904                 psignal(p, SIGTRAP);
  905 
  906         free(pack.ep_hdr, M_EXEC);
  907 
  908         /*
  909          * Call emulation specific exec hook. This can setup per-process
  910          * p->p_emuldata or do any other per-process stuff an emulation needs.
  911          *
  912          * If we are executing process of different emulation than the
  913          * original forked process, call e_proc_exit() of the old emulation
  914          * first, then e_proc_exec() of new emulation. If the emulation is
  915          * same, the exec hook code should deallocate any old emulation
  916          * resources held previously by this process.
  917          */
  918         if (p->p_emul && p->p_emul->e_proc_exit
  919             && p->p_emul != pack.ep_es->es_emul)
  920                 (*p->p_emul->e_proc_exit)(p);
  921 
  922         /*
  923          * Call exec hook. Emulation code may NOT store reference to anything
  924          * from &pack.
  925          */
  926         if (pack.ep_es->es_emul->e_proc_exec)
  927                 (*pack.ep_es->es_emul->e_proc_exec)(p, &pack);
  928 
  929         /* update p_emul, the old value is no longer needed */
  930         p->p_emul = pack.ep_es->es_emul;
  931 
  932         /* ...and the same for p_execsw */
  933         p->p_execsw = pack.ep_es;
  934 
  935 #ifdef __HAVE_SYSCALL_INTERN
  936         (*p->p_emul->e_syscall_intern)(p);
  937 #endif
  938 #ifdef KTRACE
  939         if (KTRPOINT(p, KTR_EMUL))
  940                 ktremul(l);
  941 #endif
  942 
  943 #ifdef LKM
  944         lockmgr(&exec_lock, LK_RELEASE, NULL);
  945 #endif
  946         p->p_flag &= ~P_INEXEC;
  947 
  948         if (p->p_flag & P_STOPEXEC) {
  949                 int s;
  950 
  951                 sigminusset(&contsigmask, &p->p_sigctx.ps_siglist);
  952                 SCHED_LOCK(s);
  953                 p->p_pptr->p_nstopchild++;
  954                 p->p_stat = SSTOP;
  955                 l->l_stat = LSSTOP;
  956                 p->p_nrlwps--;
  957                 mi_switch(l, NULL);
  958                 SCHED_ASSERT_UNLOCKED();
  959                 splx(s);
  960         }
  961 
  962 #ifdef SYSTRACE
  963         if (ISSET(p->p_flag, P_SYSTRACE) &&
  964             wassugid && !ISSET(p->p_flag, P_SUGID))
  965                 systrace_execve1(pathbuf, p);
  966 #endif /* SYSTRACE */
  967 
  968         return (EJUSTRETURN);
  969 
  970  bad:
  971         p->p_flag &= ~P_INEXEC;
  972         /* free the vmspace-creation commands, and release their references */
  973         kill_vmcmds(&pack.ep_vmcmds);
  974         /* kill any opened file descriptor, if necessary */
  975         if (pack.ep_flags & EXEC_HASFD) {
  976                 pack.ep_flags &= ~EXEC_HASFD;
  977                 (void) fdrelease(l, pack.ep_fd);
  978         }
  979         /* close and put the exec'd file */
  980         vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
  981         VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred, l);
  982         vput(pack.ep_vp);
  983         PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
  984         uvm_km_free(exec_map, (vaddr_t) argp, NCARGS, UVM_KMF_PAGEABLE);
  985 
  986  freehdr:
  987         free(pack.ep_hdr, M_EXEC);
  988 
  989 #ifdef SYSTRACE
  990  clrflg:
  991 #endif /* SYSTRACE */
  992         l->l_flag |= oldlwpflags;
  993         p->p_flag &= ~P_INEXEC;
  994 #ifdef LKM
  995         lockmgr(&exec_lock, LK_RELEASE, NULL);
  996 #endif
  997 
  998         return error;
  999 
 1000  exec_abort:
 1001         p->p_flag &= ~P_INEXEC;
 1002 #ifdef LKM
 1003         lockmgr(&exec_lock, LK_RELEASE, NULL);
 1004 #endif
 1005 
 1006         /*
 1007          * the old process doesn't exist anymore.  exit gracefully.
 1008          * get rid of the (new) address space we have created, if any, get rid
 1009          * of our namei data and vnode, and exit noting failure
 1010          */
 1011         uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
 1012                 VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
 1013         if (pack.ep_emul_arg)
 1014                 FREE(pack.ep_emul_arg, M_TEMP);
 1015         PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
 1016         uvm_km_free(exec_map, (vaddr_t) argp, NCARGS, UVM_KMF_PAGEABLE);
 1017         free(pack.ep_hdr, M_EXEC);
 1018         exit1(l, W_EXITCODE(error, SIGABRT));
 1019 
 1020         /* NOTREACHED */
 1021         return 0;
 1022 }
 1023 
 1024 
 1025 int
 1026 copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
 1027     char **stackp, void *argp)
 1028 {
 1029         char    **cpp, *dp, *sp;
 1030         size_t  len;
 1031         void    *nullp;
 1032         long    argc, envc;
 1033         int     error;
 1034 
 1035         cpp = (char **)*stackp;
 1036         nullp = NULL;
 1037         argc = arginfo->ps_nargvstr;
 1038         envc = arginfo->ps_nenvstr;
 1039         if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0)
 1040                 return error;
 1041 
 1042         dp = (char *) (cpp + argc + envc + 2 + pack->ep_es->es_arglen);
 1043         sp = argp;
 1044 
 1045         /* XXX don't copy them out, remap them! */
 1046         arginfo->ps_argvstr = cpp; /* remember location of argv for later */
 1047 
 1048         for (; --argc >= 0; sp += len, dp += len)
 1049                 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
 1050                     (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
 1051                         return error;
 1052 
 1053         if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
 1054                 return error;
 1055 
 1056         arginfo->ps_envstr = cpp; /* remember location of envp for later */
 1057 
 1058         for (; --envc >= 0; sp += len, dp += len)
 1059                 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
 1060                     (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
 1061                         return error;
 1062 
 1063         if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
 1064                 return error;
 1065 
 1066         *stackp = (char *)cpp;
 1067         return 0;
 1068 }
 1069 
 1070 #ifdef LKM
 1071 /*
 1072  * Find an emulation of given name in list of emulations.
 1073  * Needs to be called with the exec_lock held.
 1074  */
 1075 const struct emul *
 1076 emul_search(const char *name)
 1077 {
 1078         struct emul_entry *it;
 1079 
 1080         LIST_FOREACH(it, &el_head, el_list) {
 1081                 if (strcmp(name, it->el_emul->e_name) == 0)
 1082                         return it->el_emul;
 1083         }
 1084 
 1085         return NULL;
 1086 }
 1087 
 1088 /*
 1089  * Add an emulation to list, if it's not there already.
 1090  */
 1091 int
 1092 emul_register(const struct emul *emul, int ro_entry)
 1093 {
 1094         struct emul_entry       *ee;
 1095         int                     error;
 1096 
 1097         error = 0;
 1098         lockmgr(&exec_lock, LK_SHARED, NULL);
 1099 
 1100         if (emul_search(emul->e_name)) {
 1101                 error = EEXIST;
 1102                 goto out;
 1103         }
 1104 
 1105         MALLOC(ee, struct emul_entry *, sizeof(struct emul_entry),
 1106                 M_EXEC, M_WAITOK);
 1107         ee->el_emul = emul;
 1108         ee->ro_entry = ro_entry;
 1109         LIST_INSERT_HEAD(&el_head, ee, el_list);
 1110 
 1111  out:
 1112         lockmgr(&exec_lock, LK_RELEASE, NULL);
 1113         return error;
 1114 }
 1115 
 1116 /*
 1117  * Remove emulation with name 'name' from list of supported emulations.
 1118  */
 1119 int
 1120 emul_unregister(const char *name)
 1121 {
 1122         const struct proclist_desc *pd;
 1123         struct emul_entry       *it;
 1124         int                     i, error;
 1125         struct proc             *ptmp;
 1126 
 1127         error = 0;
 1128         lockmgr(&exec_lock, LK_SHARED, NULL);
 1129 
 1130         LIST_FOREACH(it, &el_head, el_list) {
 1131                 if (strcmp(it->el_emul->e_name, name) == 0)
 1132                         break;
 1133         }
 1134 
 1135         if (!it) {
 1136                 error = ENOENT;
 1137                 goto out;
 1138         }
 1139 
 1140         if (it->ro_entry) {
 1141                 error = EBUSY;
 1142                 goto out;
 1143         }
 1144 
 1145         /* test if any execw[] entry is still using this */
 1146         for(i=0; i < nexecs; i++) {
 1147                 if (execsw[i]->es_emul == it->el_emul) {
 1148                         error = EBUSY;
 1149                         goto out;
 1150                 }
 1151         }
 1152 
 1153         /*
 1154          * Test if any process is running under this emulation - since
 1155          * emul_unregister() is running quite sendomly, it's better
 1156          * to do expensive check here than to use any locking.
 1157          */
 1158         proclist_lock_read();
 1159         for (pd = proclists; pd->pd_list != NULL && !error; pd++) {
 1160                 PROCLIST_FOREACH(ptmp, pd->pd_list) {
 1161                         if (ptmp->p_emul == it->el_emul) {
 1162                                 error = EBUSY;
 1163                                 break;
 1164                         }
 1165                 }
 1166         }
 1167         proclist_unlock_read();
 1168 
 1169         if (error)
 1170                 goto out;
 1171 
 1172 
 1173         /* entry is not used, remove it */
 1174         LIST_REMOVE(it, el_list);
 1175         FREE(it, M_EXEC);
 1176 
 1177  out:
 1178         lockmgr(&exec_lock, LK_RELEASE, NULL);
 1179         return error;
 1180 }
 1181 
 1182 /*
 1183  * Add execsw[] entry.
 1184  */
 1185 int
 1186 exec_add(struct execsw *esp, const char *e_name)
 1187 {
 1188         struct exec_entry       *it;
 1189         int                     error;
 1190 
 1191         error = 0;
 1192         lockmgr(&exec_lock, LK_EXCLUSIVE, NULL);
 1193 
 1194         if (!esp->es_emul) {
 1195                 esp->es_emul = emul_search(e_name);
 1196                 if (!esp->es_emul) {
 1197                         error = ENOENT;
 1198                         goto out;
 1199                 }
 1200         }
 1201 
 1202         LIST_FOREACH(it, &ex_head, ex_list) {
 1203                 /* assume tuple (makecmds, probe_func, emulation) is unique */
 1204                 if (it->es->es_makecmds == esp->es_makecmds
 1205                     && it->es->u.elf_probe_func == esp->u.elf_probe_func
 1206                     && it->es->es_emul == esp->es_emul) {
 1207                         error = EEXIST;
 1208                         goto out;
 1209                 }
 1210         }
 1211 
 1212         /* if we got here, the entry doesn't exist yet */
 1213         MALLOC(it, struct exec_entry *, sizeof(struct exec_entry),
 1214                 M_EXEC, M_WAITOK);
 1215         it->es = esp;
 1216         LIST_INSERT_HEAD(&ex_head, it, ex_list);
 1217 
 1218         /* update execsw[] */
 1219         exec_init(0);
 1220 
 1221  out:
 1222         lockmgr(&exec_lock, LK_RELEASE, NULL);
 1223         return error;
 1224 }
 1225 
 1226 /*
 1227  * Remove execsw[] entry.
 1228  */
 1229 int
 1230 exec_remove(const struct execsw *esp)
 1231 {
 1232         struct exec_entry       *it;
 1233         int                     error;
 1234 
 1235         error = 0;
 1236         lockmgr(&exec_lock, LK_EXCLUSIVE, NULL);
 1237 
 1238         LIST_FOREACH(it, &ex_head, ex_list) {
 1239                 /* assume tuple (makecmds, probe_func, emulation) is unique */
 1240                 if (it->es->es_makecmds == esp->es_makecmds
 1241                     && it->es->u.elf_probe_func == esp->u.elf_probe_func
 1242                     && it->es->es_emul == esp->es_emul)
 1243                         break;
 1244         }
 1245         if (!it) {
 1246                 error = ENOENT;
 1247                 goto out;
 1248         }
 1249 
 1250         /* remove item from list and free resources */
 1251         LIST_REMOVE(it, ex_list);
 1252         FREE(it, M_EXEC);
 1253 
 1254         /* update execsw[] */
 1255         exec_init(0);
 1256 
 1257  out:
 1258         lockmgr(&exec_lock, LK_RELEASE, NULL);
 1259         return error;
 1260 }
 1261 
 1262 static void
 1263 link_es(struct execsw_entry **listp, const struct execsw *esp)
 1264 {
 1265         struct execsw_entry *et, *e1;
 1266 
 1267         et = (struct execsw_entry *) malloc(sizeof(struct execsw_entry),
 1268                         M_TEMP, M_WAITOK);
 1269         et->next = NULL;
 1270         et->es = esp;
 1271         if (*listp == NULL) {
 1272                 *listp = et;
 1273                 return;
 1274         }
 1275 
 1276         switch(et->es->es_prio) {
 1277         case EXECSW_PRIO_FIRST:
 1278                 /* put new entry as the first */
 1279                 et->next = *listp;
 1280                 *listp = et;
 1281                 break;
 1282         case EXECSW_PRIO_ANY:
 1283                 /* put new entry after all *_FIRST and *_ANY entries */
 1284                 for(e1 = *listp; e1->next
 1285                         && e1->next->es->es_prio != EXECSW_PRIO_LAST;
 1286                         e1 = e1->next);
 1287                 et->next = e1->next;
 1288                 e1->next = et;
 1289                 break;
 1290         case EXECSW_PRIO_LAST:
 1291                 /* put new entry as the last one */
 1292                 for(e1 = *listp; e1->next; e1 = e1->next);
 1293                 e1->next = et;
 1294                 break;
 1295         default:
 1296 #ifdef DIAGNOSTIC
 1297                 panic("execw[] entry with unknown priority %d found",
 1298                         et->es->es_prio);
 1299 #else
 1300                 free(et, M_TEMP);
 1301 #endif
 1302                 break;
 1303         }
 1304 }
 1305 
 1306 /*
 1307  * Initialize exec structures. If init_boot is true, also does necessary
 1308  * one-time initialization (it's called from main() that way).
 1309  * Once system is multiuser, this should be called with exec_lock held,
 1310  * i.e. via exec_{add|remove}().
 1311  */
 1312 int
 1313 exec_init(int init_boot)
 1314 {
 1315         const struct execsw     **new_es, * const *old_es;
 1316         struct execsw_entry     *list, *e1;
 1317         struct exec_entry       *e2;
 1318         int                     i, es_sz;
 1319 
 1320         if (init_boot) {
 1321                 /* do one-time initializations */
 1322                 lockinit(&exec_lock, PWAIT, "execlck", 0, 0);
 1323 
 1324                 /* register compiled-in emulations */
 1325                 for(i=0; i < nexecs_builtin; i++) {
 1326                         if (execsw_builtin[i].es_emul)
 1327                                 emul_register(execsw_builtin[i].es_emul, 1);
 1328                 }
 1329 #ifdef DIAGNOSTIC
 1330                 if (i == 0)
 1331                         panic("no emulations found in execsw_builtin[]");
 1332 #endif
 1333         }
 1334 
 1335         /*
 1336          * Build execsw[] array from builtin entries and entries added
 1337          * at runtime.
 1338          */
 1339         list = NULL;
 1340         for(i=0; i < nexecs_builtin; i++)
 1341                 link_es(&list, &execsw_builtin[i]);
 1342 
 1343         /* Add dynamically loaded entries */
 1344         es_sz = nexecs_builtin;
 1345         LIST_FOREACH(e2, &ex_head, ex_list) {
 1346                 link_es(&list, e2->es);
 1347                 es_sz++;
 1348         }
 1349 
 1350         /*
 1351          * Now that we have sorted all execw entries, create new execsw[]
 1352          * and free no longer needed memory in the process.
 1353          */
 1354         new_es = malloc(es_sz * sizeof(struct execsw *), M_EXEC, M_WAITOK);
 1355         for(i=0; list; i++) {
 1356                 new_es[i] = list->es;
 1357                 e1 = list->next;
 1358                 free(list, M_TEMP);
 1359                 list = e1;
 1360         }
 1361 
 1362         /*
 1363          * New execsw[] array built, now replace old execsw[] and free
 1364          * used memory.
 1365          */
 1366         old_es = execsw;
 1367         execsw = new_es;
 1368         nexecs = es_sz;
 1369         if (old_es)
 1370                 /*XXXUNCONST*/
 1371                 free(__UNCONST(old_es), M_EXEC);
 1372 
 1373         /*
 1374          * Figure out the maximum size of an exec header.
 1375          */
 1376         exec_maxhdrsz = 0;
 1377         for (i = 0; i < nexecs; i++) {
 1378                 if (execsw[i]->es_hdrsz > exec_maxhdrsz)
 1379                         exec_maxhdrsz = execsw[i]->es_hdrsz;
 1380         }
 1381 
 1382         return 0;
 1383 }
 1384 #endif
 1385 
 1386 #ifndef LKM
 1387 /*
 1388  * Simplified exec_init() for kernels without LKMs. Only initialize
 1389  * exec_maxhdrsz and execsw[].
 1390  */
 1391 int
 1392 exec_init(int init_boot)
 1393 {
 1394         int i;
 1395 
 1396 #ifdef DIAGNOSTIC
 1397         if (!init_boot)
 1398                 panic("exec_init(): called with init_boot == 0");
 1399 #endif
 1400 
 1401         /* do one-time initializations */
 1402         nexecs = nexecs_builtin;
 1403         execsw = malloc(nexecs*sizeof(struct execsw *), M_EXEC, M_WAITOK);
 1404 
 1405         /*
 1406          * Fill in execsw[] and figure out the maximum size of an exec header.
 1407          */
 1408         exec_maxhdrsz = 0;
 1409         for(i=0; i < nexecs; i++) {
 1410                 execsw[i] = &execsw_builtin[i];
 1411                 if (execsw_builtin[i].es_hdrsz > exec_maxhdrsz)
 1412                         exec_maxhdrsz = execsw_builtin[i].es_hdrsz;
 1413         }
 1414 
 1415         return 0;
 1416 
 1417 }
 1418 #endif /* !LKM */
 1419 
 1420 static int
 1421 exec_sigcode_map(struct proc *p, const struct emul *e)
 1422 {
 1423         vaddr_t va;
 1424         vsize_t sz;
 1425         int error;
 1426         struct uvm_object *uobj;
 1427 
 1428         sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
 1429 
 1430         if (e->e_sigobject == NULL || sz == 0) {
 1431                 return 0;
 1432         }
 1433 
 1434         /*
 1435          * If we don't have a sigobject for this emulation, create one.
 1436          *
 1437          * sigobject is an anonymous memory object (just like SYSV shared
 1438          * memory) that we keep a permanent reference to and that we map
 1439          * in all processes that need this sigcode. The creation is simple,
 1440          * we create an object, add a permanent reference to it, map it in
 1441          * kernel space, copy out the sigcode to it and unmap it.
 1442          * We map it with PROT_READ|PROT_EXEC into the process just
 1443          * the way sys_mmap() would map it.
 1444          */
 1445 
 1446         uobj = *e->e_sigobject;
 1447         if (uobj == NULL) {
 1448                 uobj = uao_create(sz, 0);
 1449                 (*uobj->pgops->pgo_reference)(uobj);
 1450                 va = vm_map_min(kernel_map);
 1451                 if ((error = uvm_map(kernel_map, &va, round_page(sz),
 1452                     uobj, 0, 0,
 1453                     UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
 1454                     UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
 1455                         printf("kernel mapping failed %d\n", error);
 1456                         (*uobj->pgops->pgo_detach)(uobj);
 1457                         return (error);
 1458                 }
 1459                 memcpy((void *)va, e->e_sigcode, sz);
 1460 #ifdef PMAP_NEED_PROCWR
 1461                 pmap_procwr(&proc0, va, sz);
 1462 #endif
 1463                 uvm_unmap(kernel_map, va, va + round_page(sz));
 1464                 *e->e_sigobject = uobj;
 1465         }
 1466 
 1467         /* Just a hint to uvm_map where to put it. */
 1468         va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
 1469             round_page(sz));
 1470 
 1471 #ifdef __alpha__
 1472         /*
 1473          * Tru64 puts /sbin/loader at the end of user virtual memory,
 1474          * which causes the above calculation to put the sigcode at
 1475          * an invalid address.  Put it just below the text instead.
 1476          */
 1477         if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
 1478                 va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
 1479         }
 1480 #endif
 1481 
 1482         (*uobj->pgops->pgo_reference)(uobj);
 1483         error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
 1484                         uobj, 0, 0,
 1485                         UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
 1486                                     UVM_ADV_RANDOM, 0));
 1487         if (error) {
 1488                 (*uobj->pgops->pgo_detach)(uobj);
 1489                 return (error);
 1490         }
 1491         p->p_sigctx.ps_sigcode = (void *)va;
 1492         return (0);
 1493 }
Cache object: 1317b7aacb1edbc3e0c6eb928c46e72c
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/kern_exec.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_exec.c