The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_exec.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1993, David Greenman
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD: releng/5.2/sys/kern/kern_exec.c 122524 2003-11-12 03:14:31Z rwatson $");
   29 
   30 #include "opt_ktrace.h"
   31 #include "opt_mac.h"
   32 
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/eventhandler.h>
   36 #include <sys/lock.h>
   37 #include <sys/mutex.h>
   38 #include <sys/sysproto.h>
   39 #include <sys/signalvar.h>
   40 #include <sys/kernel.h>
   41 #include <sys/mac.h>
   42 #include <sys/mount.h>
   43 #include <sys/filedesc.h>
   44 #include <sys/fcntl.h>
   45 #include <sys/acct.h>
   46 #include <sys/exec.h>
   47 #include <sys/imgact.h>
   48 #include <sys/imgact_elf.h>
   49 #include <sys/wait.h>
   50 #include <sys/malloc.h>
   51 #include <sys/proc.h>
   52 #include <sys/pioctl.h>
   53 #include <sys/namei.h>
   54 #include <sys/sysent.h>
   55 #include <sys/shm.h>
   56 #include <sys/sysctl.h>
   57 #include <sys/user.h>
   58 #include <sys/vnode.h>
   59 #ifdef KTRACE
   60 #include <sys/ktrace.h>
   61 #endif
   62 
   63 #include <vm/vm.h>
   64 #include <vm/vm_param.h>
   65 #include <vm/pmap.h>
   66 #include <vm/vm_page.h>
   67 #include <vm/vm_map.h>
   68 #include <vm/vm_kern.h>
   69 #include <vm/vm_extern.h>
   70 #include <vm/vm_object.h>
   71 #include <vm/vm_pager.h>
   72 
   73 #include <machine/reg.h>
   74 
   75 MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
   76 /* Forward declarations: the sysctl handlers below and the execve() back end. */
   77 static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
   78 static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
   79 static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);
   80 static int kern_execve(struct thread *td, char *fname, char **argv,
   81         char **envv, struct mac *mac_p);
   82 
   83 /* kern.ps_strings, read-only.  XXX This should be vm_size_t. */
   84 SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD,
   85     NULL, 0, sysctl_kern_ps_strings, "LU", "");
   86 
   87 /* kern.usrstack, read-only.  XXX This should be vm_size_t. */
   88 SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD,
   89     NULL, 0, sysctl_kern_usrstack, "LU", "");
   90 /* kern.stackprot, read-only: sv_stackprot of the calling process. */
   91 SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD,
   92     NULL, 0, sysctl_kern_stackprot, "I", "");
   93 /* Writable cap on the bytes (struct pargs included) cached in p_args at exec. */
   94 u_long ps_arg_cache_limit = PAGE_SIZE / 16;
   95 SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW,
   96     &ps_arg_cache_limit, 0, "");
   97 /* NOTE(review): ps_argsopen is writable but unused in the code visible here. */
   98 int ps_argsopen = 1;
   99 SYSCTL_INT(_kern, OID_AUTO, ps_argsopen, CTLFLAG_RW, &ps_argsopen, 0, "");
  100 
  101 static int
  102 sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
  103 {
  104         struct proc *self = curproc;
  105 
  106         /* Copy out sv_psstrings from the calling process's sysentvec. */
  107         return (SYSCTL_OUT(req, &self->p_sysent->sv_psstrings,
  108             sizeof(self->p_sysent->sv_psstrings)));
  109 }
  110 
  111 static int
  112 sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
  113 {
  114         struct proc *self = curproc;
  115 
  116         /* Copy out sv_usrstack from the calling process's sysentvec. */
  117         return (SYSCTL_OUT(req, &self->p_sysent->sv_usrstack,
  118             sizeof(self->p_sysent->sv_usrstack)));
  119 }
  120 
  121 static int
  122 sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS)
  123 {
  124         struct proc *self = curproc;
  125 
  126         /* Copy out sv_stackprot from the calling process's sysentvec. */
  127         return (SYSCTL_OUT(req, &self->p_sysent->sv_stackprot,
  128             sizeof(self->p_sysent->sv_stackprot)));
  129 }
  130 
  131 /*
  132  * Each of the items is a pointer to a `const struct execsw', hence the
  133  * double pointer here.  kern_execve() scans it until it meets a NULL item.
  134  */
  135 static const struct execsw **execsw;
  136 
  137 /*
  138  * In-kernel implementation of execve().  fname, argv, envv and mac_p are
  139  * assumed to be userspace pointers from the passed thread.  Returns 0 or
  140  * an errno; if the old vmspace is already gone on failure, the process exits.
  141  * MPSAFE
  142  */
  143 static int
  144 kern_execve(td, fname, argv, envv, mac_p)
  145         struct thread *td;
  146         char *fname;
  147         char **argv;
  148         char **envv;
  149         struct mac *mac_p;
  150 {
  151         struct proc *p = td->td_proc;
  152         struct nameidata nd, *ndp;
  153         struct ucred *newcred = NULL, *oldcred;
  154         struct uidinfo *euip;
  155         register_t *stack_base;
  156         int error, len, i;
  157         struct image_params image_params, *imgp;
  158         struct vattr attr;
  159         int (*img_first)(struct image_params *);
  160         struct pargs *oldargs = NULL, *newargs = NULL;
  161         struct sigacts *oldsigacts, *newsigacts;
  162 #ifdef KTRACE
  163         struct vnode *tracevp = NULL;
  164         struct ucred *tracecred = NULL;
  165 #endif
  166         struct vnode *textvp = NULL;
  167         int credential_changing;
  168         int textset;
  169 #ifdef MAC
  170         struct label *interplabel = NULL;
  171         int will_transition;
  172 #endif
  173 
  174         imgp = &image_params;
  175 
  176         /*
  177          * Lock the process and set the P_INEXEC flag to indicate that
  178          * it should be left alone until we're done here.  This is
  179          * necessary to avoid race conditions - e.g. in ptrace() -
  180          * that might allow a local user to illicitly obtain elevated
  181          * privileges.
  182          */
  183         PROC_LOCK(p);
  184         KASSERT((p->p_flag & P_INEXEC) == 0,
  185             ("%s(): process already has P_INEXEC flag", __func__));
  186         if (p->p_flag & P_SA || p->p_numthreads > 1) {
  187                 if (thread_single(SINGLE_EXIT)) {
  188                         PROC_UNLOCK(p);
  189                         return (ERESTART);      /* Try again later. */
  190                 }
  191                 /*
  192                  * If we get here all other threads are dead,
  193                  * so unset the associated flags and lose KSE mode.
  194                  */
  195                 p->p_flag &= ~P_SA;
  196                 td->td_mailbox = NULL;
  197                 thread_single_end();
  198         }
  199         p->p_flag |= P_INEXEC;
  200         PROC_UNLOCK(p);
  201 
  202         /*
  203          * Initialize part of the common data
  204          */
  205         imgp->proc = p;
  206         imgp->userspace_argv = argv;
  207         imgp->userspace_envv = envv;
  208         imgp->execlabel = NULL;
  209         imgp->attr = &attr;
  210         imgp->argc = imgp->envc = 0;
  211         imgp->argv0 = NULL;
  212         imgp->entry_addr = 0;
  213         imgp->vmspace_destroyed = 0;
  214         imgp->interpreted = 0;
  215         imgp->interpreter_name[0] = '\0';
  216         imgp->auxargs = NULL;
  217         imgp->vp = NULL;
  218         imgp->object = NULL;
  219         imgp->firstpage = NULL;
  220         imgp->ps_strings = 0;
  221         imgp->auxarg_size = 0;
  222 
  223 #ifdef MAC
  224         error = mac_execve_enter(imgp, mac_p);
  225         if (error) {
  226                 mtx_lock(&Giant);
  227                 goto exec_fail;
  228         }
  229 #endif
  230 
  231         /*
  232          * Allocate temporary demand zeroed space for argument and
  233          *      environment strings
  234          */
  235         imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX +
  236             PAGE_SIZE);
  237         if (imgp->stringbase == NULL) {
  238                 error = ENOMEM;
  239                 mtx_lock(&Giant);
  240                 goto exec_fail;
  241         }
  242         imgp->stringp = imgp->stringbase;
  243         imgp->stringspace = ARG_MAX;
  244         imgp->image_header = imgp->stringbase + ARG_MAX;
  245 
  246         /*
  247          * Translate the file name. namei() returns a vnode pointer
  248          *      in ni_vp among other things.
  249          */
  250         ndp = &nd;
  251         NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
  252             UIO_USERSPACE, fname, td);
  253 
  254         mtx_lock(&Giant);
  255 interpret:
  256 
  257         error = namei(ndp);
  258         if (error) {
  259                 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
  260                     ARG_MAX + PAGE_SIZE);
  261                 goto exec_fail;
  262         }
  263 
  264         imgp->vp = ndp->ni_vp;
  265         imgp->fname = fname;
  266 
  267         /*
  268          * Check file permissions (also 'opens' file)
  269          */
  270         error = exec_check_permissions(imgp);
  271         if (error)
  272                 goto exec_fail_dealloc;
  273 
  274         if (VOP_GETVOBJECT(imgp->vp, &imgp->object) == 0)
  275                 vm_object_reference(imgp->object);
  276 
  277         /*
  278          * Set VV_TEXT now so no one can write to the executable while we're
  279          * activating it.
  280          *
  281          * Remember if this was set before and unset it in case this is not
  282          * actually an executable image.
  283          */
  284         textset = imgp->vp->v_vflag & VV_TEXT;
  285         imgp->vp->v_vflag |= VV_TEXT;
  286 
  287         error = exec_map_first_page(imgp);
  288         if (error)
  289                 goto exec_fail_dealloc;
  290 
  291         /*
  292          *      If the current process has a special image activator it
  293          *      wants to try first, call it.   For example, emulating shell
  294          *      scripts differently.
  295          */
  296         error = -1;
  297         if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
  298                 error = img_first(imgp);
  299 
  300         /*
  301          *      Loop through the list of image activators, calling each one.
  302          *      An activator returns -1 if there is no match, 0 on success,
  303          *      and an error otherwise.
  304          */
  305         for (i = 0; error == -1 && execsw[i]; ++i) {
  306                 if (execsw[i]->ex_imgact == NULL ||
  307                     execsw[i]->ex_imgact == img_first) {
  308                         continue;
  309                 }
  310                 error = (*execsw[i]->ex_imgact)(imgp);
  311         }
  312 
  313         if (error) {
  314                 if (error == -1) {
  315                         if (textset == 0)
  316                                 imgp->vp->v_vflag &= ~VV_TEXT;
  317                         error = ENOEXEC;
  318                 }
  319                 goto exec_fail_dealloc;
  320         }
  321 
  322         /*
  323          * Special interpreter operation, cleanup and loop up to try to
  324          * activate the interpreter.
  325          */
  326         if (imgp->interpreted) {
  327                 exec_unmap_first_page(imgp);
  328                 /*
  329                  * VV_TEXT needs to be unset for scripts.  There is a short
  330                  * period before we determine that something is a script where
  331                  * VV_TEXT will be set. The vnode lock is held over this
  332                  * entire period so nothing should illegitimately be blocked.
  333                  */
  334                 imgp->vp->v_vflag &= ~VV_TEXT;
  335                 /* free name buffer and old vnode */
  336                 NDFREE(ndp, NDF_ONLY_PNBUF);
  337 #ifdef MAC
  338                 interplabel = mac_vnode_label_alloc();
  339                 mac_copy_vnode_label(ndp->ni_vp->v_label, interplabel);
  340 #endif
  341                 vput(ndp->ni_vp);
  342                 vm_object_deallocate(imgp->object);
  343                 imgp->object = NULL;
  344                 /* set new name to that of the interpreter */
  345                 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
  346                     UIO_SYSSPACE, imgp->interpreter_name, td);
  347                 goto interpret;
  348         }
  349 
  350         /*
  351          * Copy out strings (args and env) and initialize stack base
  352          */
  353         if (p->p_sysent->sv_copyout_strings)
  354                 stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);
  355         else
  356                 stack_base = exec_copyout_strings(imgp);
  357 
  358         /*
  359          * If custom stack fixup routine present for this process
  360          * let it do the stack setup.
  361          * Else stuff argument count as first item on stack
  362          */
  363         if (p->p_sysent->sv_fixup)
  364                 (*p->p_sysent->sv_fixup)(&stack_base, imgp);
  365         else
  366                 suword(--stack_base, imgp->argc);
  367 
  368         /*
  369          * For security and other reasons, the file descriptor table cannot
  370          * be shared after an exec.
  371          */
  372         FILEDESC_LOCK(p->p_fd);
  373         if (p->p_fd->fd_refcnt > 1) {
  374                 struct filedesc *tmp;
  375 
  376                 tmp = fdcopy(td->td_proc->p_fd);
  377                 FILEDESC_UNLOCK(p->p_fd);
  378                 fdfree(td);
  379                 p->p_fd = tmp;
  380         } else
  381                 FILEDESC_UNLOCK(p->p_fd);
  382 
  383         /*
  384          * Malloc things before we need locks.
  385          */
  386         newcred = crget();
  387         euip = uifind(attr.va_uid);
  388         i = imgp->endargs - imgp->stringbase;
  389         if (ps_arg_cache_limit >= i + sizeof(struct pargs))
  390                 newargs = pargs_alloc(i);
  391 
  392         /* close files on exec */
  393         fdcloseexec(td);
  394 
  395         /* Get a reference to the vnode prior to locking the proc */
  396         VREF(ndp->ni_vp);
  397 
  398         /*
  399          * For security and other reasons, signal handlers cannot
  400          * be shared after an exec. The new process gets a copy of the old
  401          * handlers. In execsigs(), the new process will have its signals
  402          * reset.
  403          */
  404         PROC_LOCK(p);
  405         if (sigacts_shared(p->p_sigacts)) {
  406                 oldsigacts = p->p_sigacts;
  407                 PROC_UNLOCK(p);
  408                 newsigacts = sigacts_alloc();
  409                 sigacts_copy(newsigacts, oldsigacts);
  410                 PROC_LOCK(p);
  411                 p->p_sigacts = newsigacts;
  412         } else
  413                 oldsigacts = NULL;
  414 
  415         /* Stop profiling */
  416         stopprofclock(p);
  417 
  418         /* reset caught signals */
  419         execsigs(p);
  420 
  421         /* name this process - nameiexec(p, ndp) */
  422         len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN);
  423         bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len);
  424         p->p_comm[len] = 0;
  425 
  426         /*
  427          * mark as execed, wakeup the process that vforked (if any) and tell
  428          * it that it now has its own resources back
  429          */
  430         p->p_flag |= P_EXEC;
  431         if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
  432                 p->p_flag &= ~P_PPWAIT;
  433                 wakeup(p->p_pptr);
  434         }
  435 
  436         /*
  437          * Implement image setuid/setgid.
  438          *
  439          * Don't honor setuid/setgid if the filesystem prohibits it or if
  440          * the process is being traced.
  441          *
  442          * XXXMAC: For the time being, use NOSUID to also prohibit
  443          * transitions on the file system.
  444          */
  445         oldcred = p->p_ucred;
  446         credential_changing = 0;
  447         credential_changing |= (attr.va_mode & VSUID) && oldcred->cr_uid !=
  448             attr.va_uid;
  449         credential_changing |= (attr.va_mode & VSGID) && oldcred->cr_gid !=
  450             attr.va_gid;
  451 #ifdef MAC
  452         will_transition = mac_execve_will_transition(oldcred, imgp->vp,
  453             interplabel, imgp);
  454         credential_changing |= will_transition;
  455 #endif
  456 
  457         if (credential_changing &&
  458             (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
  459             (p->p_flag & P_TRACED) == 0) {
  460                 /*
  461                  * Turn off syscall tracing for set-id programs, except for
  462                  * root.  Record any set-id flags first to make sure that
  463                  * we do not regain any tracing during a possible block.
  464                  */
  465                 setsugid(p);
  466 #ifdef KTRACE
  467                 if (p->p_tracevp != NULL && suser_cred(oldcred, PRISON_ROOT)) {
  468                         mtx_lock(&ktrace_mtx);
  469                         p->p_traceflag = 0;
  470                         tracevp = p->p_tracevp;
  471                         p->p_tracevp = NULL;
  472                         tracecred = p->p_tracecred;
  473                         p->p_tracecred = NULL;
  474                         mtx_unlock(&ktrace_mtx);
  475                 }
  476 #endif
  477                 /*
  478                  * Close any file descriptors 0..2 that reference procfs,
  479                  * then make sure file descriptors 0..2 are in use.
  480                  *
  481                  * setugidsafety() may call closef() and then pfind()
  482                  * which may grab the process lock.
  483                  * fdcheckstd() may call falloc() which may block to
  484                  * allocate memory, so temporarily drop the process lock.
  485                  */
  486                 PROC_UNLOCK(p);
  487                 setugidsafety(td);
  488                 error = fdcheckstd(td);
  489                 if (error != 0)
  490                         goto done1;
  491                 PROC_LOCK(p);
  492                 /*
  493                  * Set the new credentials.
  494                  */
  495                 crcopy(newcred, oldcred);
  496                 if (attr.va_mode & VSUID)
  497                         change_euid(newcred, euip);
  498                 if (attr.va_mode & VSGID)
  499                         change_egid(newcred, attr.va_gid);
  500 #ifdef MAC
  501                 if (will_transition) {
  502                         mac_execve_transition(oldcred, newcred, imgp->vp,
  503                             interplabel, imgp);
  504                 }
  505 #endif
  506                 /*
  507                  * Implement correct POSIX saved-id behavior.
  508                  *
  509                  * XXXMAC: Note that the current logic will save the
  510                  * uid and gid if a MAC domain transition occurs, even
  511                  * though maybe it shouldn't.
  512                  */
  513                 change_svuid(newcred, newcred->cr_uid);
  514                 change_svgid(newcred, newcred->cr_gid);
  515                 p->p_ucred = newcred;
  516                 newcred = NULL;
  517         } else {
  518                 if (oldcred->cr_uid == oldcred->cr_ruid &&
  519                     oldcred->cr_gid == oldcred->cr_rgid)
  520                         p->p_flag &= ~P_SUGID;
  521                 /*
  522                  * Implement correct POSIX saved-id behavior.
  523                  *
  524                  * XXX: It's not clear that the existing behavior is
  525                  * POSIX-compliant.  A number of sources indicate that the
  526                  * saved uid/gid should only be updated if the new ruid is
  527                  * not equal to the old ruid, or the new euid is not equal
  528                  * to the old euid and the new euid is not equal to the old
  529                  * ruid.  The FreeBSD code always updates the saved uid/gid.
  530                  * Also, this code uses the new (replaced) euid and egid as
  531                  * the source, which may or may not be the right ones to use.
  532                  */
  533                 if (oldcred->cr_svuid != oldcred->cr_uid ||
  534                     oldcred->cr_svgid != oldcred->cr_gid) {
  535                         crcopy(newcred, oldcred);
  536                         change_svuid(newcred, newcred->cr_uid);
  537                         change_svgid(newcred, newcred->cr_gid);
  538                         p->p_ucred = newcred;
  539                         newcred = NULL;
  540                 }
  541         }
  542 
  543         /*
  544          * Store the vp for use in procfs.  This vnode was referenced prior
  545          * to locking the proc lock.
  546          */
  547         textvp = p->p_textvp;
  548         p->p_textvp = ndp->ni_vp;
  549 
  550         /*
  551          * Notify others that we exec'd, and clear the P_INEXEC flag
  552          * as we're now a bona fide freshly-execed process.
  553          */
  554         KNOTE(&p->p_klist, NOTE_EXEC);
  555         p->p_flag &= ~P_INEXEC;
  556 
  557         /*
  558          * If tracing the process, trap to debugger so breakpoints
  559          * can be set before the program executes.
  560          */
  561         if (p->p_flag & P_TRACED)
  562                 psignal(p, SIGTRAP);
  563 
  564         /* clear "fork but no exec" flag, as we _are_ execing */
  565         p->p_acflag &= ~AFORK;
  566 
  567         /* Detach any previous argument cache; it is dropped at done1. */
  568         oldargs = p->p_args;
  569         p->p_args = NULL;
  570 
  571         /* Cache arguments iff a buffer was allocated (limit may have moved) */
  572         if (newargs != NULL) {
  573                 bcopy(imgp->stringbase, newargs->ar_args, i);
  574                 p->p_args = newargs;
  575                 newargs = NULL;
  576         }
  577         PROC_UNLOCK(p);
  578 
  579         /* Set values passed into the program in registers. */
  580         if (p->p_sysent->sv_setregs)
  581                 (*p->p_sysent->sv_setregs)(td, imgp->entry_addr,
  582                     (u_long)(uintptr_t)stack_base, imgp->ps_strings);
  583         else
  584                 exec_setregs(td, imgp->entry_addr,
  585                     (u_long)(uintptr_t)stack_base, imgp->ps_strings);
  586 
  587 done1:
  588         /*
  589          * Free any resources malloc'd earlier that we didn't use.
  590          */
  591         uifree(euip);
  592         if (newcred == NULL)
  593                 crfree(oldcred);
  594         else
  595                 crfree(newcred);
  596         /*
  597          * Handle deferred decrement of ref counts.
  598          */
  599         if (textvp != NULL)
  600                 vrele(textvp);
  601         if (ndp->ni_vp && error != 0)
  602                 vrele(ndp->ni_vp);
  603 #ifdef KTRACE
  604         if (tracevp != NULL)
  605                 vrele(tracevp);
  606         if (tracecred != NULL)
  607                 crfree(tracecred);
  608 #endif
  609         if (oldargs != NULL)
  610                 pargs_drop(oldargs);
  611         if (newargs != NULL)
  612                 pargs_drop(newargs);
  613         if (oldsigacts != NULL)
  614                 sigacts_free(oldsigacts);
  615 
  616 exec_fail_dealloc:
  617 
  618         /*
  619          * free various allocated resources
  620          */
  621         if (imgp->firstpage)
  622                 exec_unmap_first_page(imgp);
  623 
  624         if (imgp->vp) {
  625                 NDFREE(ndp, NDF_ONLY_PNBUF);
  626                 vput(imgp->vp);
  627         }
  628 
  629         if (imgp->stringbase != NULL)
  630                 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
  631                     ARG_MAX + PAGE_SIZE);
  632 
  633         if (imgp->object)
  634                 vm_object_deallocate(imgp->object);
  635 
  636         if (error == 0) {
  637                 /*
  638                  * Stop the process here if its stop event mask has
  639                  * the S_EXEC bit set.
  640                  */
  641                 STOPEVENT(p, S_EXEC, 0);
  642                 goto done2;
  643         }
  644 
  645 exec_fail:
  646         /* we're done here, clear P_INEXEC */
  647         PROC_LOCK(p);
  648         p->p_flag &= ~P_INEXEC;
  649         PROC_UNLOCK(p);
  650 
  651         if (imgp->vmspace_destroyed) {
  652                 /* sorry, no more process anymore. exit gracefully */
  653 #ifdef MAC
  654                 mac_execve_exit(imgp);
  655                 if (interplabel != NULL)
  656                         mac_vnode_label_free(interplabel);
  657 #endif
  658                 exit1(td, W_EXITCODE(0, SIGABRT));
  659                 /* NOT REACHED */
  660                 error = 0;
  661         }
  662 done2:
  663 #ifdef MAC
  664         mac_execve_exit(imgp);
  665         if (interplabel != NULL)
  666                 mac_vnode_label_free(interplabel);
  667 #endif
  668         mtx_unlock(&Giant);
  669         return (error);
  670 }
  671 
  672 #ifndef _SYS_SYSPROTO_H_      /* presumably <sys/sysproto.h>'s guard - confirm */
  673 struct execve_args {
  674         char    *fname;         /* handed to kern_execve() as the path */
  675         char    **argv;         /* handed to kern_execve() as argv */
  676         char    **envv;         /* handed to kern_execve() as envv */
  677 };
  678 #endif
  679 
  680 /*
  681  * execve() entry: forwards uap's fields to kern_execve(), mac_p NULL.  MPSAFE
  682  */
  683 int
  684 execve(td, uap)
  685         struct thread *td;
  686         struct execve_args /* {
  687                 char *fname;
  688                 char **argv;
  689                 char **envv;
  690         } */ *uap;
  691 {
  692 
  693         return (kern_execve(td, uap->fname, uap->argv, uap->envv, NULL));
  694 }
  695 
  696 #ifndef _SYS_SYSPROTO_H_      /* presumably <sys/sysproto.h>'s guard - confirm */
  697 struct __mac_execve_args {
  698         char    *fname;         /* handed to kern_execve() as the path */
  699         char    **argv;         /* handed to kern_execve() as argv */
  700         char    **envv;         /* handed to kern_execve() as envv */
  701         struct mac      *mac_p; /* handed to kern_execve() as mac_p */
  702 };
  703 #endif
  704 
  705 /*
  706  * With MAC: kern_execve() with uap->mac_p; without MAC: ENOSYS.  MPSAFE
  707  */
  708 int
  709 __mac_execve(td, uap)
  710         struct thread *td;
  711         struct __mac_execve_args /* {
  712                 char *fname;
  713                 char **argv;
  714                 char **envv;
  715                 struct mac *mac_p;
  716         } */ *uap;
  717 {
  718 
  719 #ifdef MAC
  720         return (kern_execve(td, uap->fname, uap->argv, uap->envv,
  721             uap->mac_p));
  722 #else
  723         return (ENOSYS);
  724 #endif
  725 }
  726 
  727 /* Wire the image's first page and map it at imgp->image_header; 0 or EIO. */
  728 int
  729 exec_map_first_page(imgp)
  730         struct image_params *imgp;
  731 {
  732         int rv, i, initial_pagein;
  733         vm_page_t ma[VM_INITIAL_PAGEIN];
  734         vm_object_t object;
  735 
  736         GIANT_REQUIRED;
  737 
  738         if (imgp->firstpage)
  739                 exec_unmap_first_page(imgp);    /* drop the old mapping */
  740 
  741         if (VOP_GETVOBJECT(imgp->vp, &object) != 0)
  742                 return (EIO);   /* no usable VM object behind this vnode */
  743         VM_OBJECT_LOCK(object);
  744         ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
  745         if ((ma[0]->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
  746                 initial_pagein = VM_INITIAL_PAGEIN;
  747                 if (initial_pagein > object->size)
  748                         initial_pagein = object->size;
  749                 for (i = 1; i < initial_pagein; i++) {
  750                         if ((ma[i] = vm_page_lookup(object, i)) != NULL) {
  751                                 if (ma[i]->valid)
  752                                         break;
  753                                 vm_page_lock_queues();
  754                                 if ((ma[i]->flags & PG_BUSY) || ma[i]->busy) {
  755                                         vm_page_unlock_queues();
  756                                         break;
  757                                 }
  758                                 vm_page_busy(ma[i]);
  759                                 vm_page_unlock_queues();
  760                         } else {
  761                                 ma[i] = vm_page_alloc(object, i,
  762                                     VM_ALLOC_NORMAL);
  763                                 if (ma[i] == NULL)
  764                                         break;
  765                         }
  766                 }
  767                 initial_pagein = i;     /* pages gathered for the pager */
  768                 rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
  769                 ma[0] = vm_page_lookup(object, 0);
  770                 if ((rv != VM_PAGER_OK) || (ma[0] == NULL) ||
  771                     (ma[0]->valid == 0)) {
  772                         if (ma[0]) {
  773                                 vm_page_lock_queues();
  774                                 pmap_remove_all(ma[0]);
  775                                 vm_page_free(ma[0]);
  776                                 vm_page_unlock_queues();
  777                         }
  778                         VM_OBJECT_UNLOCK(object);
  779                         return (EIO);
  780                 }
  781         }
  782         vm_page_lock_queues();
  783         vm_page_wire(ma[0]);
  784         vm_page_wakeup(ma[0]);
  785         vm_page_unlock_queues();
  786         VM_OBJECT_UNLOCK(object);
  787 
  788         pmap_qenter((vm_offset_t)imgp->image_header, ma, 1);
  789         imgp->firstpage = ma[0];        /* remembered for the unmap */
  790 
  791         return (0);
  792 }
  793 
  794 void
  795 exec_unmap_first_page(imgp)
  796         struct image_params *imgp;
  797 {
  798         GIANT_REQUIRED;
  799         /* Undo exec_map_first_page(); a no-op when no page is mapped. */
  800         if (imgp->firstpage == NULL)
  801                 return;
  802 
  803         pmap_qremove((vm_offset_t)imgp->image_header, 1);
  804         vm_page_lock_queues();
  805         vm_page_unwire(imgp->firstpage, 1);
  806         vm_page_unlock_queues();
  807         imgp->firstpage = NULL;
  808 }
  808 
  809 /*
  810  * Destroy old address space, and allocate a new stack
  811  *      The new stack is only SGROWSIZ large because it is grown
  812  *      automatically in trap.c.
  813  */
  814 int
  815 exec_new_vmspace(imgp, sv)
  816         struct image_params *imgp;
  817         struct sysentvec *sv;
  818 {
  819         int error;
  820         struct proc *p = imgp->proc;
  821         struct vmspace *vmspace = p->p_vmspace;
  822         vm_offset_t stack_addr;
  823         vm_map_t map;
  824 
  825         GIANT_REQUIRED;
  826 
  827         imgp->vmspace_destroyed = 1;
  828 
  829         EVENTHANDLER_INVOKE(process_exec, p);
  830 
  831         /*
  832          * Here is as good a place as any to do any resource limit cleanups.
  833          * This is needed if a 64 bit binary exec's a 32 bit binary - the
  834          * data size limit may need to be changed to a value that makes
  835          * sense for the 32 bit binary.
  836          */
  837         if (sv->sv_fixlimits)
  838                 sv->sv_fixlimits(imgp);
  839 
  840         /*
  841          * Blow away entire process VM, if address space not shared,
  842          * otherwise, create a new VM space so that other threads are
  843          * not disrupted
  844          */
  845         map = &vmspace->vm_map;
  846         if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv->sv_minuser &&
  847             vm_map_max(map) == sv->sv_maxuser) {
  848                 shmexit(vmspace);
  849                 vm_page_lock_queues();
  850                 pmap_remove_pages(vmspace_pmap(vmspace), vm_map_min(map),
  851                     vm_map_max(map));
  852                 vm_page_unlock_queues();
  853                 vm_map_remove(map, vm_map_min(map), vm_map_max(map));
  854         } else {
  855                 vmspace_exec(p, sv->sv_minuser, sv->sv_maxuser);
  856                 vmspace = p->p_vmspace;
  857                 map = &vmspace->vm_map;
  858         }
  859 
  860         /* Allocate a new stack */
  861         stack_addr = sv->sv_usrstack - maxssiz;
  862         error = vm_map_stack(map, stack_addr, (vm_size_t)maxssiz,
  863             sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
  864         if (error)
  865                 return (error);
  866 
  867 #ifdef __ia64__
  868         /* Allocate a new register stack */
  869         stack_addr = IA64_BACKINGSTORE;
  870         error = vm_map_stack(map, stack_addr, (vm_size_t)maxssiz,
  871             sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_UP);
  872         if (error)
  873                 return (error);
  874 #endif
  875 
  876         /* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the
  877          * VM_STACK case, but they are still used to monitor the size of the
  878          * process stack so we can check the stack rlimit.
  879          */
  880         vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
  881         vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - maxssiz;
  882 
  883         return (0);
  884 }
  885 
  886 /*
  887  * Copy out argument and environment strings from the old process
  888  *      address space into the temporary string buffer.
  889  */
  890 int
  891 exec_extract_strings(imgp)
  892         struct image_params *imgp;
  893 {
  894         char    **argv, **envv;
  895         char    *argp, *envp;
  896         int     error;
  897         size_t  length;
  898 
  899         /*
  900          * extract arguments first
  901          */
  902 
  903         argv = imgp->userspace_argv;
  904 
  905         if (argv) {
  906                 argp = (caddr_t)(intptr_t)fuword(argv);
  907                 if (argp == (caddr_t)-1)
  908                         return (EFAULT);
  909                 if (argp)
  910                         argv++;
  911                 if (imgp->argv0)
  912                         argp = imgp->argv0;
  913                 if (argp) {
  914                         do {
  915                                 if (argp == (caddr_t)-1)
  916                                         return (EFAULT);
  917                                 if ((error = copyinstr(argp, imgp->stringp,
  918                                     imgp->stringspace, &length))) {
  919                                         if (error == ENAMETOOLONG)
  920                                                 return (E2BIG);
  921                                         return (error);
  922                                 }
  923                                 imgp->stringspace -= length;
  924                                 imgp->stringp += length;
  925                                 imgp->argc++;
  926                         } while ((argp = (caddr_t)(intptr_t)fuword(argv++)));
  927                 }
  928         }       
  929 
  930         imgp->endargs = imgp->stringp;
  931 
  932         /*
  933          * extract environment strings
  934          */
  935 
  936         envv = imgp->userspace_envv;
  937 
  938         if (envv) {
  939                 while ((envp = (caddr_t)(intptr_t)fuword(envv++))) {
  940                         if (envp == (caddr_t)-1)
  941                                 return (EFAULT);
  942                         if ((error = copyinstr(envp, imgp->stringp,
  943                             imgp->stringspace, &length))) {
  944                                 if (error == ENAMETOOLONG)
  945                                         return (E2BIG);
  946                                 return (error);
  947                         }
  948                         imgp->stringspace -= length;
  949                         imgp->stringp += length;
  950                         imgp->envc++;
  951                 }
  952         }
  953 
  954         return (0);
  955 }
  956 
  957 /*
  958  * Copy strings out to the new process address space, constructing
  959  *      new arg and env vector tables. Return a pointer to the base
  960  *      so that it can be used as the initial stack pointer.
  961  */
  962 register_t *
  963 exec_copyout_strings(imgp)
  964         struct image_params *imgp;
  965 {
  966         int argc, envc;
  967         char **vectp;
  968         char *stringp, *destp;
  969         register_t *stack_base;
  970         struct ps_strings *arginfo;
  971         struct proc *p;
  972         int szsigcode;
  973 
  974         /*
  975          * Calculate string base and vector table pointers.
  976          * Also deal with signal trampoline code for this exec type.
  977          */
  978         p = imgp->proc;
  979         szsigcode = 0;
  980         arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
  981         if (p->p_sysent->sv_szsigcode != NULL)
  982                 szsigcode = *(p->p_sysent->sv_szsigcode);
  983         destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
  984             roundup((ARG_MAX - imgp->stringspace), sizeof(char *));
  985 
  986         /*
  987          * install sigcode
  988          */
  989         if (szsigcode)
  990                 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
  991                     szsigcode), szsigcode);
  992 
  993         /*
  994          * If we have a valid auxargs ptr, prepare some room
  995          * on the stack.
  996          */
  997         if (imgp->auxargs) {
  998                 /*
  999                  * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
 1000                  * lower compatibility.
 1001                  */
 1002                 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
 1003                     (AT_COUNT * 2);
 1004                 /*
 1005                  * The '+ 2' is for the null pointers at the end of each of
 1006                  * the arg and env vector sets,and imgp->auxarg_size is room
 1007                  * for argument of Runtime loader.
 1008                  */
 1009                 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2 +
 1010                     imgp->auxarg_size) * sizeof(char *));
 1011 
 1012         } else 
 1013                 /*
 1014                  * The '+ 2' is for the null pointers at the end of each of
 1015                  * the arg and env vector sets
 1016                  */
 1017                 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2) *
 1018                     sizeof(char *));
 1019 
 1020         /*
 1021          * vectp also becomes our initial stack base
 1022          */
 1023         stack_base = (register_t *)vectp;
 1024 
 1025         stringp = imgp->stringbase;
 1026         argc = imgp->argc;
 1027         envc = imgp->envc;
 1028 
 1029         /*
 1030          * Copy out strings - arguments and environment.
 1031          */
 1032         copyout(stringp, destp, ARG_MAX - imgp->stringspace);
 1033 
 1034         /*
 1035          * Fill in "ps_strings" struct for ps, w, etc.
 1036          */
 1037         suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
 1038         suword(&arginfo->ps_nargvstr, argc);
 1039 
 1040         /*
 1041          * Fill in argument portion of vector table.
 1042          */
 1043         for (; argc > 0; --argc) {
 1044                 suword(vectp++, (long)(intptr_t)destp);
 1045                 while (*stringp++ != 0)
 1046                         destp++;
 1047                 destp++;
 1048         }
 1049 
 1050         /* a null vector table pointer separates the argp's from the envp's */
 1051         suword(vectp++, 0);
 1052 
 1053         suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
 1054         suword(&arginfo->ps_nenvstr, envc);
 1055 
 1056         /*
 1057          * Fill in environment portion of vector table.
 1058          */
 1059         for (; envc > 0; --envc) {
 1060                 suword(vectp++, (long)(intptr_t)destp);
 1061                 while (*stringp++ != 0)
 1062                         destp++;
 1063                 destp++;
 1064         }
 1065 
 1066         /* end of vector table is a null pointer */
 1067         suword(vectp, 0);
 1068 
 1069         return (stack_base);
 1070 }
 1071 
 1072 /*
 1073  * Check permissions of file to execute.
 1074  *      Called with imgp->vp locked.
 1075  *      Return 0 for success or error code on failure.
 1076  */
 1077 int
 1078 exec_check_permissions(imgp)
 1079         struct image_params *imgp;
 1080 {
 1081         struct vnode *vp = imgp->vp;
 1082         struct vattr *attr = imgp->attr;
 1083         struct thread *td;
 1084         int error;
 1085 
 1086         td = curthread;                 /* XXXKSE */
 1087 
 1088         /* Get file attributes */
 1089         error = VOP_GETATTR(vp, attr, td->td_ucred, td);
 1090         if (error)
 1091                 return (error);
 1092 
 1093 #ifdef MAC
 1094         error = mac_check_vnode_exec(td->td_ucred, imgp->vp, imgp);
 1095         if (error)
 1096                 return (error);
 1097 #endif
 1098         
 1099         /*
 1100          * 1) Check if file execution is disabled for the filesystem that this
 1101          *      file resides on.
 1102          * 2) Insure that at least one execute bit is on - otherwise root
 1103          *      will always succeed, and we don't want to happen unless the
 1104          *      file really is executable.
 1105          * 3) Insure that the file is a regular file.
 1106          */
 1107         if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
 1108             ((attr->va_mode & 0111) == 0) ||
 1109             (attr->va_type != VREG))
 1110                 return (EACCES);
 1111 
 1112         /*
 1113          * Zero length files can't be exec'd
 1114          */
 1115         if (attr->va_size == 0)
 1116                 return (ENOEXEC);
 1117 
 1118         /*
 1119          *  Check for execute permission to file based on current credentials.
 1120          */
 1121         error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
 1122         if (error)
 1123                 return (error);
 1124 
 1125         /*
 1126          * Check number of open-for-writes on the file and deny execution
 1127          * if there are any.
 1128          */
 1129         if (vp->v_writecount)
 1130                 return (ETXTBSY);
 1131 
 1132         /*
 1133          * Call filesystem specific open routine (which does nothing in the
 1134          * general case).
 1135          */
 1136         error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1);
 1137         return (error);
 1138 }
 1139 
 1140 /*
 1141  * Exec handler registration
 1142  */
 1143 int
 1144 exec_register(execsw_arg)
 1145         const struct execsw *execsw_arg;
 1146 {
 1147         const struct execsw **es, **xs, **newexecsw;
 1148         int count = 2;  /* New slot and trailing NULL */
 1149 
 1150         if (execsw)
 1151                 for (es = execsw; *es; es++)
 1152                         count++;
 1153         newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
 1154         if (newexecsw == NULL)
 1155                 return (ENOMEM);
 1156         xs = newexecsw;
 1157         if (execsw)
 1158                 for (es = execsw; *es; es++)
 1159                         *xs++ = *es;
 1160         *xs++ = execsw_arg;
 1161         *xs = NULL;
 1162         if (execsw)
 1163                 free(execsw, M_TEMP);
 1164         execsw = newexecsw;
 1165         return (0);
 1166 }
 1167 
 1168 int
 1169 exec_unregister(execsw_arg)
 1170         const struct execsw *execsw_arg;
 1171 {
 1172         const struct execsw **es, **xs, **newexecsw;
 1173         int count = 1;
 1174 
 1175         if (execsw == NULL)
 1176                 panic("unregister with no handlers left?\n");
 1177 
 1178         for (es = execsw; *es; es++) {
 1179                 if (*es == execsw_arg)
 1180                         break;
 1181         }
 1182         if (*es == NULL)
 1183                 return (ENOENT);
 1184         for (es = execsw; *es; es++)
 1185                 if (*es != execsw_arg)
 1186                         count++;
 1187         newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
 1188         if (newexecsw == NULL)
 1189                 return (ENOMEM);
 1190         xs = newexecsw;
 1191         for (es = execsw; *es; es++)
 1192                 if (*es != execsw_arg)
 1193                         *xs++ = *es;
 1194         *xs = NULL;
 1195         if (execsw)
 1196                 free(execsw, M_TEMP);
 1197         execsw = newexecsw;
 1198         return (0);
 1199 }

Cache object: 74df381c629709c065741882ff8a6f44


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.