FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_exec.c
1 /* $NetBSD: kern_exec.c,v 1.194.4.11 2005/10/31 13:25:31 tron Exp $ */
2
3 /*-
4 * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
5 * Copyright (C) 1992 Wolfgang Solfrank.
6 * Copyright (C) 1992 TooLs GmbH.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by TooLs GmbH.
20 * 4. The name of TooLs GmbH may not be used to endorse or promote products
21 * derived from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
29 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
30 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
31 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
32 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.194.4.11 2005/10/31 13:25:31 tron Exp $");
37
38 #include "opt_ktrace.h"
39 #include "opt_syscall_debug.h"
40 #include "opt_compat_netbsd.h"
41 #include "opt_verified_exec.h"
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/filedesc.h>
46 #include <sys/kernel.h>
47 #include <sys/proc.h>
48 #include <sys/mount.h>
49 #include <sys/malloc.h>
50 #include <sys/namei.h>
51 #include <sys/vnode.h>
52 #include <sys/file.h>
53 #include <sys/acct.h>
54 #include <sys/exec.h>
55 #include <sys/ktrace.h>
56 #include <sys/resourcevar.h>
57 #include <sys/wait.h>
58 #include <sys/mman.h>
59 #include <sys/ras.h>
60 #include <sys/signalvar.h>
61 #include <sys/stat.h>
62 #include <sys/syscall.h>
63
64 #include <sys/sa.h>
65 #include <sys/savar.h>
66 #include <sys/syscallargs.h>
67 #ifdef VERIFIED_EXEC
68 #include <sys/verified_exec.h>
69 #endif
70
71 #include <uvm/uvm_extern.h>
72
73 #include <machine/cpu.h>
74 #include <machine/reg.h>
75
/* Maps the emulation's signal trampoline into a newly exec'd process. */
static int exec_sigcode_map(struct proc *, const struct emul *);

#ifdef DEBUG_EXEC
/* Debug printf to the controlling tty; compiles away unless DEBUG_EXEC. */
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif /* DEBUG_EXEC */

/* Malloc type under which exec argument lists and headers are accounted. */
MALLOC_DEFINE(M_EXEC, "exec", "argument lists & other mem used by exec");
85
/*
 * Exec function switch:
 *
 * Note that each makecmds function is responsible for loading the
 * exec package with the necessary functions for any exec-type-specific
 * handling.
 *
 * Functions for specific exec types should be defined in their own
 * header file.
 */
extern const struct execsw execsw_builtin[];	/* compiled-in formats */
extern int nexecs_builtin;			/* count of the above */
/* priority-sorted array of all known exec formats; rebuilt by exec_init() */
static const struct execsw **execsw = NULL;
static int nexecs;				/* entries in execsw[] */

u_int exec_maxhdrsz;	/* must not be static - netbsd32 needs it */
102
#ifdef LKM
/* list of supported emulations */
static
LIST_HEAD(emlist_head, emul_entry) el_head = LIST_HEAD_INITIALIZER(el_head);
struct emul_entry {
	LIST_ENTRY(emul_entry) el_list;	/* linkage on el_head */
	const struct emul *el_emul;	/* the registered emulation */
	int ro_entry;			/* nonzero: may not be unregistered */
};

/* list of dynamically loaded execsw entries */
static
LIST_HEAD(execlist_head, exec_entry) ex_head = LIST_HEAD_INITIALIZER(ex_head);
struct exec_entry {
	LIST_ENTRY(exec_entry) ex_list;	/* linkage on ex_head */
	const struct execsw *es;	/* the loaded execsw entry */
};

/* structure used for building execw[] (transient, freed by exec_init()) */
struct execsw_entry {
	struct execsw_entry *next;	/* singly-linked, priority-sorted */
	const struct execsw *es;
};
#endif /* LKM */
127
#ifdef SYSCALL_DEBUG
extern const char * const syscallnames[];
#endif
#ifdef __HAVE_SYSCALL_INTERN
void syscall_intern(struct proc *);
#else
void syscall(void);
#endif

#ifdef COMPAT_16
/* native signal trampoline code (start/end), mapped by exec_sigcode_map() */
extern char sigcode[], esigcode[];
struct uvm_object *emul_netbsd_object;
#endif
141
/*
 * NetBSD emul struct.
 *
 * NOTE: positional initializer -- the order here must match the field
 * order of struct emul exactly; NULL slots are unused hooks for the
 * native emulation (presumably e_proc_exec/e_proc_fork/e_proc_exit and
 * friends -- confirm against struct emul's declaration).
 */
const struct emul emul_netbsd = {
	"netbsd",
	NULL,		/* emulation path */
#ifndef __HAVE_MINIMAL_EMUL
	EMUL_HAS_SYS___syscall,
	NULL,
	SYS_syscall,
	SYS_NSYSENT,
#endif
	sysent,		/* native system call table */
#ifdef SYSCALL_DEBUG
	syscallnames,
#else
	NULL,
#endif
	sendsig,	/* signal delivery */
	trapsignal,
	NULL,
#ifdef COMPAT_16
	sigcode,	/* on-stack signal trampoline (COMPAT_16 only) */
	esigcode,
	&emul_netbsd_object,
#else
	NULL,
	NULL,
	NULL,
#endif
	setregs,	/* initial register setup after exec */
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
#ifdef __HAVE_SYSCALL_INTERN
	syscall_intern,
#else
	syscall,
#endif
	NULL,
	NULL,

	uvm_default_mapaddr,
};
186
#ifdef LKM
/*
 * Exec lock. Used to control access to execsw[] structures.
 * This must not be static so that netbsd32 can access it, too.
 * Taken shared by exec itself, exclusive by exec_add()/exec_remove().
 */
struct lock exec_lock;

/* insert an execsw entry into a priority-ordered build list (see below) */
static void link_es(struct execsw_entry **, const struct execsw *);
#endif /* LKM */
196
/*
 * check exec:
 * given an "executable" described in the exec package's namei info,
 * see what we can do with it.
 *
 * ON ENTRY:
 *	exec package with appropriate namei info
 *	proc pointer of exec'ing proc
 *	if verified exec enabled then flag indicating a direct exec or
 *	an indirect exec (i.e. for a shell script interpreter)
 *	NO SELF-LOCKED VNODES
 *
 * ON EXIT:
 *	error:	nothing held, etc.  exec header still allocated.
 *	ok:	filled exec package, executable's vnode (unlocked).
 *
 * EXEC SWITCH ENTRY:
 *	Locked vnode to check, exec package, proc.
 *
 * EXEC SWITCH EXIT:
 *	ok:	return 0, filled exec package, executable's vnode (unlocked).
 *	error:	destructive:
 *			everything deallocated except exec header.
 *		non-destructive:
 *			error code, executable's vnode (unlocked),
 *			exec header unmodified.
 */
int
/*ARGSUSED*/
check_exec(struct proc *p, struct exec_package *epp, int flag)
{
	int error, i;
	struct vnode *vp;
	struct nameidata *ndp;
	size_t resid;

	ndp = epp->ep_ndp;
	ndp->ni_cnd.cn_nameiop = LOOKUP;
	ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | SAVENAME;
	/* first get the vnode */
	if ((error = namei(ndp)) != 0)
		return error;
	epp->ep_vp = vp = ndp->ni_vp;	/* vp comes back locked (LOCKLEAF) */

	/* check access and type: only regular files may be exec'd */
	if (vp->v_type != VREG) {
		error = EACCES;
		goto bad1;
	}
	if ((error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p)) != 0)
		goto bad1;

	/* get attributes */
	if ((error = VOP_GETATTR(vp, epp->ep_vap, p->p_ucred, p)) != 0)
		goto bad1;

	/* Check mount point: noexec forbids exec, nosuid strips set-id bits */
	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
		error = EACCES;
		goto bad1;
	}
	if (vp->v_mount->mnt_flag & MNT_NOSUID)
		epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);

	/* try to open it */
	if ((error = VOP_OPEN(vp, FREAD, p->p_ucred, p)) != 0)
		goto bad1;

	/* unlock vp, since we need it unlocked from here on out. */
	VOP_UNLOCK(vp, 0);


#ifdef VERIFIED_EXEC
	/* fingerprint check; 'flag' distinguishes direct vs. interpreter exec */
	if ((error = veriexec_verify(p, vp, epp->ep_vap, epp->ep_ndp->ni_dirp,
	    flag, NULL)) != 0)
		goto bad2;
#endif

	/* now we have the file, get the exec header */
	uvn_attach(vp, VM_PROT_READ);
	error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
	    UIO_SYSSPACE, 0, p->p_ucred, &resid, NULL);
	if (error)
		goto bad2;
	/* a short read is not an error; record how much header is valid */
	epp->ep_hdrvalid = epp->ep_hdrlen - resid;

	/*
	 * Set up default address space limits.  Can be overridden
	 * by individual exec packages.
	 *
	 * XXX probably should be all done in the exec packages.
	 */
	epp->ep_vm_minaddr = VM_MIN_ADDRESS;
	epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
	/*
	 * set up the vmcmds for creation of the process
	 * address space: try each exec format in priority order until
	 * one recognizes the header.
	 */
	error = ENOEXEC;
	for (i = 0; i < nexecs && error != 0; i++) {
		int newerror;

		epp->ep_esch = execsw[i];
		newerror = (*execsw[i]->es_makecmds)(p, epp);
		/* make sure the first "interesting" error code is saved. */
		if (!newerror || error == ENOEXEC)
			error = newerror;

		/* if es_makecmds call was successful, update epp->ep_es */
		if (!newerror && (epp->ep_flags & EXEC_HASES) == 0)
			epp->ep_es = execsw[i];

		/*
		 * A destructive failure already deallocated everything
		 * except the header -- return without touching vp again.
		 */
		if (epp->ep_flags & EXEC_DESTR && error != 0)
			return error;
	}
	if (!error) {
		/* check that entry point is sane */
		if (epp->ep_entry > VM_MAXUSER_ADDRESS)
			error = ENOEXEC;

		/* check limits */
		if ((epp->ep_tsize > MAXTSIZ) ||
		    (epp->ep_dsize >
		    (u_quad_t)p->p_rlimit[RLIMIT_DATA].rlim_cur))
			error = ENOMEM;

		if (!error)
			return (0);
	}

	/*
	 * free any vmspace-creation commands,
	 * and release their references
	 */
	kill_vmcmds(&epp->ep_vmcmds);

bad2:
	/*
	 * close and release the vnode, restore the old one, free the
	 * pathname buf, and punt.
	 */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	VOP_CLOSE(vp, FREAD, p->p_ucred, p);
	vput(vp);
	PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
	return error;

bad1:
	/*
	 * free the namei pathname buffer, and put the vnode
	 * (which we don't yet have open).
	 */
	vput(vp);		/* was still locked */
	PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
	return error;
}
353
#ifdef __MACHINE_STACK_GROWS_UP
/*
 * Extra page reserved at the stack base when the stack grows upward;
 * the name suggests it is for pthread use -- confirm against libpthread.
 */
#define STACK_PTHREADSPACE NBPG
#else
#define STACK_PTHREADSPACE 0
#endif
359
/*
 * exec system call.
 *
 * Replaces the calling process's image with the program named by
 * SCARG(uap, path).  On success returns EJUSTRETURN (registers were
 * already set up via e_setregs); failures before the address space is
 * replaced return an errno, failures after that point kill the process
 * via exit1() at the exec_abort label.
 */
/* ARGSUSED */
int
sys_execve(struct lwp *l, void *v, register_t *retval)
{
	struct sys_execve_args /* {
		syscallarg(const char *) path;
		syscallarg(char * const *) argp;
		syscallarg(char * const *) envp;
	} */ *uap = v;
	int error;
	u_int i;
	struct exec_package pack;
	struct nameidata nid;
	struct vattr attr;
	struct proc *p;
	struct ucred *cred;
	char *argp;			/* kernel buffer for args/env strings */
	char * const *cpp;
	char *dp, *sp;
	long argc, envc;
	size_t len;
	char *stack;
	struct ps_strings arginfo;
	struct vmspace *vm;
	char **tmpfap;
	int szsigcode;
	struct exec_vmcmd *base_vcp;
	int oldlwpflags;

	/* Disable scheduler activation upcalls. */
	oldlwpflags = l->l_flag & (L_SA | L_SA_UPCALL);
	if (l->l_flag & L_SA)
		l->l_flag &= ~(L_SA | L_SA_UPCALL);

	p = l->l_proc;
	/*
	 * Lock the process and set the P_INEXEC flag to indicate that
	 * it should be left alone until we're done here.  This is
	 * necessary to avoid race conditions - e.g. in ptrace() -
	 * that might allow a local user to illicitly obtain elevated
	 * privileges.
	 */
	p->p_flag |= P_INEXEC;

	cred = p->p_ucred;
	base_vcp = NULL;
	/*
	 * Init the namei data to point the file user's program name.
	 * This is done here rather than in check_exec(), so that it's
	 * possible to override this settings if any of makecmd/probe
	 * functions call check_exec() recursively - for example,
	 * see exec_script_makecmds().
	 */
	NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);

	/*
	 * initialize the fields of the exec package.
	 */
	pack.ep_name = SCARG(uap, path);
	pack.ep_hdr = malloc(exec_maxhdrsz, M_EXEC, M_WAITOK);
	pack.ep_hdrlen = exec_maxhdrsz;
	pack.ep_hdrvalid = 0;
	pack.ep_ndp = &nid;
	pack.ep_emul_arg = NULL;
	pack.ep_vmcmds.evs_cnt = 0;
	pack.ep_vmcmds.evs_used = 0;
	pack.ep_vap = &attr;
	pack.ep_flags = 0;

#ifdef LKM
	/* shared: keep execsw[] stable while we use it */
	lockmgr(&exec_lock, LK_SHARED, NULL);
#endif

	/* see if we can run it. */
#ifdef VERIFIED_EXEC
	if ((error = check_exec(p, &pack, VERIEXEC_DIRECT)) != 0)
#else
	if ((error = check_exec(p, &pack, 0)) != 0)
#endif
		goto freehdr;

	/* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */

	/* allocate an argument buffer */
	argp = (char *) uvm_km_valloc_wait(exec_map, NCARGS);
#ifdef DIAGNOSTIC
	if (argp == (vaddr_t) 0)
		panic("execve: argp == NULL");
#endif
	dp = argp;
	argc = 0;

	/* copy the fake args list, if there's one, freeing it as we go */
	if (pack.ep_flags & EXEC_HASARGL) {
		tmpfap = pack.ep_fa;
		while (*tmpfap != NULL) {
			char *cp;

			cp = *tmpfap;
			while (*cp)
				*dp++ = *cp++;
			dp++;

			FREE(*tmpfap, M_EXEC);
			tmpfap++; argc++;
		}
		FREE(pack.ep_fa, M_EXEC);
		pack.ep_flags &= ~EXEC_HASARGL;
	}

	/* Now get argv & environment */
	if (!(cpp = SCARG(uap, argp))) {
		error = EINVAL;
		goto bad;
	}

	if (pack.ep_flags & EXEC_SKIPARG)
		cpp++;

	/* copy in argv strings until the NULL terminator */
	while (1) {
		len = argp + ARG_MAX - dp;	/* remaining buffer space */
		if ((error = copyin(cpp, &sp, sizeof(sp))) != 0)
			goto bad;
		if (!sp)
			break;
		if ((error = copyinstr(sp, dp, len, &len)) != 0) {
			/* buffer exhausted means args too big, not a bad name */
			if (error == ENAMETOOLONG)
				error = E2BIG;
			goto bad;
		}
#ifdef KTRACE
		if (KTRPOINT(p, KTR_EXEC_ARG))
			ktrkmem(p, KTR_EXEC_ARG, dp, len - 1);
#endif
		dp += len;	/* len includes the terminating NUL */
		cpp++;
		argc++;
	}

	envc = 0;
	/* environment need not be there */
	if ((cpp = SCARG(uap, envp)) != NULL ) {
		while (1) {
			len = argp + ARG_MAX - dp;
			if ((error = copyin(cpp, &sp, sizeof(sp))) != 0)
				goto bad;
			if (!sp)
				break;
			if ((error = copyinstr(sp, dp, len, &len)) != 0) {
				if (error == ENAMETOOLONG)
					error = E2BIG;
				goto bad;
			}
#ifdef KTRACE
			if (KTRPOINT(p, KTR_EXEC_ENV))
				ktrkmem(p, KTR_EXEC_ENV, dp, len - 1);
#endif
			dp += len;
			cpp++;
			envc++;
		}
	}

	dp = (char *) ALIGN(dp);

	szsigcode = pack.ep_es->es_emul->e_esigcode -
	    pack.ep_es->es_emul->e_sigcode;

	/* Now check if args & environ fit into new stack */
	if (pack.ep_flags & EXEC_32)
		len = ((argc + envc + 2 + pack.ep_es->es_arglen) *
		    sizeof(int) + sizeof(int) + dp + STACKGAPLEN +
		    szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
		    - argp;
	else
		len = ((argc + envc + 2 + pack.ep_es->es_arglen) *
		    sizeof(char *) + sizeof(int) + dp + STACKGAPLEN +
		    szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
		    - argp;

	len = ALIGN(len);	/* make the stack "safely" aligned */

	if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
		error = ENOMEM;
		goto bad;
	}

	/* Get rid of other LWPs. */
	p->p_flag |= P_WEXIT; /* XXX hack. lwp-exit stuff wants to see it. */
	exit_lwps(l);
	p->p_flag &= ~P_WEXIT;
	KDASSERT(p->p_nlwps == 1);

	/* This is now LWP 1 */
	l->l_lid = 1;
	p->p_nlwpid = 1;

	/* Release any SA state. */
	if (p->p_sa)
		sa_release(p);

	/* Remove POSIX timers */
	timers_free(p, TIMERS_POSIX);

	/* adjust "active stack depth" for process VSZ */
	pack.ep_ssize = len;	/* maybe should go elsewhere, but... */

	/*
	 * Do whatever is necessary to prepare the address space
	 * for remapping.  Note that this might replace the current
	 * vmspace with another!
	 *
	 * This is the point of no return: the old image is gone, so
	 * subsequent failures go to exec_abort (which exit1()s) rather
	 * than returning an error to the caller.
	 */
	uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);

	/* record proc's vnode, for use by procfs and others */
	if (p->p_textvp)
		vrele(p->p_textvp);
	VREF(pack.ep_vp);
	p->p_textvp = pack.ep_vp;

	/* Now map address space */
	vm = p->p_vmspace;
	vm->vm_taddr = (caddr_t) pack.ep_taddr;
	vm->vm_tsize = btoc(pack.ep_tsize);
	vm->vm_daddr = (caddr_t) pack.ep_daddr;
	vm->vm_dsize = btoc(pack.ep_dsize);
	vm->vm_ssize = btoc(pack.ep_ssize);
	vm->vm_maxsaddr = (caddr_t) pack.ep_maxsaddr;
	vm->vm_minsaddr = (caddr_t) pack.ep_minsaddr;

	/* create the new process's VM space by running the vmcmds */
#ifdef DIAGNOSTIC
	if (pack.ep_vmcmds.evs_used == 0)
		panic("execve: no vmcmds");
#endif
	for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
		struct exec_vmcmd *vcp;

		vcp = &pack.ep_vmcmds.evs_cmds[i];
		if (vcp->ev_flags & VMCMD_RELATIVE) {
#ifdef DIAGNOSTIC
			if (base_vcp == NULL)
				panic("execve: relative vmcmd with no base");
			if (vcp->ev_flags & VMCMD_BASE)
				panic("execve: illegal base & relative vmcmd");
#endif
			/* relative commands are offset from the last BASE cmd */
			vcp->ev_addr += base_vcp->ev_addr;
		}
		error = (*vcp->ev_proc)(p, vcp);
#ifdef DEBUG_EXEC
		if (error) {
			int j;
			struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
			for (j = 0; j <= i; j++)
				uprintf(
				    "vmcmd[%d] = %#lx/%#lx fd@%#lx prot=0%o flags=%d\n",
				    j, vp[j].ev_addr, vp[j].ev_len,
				    vp[j].ev_offset, vp[j].ev_prot,
				    vp[j].ev_flags);
		}
#endif /* DEBUG_EXEC */
		if (vcp->ev_flags & VMCMD_BASE)
			base_vcp = vcp;
	}

	/* free the vmspace-creation commands, and release their references */
	kill_vmcmds(&pack.ep_vmcmds);

	vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
	VOP_CLOSE(pack.ep_vp, FREAD, cred, p);
	vput(pack.ep_vp);

	/* if an error happened, deallocate and punt */
	if (error) {
		DPRINTF(("execve: vmcmd %i failed: %d\n", i - 1, error));
		goto exec_abort;
	}

	/* remember information about the process */
	arginfo.ps_nargvstr = argc;
	arginfo.ps_nenvstr = envc;

	stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
	    STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
	    len - (sizeof(struct ps_strings) + szsigcode));
#ifdef __MACHINE_STACK_GROWS_UP
	/*
	 * The copyargs call always copies into lower addresses
	 * first, moving towards higher addresses, starting with
	 * the stack pointer that we give.  When the stack grows
	 * down, this puts argc/argv/envp very shallow on the
	 * stack, right at the first user stack pointer, and puts
	 * STACKGAPLEN very deep in the stack.  When the stack
	 * grows up, the situation is reversed.
	 *
	 * Normally, this is no big deal.  But the ld_elf.so _rtld()
	 * function expects to be called with a single pointer to
	 * a region that has a few words it can stash values into,
	 * followed by argc/argv/envp.  When the stack grows down,
	 * it's easy to decrement the stack pointer a little bit to
	 * allocate the space for these few words and pass the new
	 * stack pointer to _rtld.  When the stack grows up, however,
	 * a few words before argc is part of the signal trampoline, XXX
	 * so we have a problem.
	 *
	 * Instead of changing how _rtld works, we take the easy way
	 * out and steal 32 bytes before we call copyargs.  This
	 * space is effectively stolen from STACKGAPLEN.
	 */
	stack += 32;
#endif /* __MACHINE_STACK_GROWS_UP */

	/* Now copy argc, args & environ to new stack */
	error = (*pack.ep_es->es_copyargs)(p, &pack, &arginfo, &stack, argp);
	if (error) {
		DPRINTF(("execve: copyargs failed %d\n", error));
		goto exec_abort;
	}
	/* Move the stack back to original point */
	stack = (char *)STACK_GROW(vm->vm_minsaddr, len);

	/* fill process ps_strings info */
	p->p_psstr = (struct ps_strings *)
	    STACK_ALLOC(STACK_GROW(vm->vm_minsaddr, STACK_PTHREADSPACE),
	    sizeof(struct ps_strings));
	p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
	p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
	p->p_psenv = offsetof(struct ps_strings, ps_envstr);
	p->p_psnenv = offsetof(struct ps_strings, ps_nenvstr);

	/* copy out the process's ps_strings structure */
	if ((error = copyout(&arginfo, (char *)p->p_psstr,
	    sizeof(arginfo))) != 0) {
		DPRINTF(("execve: ps_strings copyout %p->%p size %ld failed\n",
		    &arginfo, (char *)p->p_psstr, (long)sizeof(arginfo)));
		goto exec_abort;
	}

	stopprofclock(p);	/* stop profiling */
	fdcloseexec(p);		/* handle close on exec */
	execsigs(p);		/* reset caught signals */

	l->l_ctxlink = NULL;	/* reset ucontext link */

	/* set command name & other accounting info */
	len = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
	memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, len);
	p->p_comm[len] = 0;
	p->p_acflag &= ~AFORK;

	p->p_flag |= P_EXEC;
	/* wake a vfork()ing parent waiting for us to release its vmspace */
	if (p->p_flag & P_PPWAIT) {
		p->p_flag &= ~P_PPWAIT;
		wakeup((caddr_t) p->p_pptr);
	}

	/*
	 * deal with set[ug]id.
	 * MNT_NOSUID has already been used to disable s[ug]id.
	 */
	if ((p->p_flag & P_TRACED) == 0 &&

	    (((attr.va_mode & S_ISUID) != 0 &&
	      p->p_ucred->cr_uid != attr.va_uid) ||

	     ((attr.va_mode & S_ISGID) != 0 &&
	      p->p_ucred->cr_gid != attr.va_gid))) {
		/*
		 * Mark the process as SUGID before we do
		 * anything that might block.
		 */
		p_sugid(p);

		/* Make sure file descriptors 0..2 are in use. */
		if ((error = fdcheckstd(p)) != 0)
			goto exec_abort;

		p->p_ucred = crcopy(cred);
#ifdef KTRACE
		/*
		 * If process is being ktraced, turn off - unless
		 * root set it.
		 */
		if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT))
			ktrderef(p);
#endif
		if (attr.va_mode & S_ISUID)
			p->p_ucred->cr_uid = attr.va_uid;
		if (attr.va_mode & S_ISGID)
			p->p_ucred->cr_gid = attr.va_gid;
	} else {
		/* effective ids back to real ids: no longer set-id */
		if (p->p_ucred->cr_uid == p->p_cred->p_ruid &&
		    p->p_ucred->cr_gid == p->p_cred->p_rgid)
			p->p_flag &= ~P_SUGID;
	}
	p->p_cred->p_svuid = p->p_ucred->cr_uid;
	p->p_cred->p_svgid = p->p_ucred->cr_gid;

#if defined(__HAVE_RAS)
	/*
	 * Remove all RASs from the address space.
	 */
	ras_purgeall(p);
#endif

	doexechooks(p);

	uvm_km_free_wakeup(exec_map, (vaddr_t) argp, NCARGS);

	PNBUF_PUT(nid.ni_cnd.cn_pnbuf);

	/* notify others that we exec'd */
	KNOTE(&p->p_klist, NOTE_EXEC);

	/* setup new registers and do misc. setup. */
	(*pack.ep_es->es_emul->e_setregs)(l, &pack, (u_long) stack);
	if (pack.ep_es->es_setregs)
		(*pack.ep_es->es_setregs)(l, &pack, (u_long) stack);

	/* map the process's signal trampoline code */
	if (exec_sigcode_map(p, pack.ep_es->es_emul))
		goto exec_abort;

	if (p->p_flag & P_TRACED)
		psignal(p, SIGTRAP);

	free(pack.ep_hdr, M_EXEC);

	/*
	 * Call emulation specific exec hook. This can setup per-process
	 * p->p_emuldata or do any other per-process stuff an emulation needs.
	 *
	 * If we are executing process of different emulation than the
	 * original forked process, call e_proc_exit() of the old emulation
	 * first, then e_proc_exec() of new emulation. If the emulation is
	 * same, the exec hook code should deallocate any old emulation
	 * resources held previously by this process.
	 */
	if (p->p_emul && p->p_emul->e_proc_exit
	    && p->p_emul != pack.ep_es->es_emul)
		(*p->p_emul->e_proc_exit)(p);

	/*
	 * Call exec hook. Emulation code may NOT store reference to anything
	 * from &pack.
	 */
	if (pack.ep_es->es_emul->e_proc_exec)
		(*pack.ep_es->es_emul->e_proc_exec)(p, &pack);

	/* update p_emul, the old value is no longer needed */
	p->p_emul = pack.ep_es->es_emul;

	/* ...and the same for p_execsw */
	p->p_execsw = pack.ep_es;

#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif
#ifdef KTRACE
	if (KTRPOINT(p, KTR_EMUL))
		ktremul(p);
#endif

#ifdef LKM
	lockmgr(&exec_lock, LK_RELEASE, NULL);
#endif
	p->p_flag &= ~P_INEXEC;

	/* honor a pending "stop on exec" request (e.g. from a debugger) */
	if (p->p_flag & P_STOPEXEC) {
		int s;

		sigminusset(&contsigmask, &p->p_sigctx.ps_siglist);
		SCHED_LOCK(s);
		p->p_pptr->p_nstopchild++;
		p->p_stat = SSTOP;
		l->l_stat = LSSTOP;
		p->p_nrlwps--;
		mi_switch(l, NULL);
		SCHED_ASSERT_UNLOCKED();
		splx(s);
	}

	/* registers already set up; don't let the syscall stub clobber them */
	return (EJUSTRETURN);

 bad:
	/* error before the old image was destroyed: clean up and return */
	p->p_flag &= ~P_INEXEC;
	/* free the vmspace-creation commands, and release their references */
	kill_vmcmds(&pack.ep_vmcmds);
	/* kill any opened file descriptor, if necessary */
	if (pack.ep_flags & EXEC_HASFD) {
		pack.ep_flags &= ~EXEC_HASFD;
		(void) fdrelease(p, pack.ep_fd);
	}
	/* close and put the exec'd file */
	vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
	VOP_CLOSE(pack.ep_vp, FREAD, cred, p);
	vput(pack.ep_vp);
	PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
	uvm_km_free_wakeup(exec_map, (vaddr_t) argp, NCARGS);

 freehdr:
	l->l_flag |= oldlwpflags;
	p->p_flag &= ~P_INEXEC;
#ifdef LKM
	lockmgr(&exec_lock, LK_RELEASE, NULL);
#endif

	free(pack.ep_hdr, M_EXEC);
	return error;

 exec_abort:
	p->p_flag &= ~P_INEXEC;
#ifdef LKM
	lockmgr(&exec_lock, LK_RELEASE, NULL);
#endif

	/*
	 * the old process doesn't exist anymore.  exit gracefully.
	 * get rid of the (new) address space we have created, if any, get rid
	 * of our namei data and vnode, and exit noting failure
	 */
	uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
	    VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
	if (pack.ep_emul_arg)
		FREE(pack.ep_emul_arg, M_TEMP);
	PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
	uvm_km_free_wakeup(exec_map, (vaddr_t) argp, NCARGS);
	free(pack.ep_hdr, M_EXEC);
	exit1(l, W_EXITCODE(error, SIGABRT));

	/* NOTREACHED */
	return 0;
}
896
897
898 int
899 copyargs(struct proc *p, struct exec_package *pack, struct ps_strings *arginfo,
900 char **stackp, void *argp)
901 {
902 char **cpp, *dp, *sp;
903 size_t len;
904 void *nullp;
905 long argc, envc;
906 int error;
907
908 cpp = (char **)*stackp;
909 nullp = NULL;
910 argc = arginfo->ps_nargvstr;
911 envc = arginfo->ps_nenvstr;
912 if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0)
913 return error;
914
915 dp = (char *) (cpp + argc + envc + 2 + pack->ep_es->es_arglen);
916 sp = argp;
917
918 /* XXX don't copy them out, remap them! */
919 arginfo->ps_argvstr = cpp; /* remember location of argv for later */
920
921 for (; --argc >= 0; sp += len, dp += len)
922 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
923 (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
924 return error;
925
926 if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
927 return error;
928
929 arginfo->ps_envstr = cpp; /* remember location of envp for later */
930
931 for (; --envc >= 0; sp += len, dp += len)
932 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
933 (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
934 return error;
935
936 if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
937 return error;
938
939 *stackp = (char *)cpp;
940 return 0;
941 }
942
943 #ifdef LKM
944 /*
945 * Find an emulation of given name in list of emulations.
946 * Needs to be called with the exec_lock held.
947 */
948 const struct emul *
949 emul_search(const char *name)
950 {
951 struct emul_entry *it;
952
953 LIST_FOREACH(it, &el_head, el_list) {
954 if (strcmp(name, it->el_emul->e_name) == 0)
955 return it->el_emul;
956 }
957
958 return NULL;
959 }
960
/*
 * Add an emulation to list, if it's not there already.
 * Returns 0 on success, EEXIST when an emulation of the same name
 * is already registered.  'ro_entry' nonzero marks the entry as
 * read-only, i.e. not removable by emul_unregister().
 */
int
emul_register(const struct emul *emul, int ro_entry)
{
	struct emul_entry *ee;
	int error;

	error = 0;
	/*
	 * NOTE(review): the list is modified below while holding only a
	 * shared lock; presumably registrations are serialized elsewhere
	 * (module load path) -- confirm before relying on this.
	 */
	lockmgr(&exec_lock, LK_SHARED, NULL);

	if (emul_search(emul->e_name)) {
		error = EEXIST;
		goto out;
	}

	MALLOC(ee, struct emul_entry *, sizeof(struct emul_entry),
	    M_EXEC, M_WAITOK);
	ee->el_emul = emul;
	ee->ro_entry = ro_entry;
	LIST_INSERT_HEAD(&el_head, ee, el_list);

 out:
	lockmgr(&exec_lock, LK_RELEASE, NULL);
	return error;
}
988
/*
 * Remove emulation with name 'name' from list of supported emulations.
 * Fails with ENOENT when not registered, EBUSY when the entry is
 * read-only (built-in), still referenced by an execsw[] entry, or in
 * use by some running process.
 */
int
emul_unregister(const char *name)
{
	const struct proclist_desc *pd;
	struct emul_entry *it;
	int i, error;
	struct proc *ptmp;

	error = 0;
	lockmgr(&exec_lock, LK_SHARED, NULL);

	LIST_FOREACH(it, &el_head, el_list) {
		if (strcmp(it->el_emul->e_name, name) == 0)
			break;
	}

	/* 'it' is NULL when the loop ran to completion without a match */
	if (!it) {
		error = ENOENT;
		goto out;
	}

	if (it->ro_entry) {
		error = EBUSY;
		goto out;
	}

	/* test if any execw[] entry is still using this */
	for(i=0; i < nexecs; i++) {
		if (execsw[i]->es_emul == it->el_emul) {
			error = EBUSY;
			goto out;
		}
	}

	/*
	 * Test if any process is running under this emulation - since
	 * emul_unregister() runs quite seldom, it's better
	 * to do an expensive check here than to use any locking.
	 */
	proclist_lock_read();
	for (pd = proclists; pd->pd_list != NULL && !error; pd++) {
		PROCLIST_FOREACH(ptmp, pd->pd_list) {
			if (ptmp->p_emul == it->el_emul) {
				error = EBUSY;
				break;
			}
		}
	}
	proclist_unlock_read();

	if (error)
		goto out;


	/* entry is not used, remove it */
	LIST_REMOVE(it, el_list);
	FREE(it, M_EXEC);

 out:
	lockmgr(&exec_lock, LK_RELEASE, NULL);
	return error;
}
1054
/*
 * Add execsw[] entry.  If esp->es_emul is NULL it is resolved via
 * emul_search(e_name); a duplicate (makecmds, probe_func, emulation)
 * tuple is rejected with EEXIST.  On success the global execsw[]
 * array is rebuilt by exec_init(0).
 */
int
exec_add(struct execsw *esp, const char *e_name)
{
	struct exec_entry *it;
	int error;

	error = 0;
	lockmgr(&exec_lock, LK_EXCLUSIVE, NULL);

	/* resolve the emulation by name when not supplied by the caller */
	if (!esp->es_emul) {
		esp->es_emul = emul_search(e_name);
		if (!esp->es_emul) {
			error = ENOENT;
			goto out;
		}
	}

	LIST_FOREACH(it, &ex_head, ex_list) {
		/* assume tuple (makecmds, probe_func, emulation) is unique */
		if (it->es->es_makecmds == esp->es_makecmds
		    && it->es->u.elf_probe_func == esp->u.elf_probe_func
		    && it->es->es_emul == esp->es_emul) {
			error = EEXIST;
			goto out;
		}
	}

	/* if we got here, the entry doesn't exist yet */
	MALLOC(it, struct exec_entry *, sizeof(struct exec_entry),
	    M_EXEC, M_WAITOK);
	it->es = esp;
	LIST_INSERT_HEAD(&ex_head, it, ex_list);

	/* update execsw[] */
	exec_init(0);

 out:
	lockmgr(&exec_lock, LK_RELEASE, NULL);
	return error;
}
1098
1099 /*
1100 * Remove execsw[] entry.
1101 */
1102 int
1103 exec_remove(const struct execsw *esp)
1104 {
1105 struct exec_entry *it;
1106 int error;
1107
1108 error = 0;
1109 lockmgr(&exec_lock, LK_EXCLUSIVE, NULL);
1110
1111 LIST_FOREACH(it, &ex_head, ex_list) {
1112 /* assume tuple (makecmds, probe_func, emulation) is unique */
1113 if (it->es->es_makecmds == esp->es_makecmds
1114 && it->es->u.elf_probe_func == esp->u.elf_probe_func
1115 && it->es->es_emul == esp->es_emul)
1116 break;
1117 }
1118 if (!it) {
1119 error = ENOENT;
1120 goto out;
1121 }
1122
1123 /* remove item from list and free resources */
1124 LIST_REMOVE(it, ex_list);
1125 FREE(it, M_EXEC);
1126
1127 /* update execsw[] */
1128 exec_init(0);
1129
1130 out:
1131 lockmgr(&exec_lock, LK_RELEASE, NULL);
1132 return error;
1133 }
1134
/*
 * Insert 'esp' into the priority-ordered, singly-linked build list
 * *listp used by exec_init() to construct execsw[].  Resulting order:
 * all EXECSW_PRIO_FIRST entries, then EXECSW_PRIO_ANY, then
 * EXECSW_PRIO_LAST.
 *
 * NOTE(review): with an unknown priority and DIAGNOSTIC unset, 'et'
 * is leaked and the entry silently dropped from the list.
 */
static void
link_es(struct execsw_entry **listp, const struct execsw *esp)
{
	struct execsw_entry *et, *e1;

	MALLOC(et, struct execsw_entry *, sizeof(struct execsw_entry),
	    M_TEMP, M_WAITOK);
	et->next = NULL;
	et->es = esp;
	if (*listp == NULL) {
		*listp = et;
		return;
	}

	switch(et->es->es_prio) {
	case EXECSW_PRIO_FIRST:
		/* put new entry as the first */
		et->next = *listp;
		*listp = et;
		break;
	case EXECSW_PRIO_ANY:
		/* put new entry after all *_FIRST and *_ANY entries */
		for(e1 = *listp; e1->next
		    && e1->next->es->es_prio != EXECSW_PRIO_LAST;
		    e1 = e1->next);
		et->next = e1->next;
		e1->next = et;
		break;
	case EXECSW_PRIO_LAST:
		/* put new entry as the last one */
		for(e1 = *listp; e1->next; e1 = e1->next);
		e1->next = et;
		break;
	default:
#ifdef DIAGNOSTIC
		panic("execw[] entry with unknown priority %d found",
		    et->es->es_prio);
#endif
		break;
	}
}
1176
1177 /*
1178 * Initialize exec structures. If init_boot is true, also does necessary
1179 * one-time initialization (it's called from main() that way).
1180 * Once system is multiuser, this should be called with exec_lock held,
1181 * i.e. via exec_{add|remove}().
1182 */
1183 int
1184 exec_init(int init_boot)
1185 {
1186 const struct execsw **new_es, * const *old_es;
1187 struct execsw_entry *list, *e1;
1188 struct exec_entry *e2;
1189 int i, es_sz;
1190
1191 if (init_boot) {
1192 /* do one-time initializations */
1193 lockinit(&exec_lock, PWAIT, "execlck", 0, 0);
1194
1195 /* register compiled-in emulations */
1196 for(i=0; i < nexecs_builtin; i++) {
1197 if (execsw_builtin[i].es_emul)
1198 emul_register(execsw_builtin[i].es_emul, 1);
1199 }
1200 #ifdef DIAGNOSTIC
1201 if (i == 0)
1202 panic("no emulations found in execsw_builtin[]");
1203 #endif
1204 }
1205
1206 /*
1207 * Build execsw[] array from builtin entries and entries added
1208 * at runtime.
1209 */
1210 list = NULL;
1211 for(i=0; i < nexecs_builtin; i++)
1212 link_es(&list, &execsw_builtin[i]);
1213
1214 /* Add dynamically loaded entries */
1215 es_sz = nexecs_builtin;
1216 LIST_FOREACH(e2, &ex_head, ex_list) {
1217 link_es(&list, e2->es);
1218 es_sz++;
1219 }
1220
1221 /*
1222 * Now that we have sorted all execw entries, create new execsw[]
1223 * and free no longer needed memory in the process.
1224 */
1225 new_es = malloc(es_sz * sizeof(struct execsw *), M_EXEC, M_WAITOK);
1226 for(i=0; list; i++) {
1227 new_es[i] = list->es;
1228 e1 = list->next;
1229 FREE(list, M_TEMP);
1230 list = e1;
1231 }
1232
1233 /*
1234 * New execsw[] array built, now replace old execsw[] and free
1235 * used memory.
1236 */
1237 old_es = execsw;
1238 execsw = new_es;
1239 nexecs = es_sz;
1240 if (old_es)
1241 free((void *)old_es, M_EXEC);
1242
1243 /*
1244 * Figure out the maximum size of an exec header.
1245 */
1246 exec_maxhdrsz = 0;
1247 for (i = 0; i < nexecs; i++) {
1248 if (execsw[i]->es_hdrsz > exec_maxhdrsz)
1249 exec_maxhdrsz = execsw[i]->es_hdrsz;
1250 }
1251
1252 return 0;
1253 }
1254 #endif
1255
1256 #ifndef LKM
1257 /*
1258 * Simplified exec_init() for kernels without LKMs. Only initialize
1259 * exec_maxhdrsz and execsw[].
1260 */
1261 int
1262 exec_init(int init_boot)
1263 {
1264 int i;
1265
1266 #ifdef DIAGNOSTIC
1267 if (!init_boot)
1268 panic("exec_init(): called with init_boot == 0");
1269 #endif
1270
1271 /* do one-time initializations */
1272 nexecs = nexecs_builtin;
1273 execsw = malloc(nexecs*sizeof(struct execsw *), M_EXEC, M_WAITOK);
1274
1275 /*
1276 * Fill in execsw[] and figure out the maximum size of an exec header.
1277 */
1278 exec_maxhdrsz = 0;
1279 for(i=0; i < nexecs; i++) {
1280 execsw[i] = &execsw_builtin[i];
1281 if (execsw_builtin[i].es_hdrsz > exec_maxhdrsz)
1282 exec_maxhdrsz = execsw_builtin[i].es_hdrsz;
1283 }
1284
1285 return 0;
1286
1287 }
1288 #endif /* !LKM */
1289
/*
 * Map the emulation's signal trampoline (sigcode) into process p's
 * address space.  Returns 0 on success (or when the emulation provides
 * no sigobject / has zero-length sigcode), otherwise the uvm_map()
 * error code.
 */
static int
exec_sigcode_map(struct proc *p, const struct emul *e)
{
	vaddr_t va;
	vsize_t sz;
	int error;
	struct uvm_object *uobj;

	/* Size of the emulation's sigcode blob. */
	sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;

	/* Nothing to map for this emulation. */
	if (e->e_sigobject == NULL || sz == 0) {
		return 0;
	}

	/*
	 * If we don't have a sigobject for this emulation, create one.
	 *
	 * sigobject is an anonymous memory object (just like SYSV shared
	 * memory) that we keep a permanent reference to and that we map
	 * in all processes that need this sigcode. The creation is simple,
	 * we create an object, add a permanent reference to it, map it in
	 * kernel space, copy out the sigcode to it and unmap it.
	 * We map it with PROT_READ|PROT_EXEC into the process just
	 * the way sys_mmap() would map it.
	 */

	uobj = *e->e_sigobject;
	if (uobj == NULL) {
		/* First use for this emulation: create and fill the object. */
		uobj = uao_create(sz, 0);
		/* Extra reference keeps the object alive permanently. */
		(*uobj->pgops->pgo_reference)(uobj);
		va = vm_map_min(kernel_map);
		if ((error = uvm_map(kernel_map, &va, round_page(sz),
		    uobj, 0, 0,
		    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
		    UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
			printf("kernel mapping failed %d\n", error);
			/* Drop the reference taken above on failure. */
			(*uobj->pgops->pgo_detach)(uobj);
			return (error);
		}
		/* Copy the trampoline code into the object's pages. */
		memcpy((void *)va, e->e_sigcode, sz);
#ifdef PMAP_NEED_PROCWR
		/* presumably syncs caches after writing code — see pmap_procwr(9) */
		pmap_procwr(&proc0, va, sz);
#endif
		/* Kernel mapping was only needed for the copy; drop it. */
		uvm_unmap(kernel_map, va, va + round_page(sz));
		*e->e_sigobject = uobj;
	}

	/* Just a hint to uvm_map where to put it. */
	va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
	    round_page(sz));

#ifdef __alpha__
	/*
	 * Tru64 puts /sbin/loader at the end of user virtual memory,
	 * which causes the above calculation to put the sigcode at
	 * an invalid address. Put it just below the text instead.
	 */
	if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
		va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
	}
#endif

	/* Reference for this process's mapping; detach again on failure. */
	(*uobj->pgops->pgo_reference)(uobj);
	error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
			uobj, 0, 0,
			UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
				    UVM_ADV_RANDOM, 0));
	if (error) {
		(*uobj->pgops->pgo_detach)(uobj);
		return (error);
	}
	/* Record where the trampoline lives in the process. */
	p->p_sigctx.ps_sigcode = (void *)va;
	return (0);
}
Cache object: b32d7b63d37f0b2ec2b90d5fd1533671
|