FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_exec.c
1 /* $NetBSD: kern_exec.c,v 1.280.4.3 2009/04/01 21:03:04 snj Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*-
30 * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
31 * Copyright (C) 1992 Wolfgang Solfrank.
32 * Copyright (C) 1992 TooLs GmbH.
33 * All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by TooLs GmbH.
46 * 4. The name of TooLs GmbH may not be used to endorse or promote products
47 * derived from this software without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
50 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
51 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
52 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
53 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
54 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
55 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
56 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
57 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
58 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59 */
60
61 #include <sys/cdefs.h>
62 __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.280.4.3 2009/04/01 21:03:04 snj Exp $");
63
64 #include "opt_ktrace.h"
65 #include "opt_syscall_debug.h"
66 #include "opt_compat_netbsd.h"
67 #include "veriexec.h"
68 #include "opt_pax.h"
69 #include "opt_sa.h"
70
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/filedesc.h>
74 #include <sys/kernel.h>
75 #include <sys/proc.h>
76 #include <sys/mount.h>
77 #include <sys/malloc.h>
78 #include <sys/kmem.h>
79 #include <sys/namei.h>
80 #include <sys/vnode.h>
81 #include <sys/file.h>
82 #include <sys/acct.h>
83 #include <sys/exec.h>
84 #include <sys/ktrace.h>
85 #include <sys/uidinfo.h>
86 #include <sys/wait.h>
87 #include <sys/mman.h>
88 #include <sys/ras.h>
89 #include <sys/signalvar.h>
90 #include <sys/stat.h>
91 #include <sys/syscall.h>
92 #include <sys/kauth.h>
93 #include <sys/lwpctl.h>
94 #include <sys/pax.h>
95 #include <sys/cpu.h>
96
97 #include <sys/sa.h>
98 #include <sys/savar.h>
99 #include <sys/syscallargs.h>
100 #if NVERIEXEC > 0
101 #include <sys/verified_exec.h>
102 #endif /* NVERIEXEC > 0 */
103
104 #include <uvm/uvm_extern.h>
105
106 #include <machine/reg.h>
107
108 #include <compat/common/compat_util.h>
109
110 static int exec_sigcode_map(struct proc *, const struct emul *);
111
/*
 * Debug output: DPRINTF(("fmt", ...)) prints to the process's
 * controlling terminal via uprintf() when the kernel is built with
 * DEBUG_EXEC; otherwise it expands to nothing.
 */
112 #ifdef DEBUG_EXEC
113 #define DPRINTF(a) uprintf a
114 #else
115 #define DPRINTF(a)
116 #endif /* DEBUG_EXEC */
117
118 /*
119 * Exec function switch:
120 *
121 * Note that each makecmds function is responsible for loading the
122 * exec package with the necessary functions for any exec-type-specific
123 * handling.
124 *
125 * Functions for specific exec types should be defined in their own
126 * header file.
127 */
128 extern const struct execsw execsw_builtin[];
129 extern int nexecs_builtin;
130 static const struct execsw **execsw = NULL;
131 static int nexecs;
132
133 u_int exec_maxhdrsz; /* must not be static - netbsd32 needs it */
134
135 #ifdef LKM
136 /* list of supported emulations */
137 static
138 LIST_HEAD(emlist_head, emul_entry) el_head = LIST_HEAD_INITIALIZER(el_head);
139 struct emul_entry {
140 	LIST_ENTRY(emul_entry) el_list;		/* linkage on el_head */
141 	const struct emul *el_emul;		/* the emulation itself */
142 	int ro_entry;	/* NOTE(review): presumably "read-only/built-in,
			 * may not be unloaded" -- confirm against the
			 * LKM add/remove code */
143 };
144 
145 /* list of dynamically loaded execsw entries */
146 static
147 LIST_HEAD(execlist_head, exec_entry) ex_head = LIST_HEAD_INITIALIZER(ex_head);
148 struct exec_entry {
149 	LIST_ENTRY(exec_entry) ex_list;		/* linkage on ex_head */
150 	const struct execsw *es;		/* loaded exec switch entry */
151 };
152 
153 /* temporary singly-linked list node used while building execsw[] */
154 struct execsw_entry {
155 	struct execsw_entry *next;
156 	const struct execsw *es;
157 };
158 #endif /* LKM */
159
160 #ifdef SYSCALL_DEBUG
161 extern const char * const syscallnames[];
162 #endif
163
164 #ifdef COMPAT_16
165 extern char sigcode[], esigcode[];
166 struct uvm_object *emul_netbsd_object;
167 #endif
168
169 #ifndef __HAVE_SYSCALL_INTERN
170 void syscall(void);
171 #endif
172
173 #ifdef KERN_SA
/*
 * Scheduler activations (SA) glue for the native emulation: object
 * sizes plus upcall/context hooks consumed by the SA code.  This is a
 * positional initializer for struct sa_emul, so the entries must stay
 * in the struct's field order -- NOTE(review): verify field meanings
 * against the struct sa_emul definition when modifying.
 */
174 static const struct sa_emul saemul_netbsd = {
175 	sizeof(ucontext_t),
176 	sizeof(struct sa_t),
177 	sizeof(struct sa_t *),
178 	NULL,
179 	NULL,
180 	cpu_upcall,
181 	(void (*)(struct lwp *, void *))getucontext_sa,
182 	sa_ucsp
183 };
184 #endif /* KERN_SA */
185
186 /* NetBSD emul struct */
/*
 * Emulation descriptor for native NetBSD executables.  This is a
 * positional initializer for struct emul, so the entries must remain
 * in the exact field order of the struct definition.  The #ifdef'd
 * groups track optional kernel features visible below: minimal-emul
 * builds omit the syscall metadata, SYSCALL_DEBUG supplies the
 * syscall name table, COMPAT_16 provides the in-kernel signal
 * trampoline (sigcode/esigcode and its uvm object), and KERN_SA
 * hooks up the scheduler-activations glue.
 */
187 const struct emul emul_netbsd = {
188 	"netbsd",
189 	NULL, /* emulation path */
190 #ifndef __HAVE_MINIMAL_EMUL
191 	EMUL_HAS_SYS___syscall,
192 	NULL,
193 	SYS_syscall,
194 	SYS_NSYSENT,
195 #endif
196 	sysent,
197 #ifdef SYSCALL_DEBUG
198 	syscallnames,
199 #else
200 	NULL,
201 #endif
202 	sendsig,
203 	trapsignal,
204 	NULL,
205 #ifdef COMPAT_16
206 	sigcode,
207 	esigcode,
208 	&emul_netbsd_object,
209 #else
210 	NULL,
211 	NULL,
212 	NULL,
213 #endif
214 	setregs,
215 	NULL,
216 	NULL,
217 	NULL,
218 	NULL,
219 	NULL,
220 #ifdef __HAVE_SYSCALL_INTERN
221 	syscall_intern,
222 #else
223 	syscall,
224 #endif
225 	NULL,
226 	NULL,
227 
228 	uvm_default_mapaddr,
229 	NULL,
230 #ifdef KERN_SA
231 	&saemul_netbsd,
232 #else
233 	NULL,
234 #endif
235 	sizeof(ucontext_t),
236 	startlwp,
237 };
238
239 /*
240 * Exec lock. Used to control access to execsw[] structures.
241 * This must not be static so that netbsd32 can access it, too.
242 */
243 krwlock_t exec_lock;
244
245 #ifdef LKM
246 static void link_es(struct execsw_entry **, const struct execsw *);
247 #endif /* LKM */
248
249 static kmutex_t sigobject_lock;
250
251 static void *
252 exec_pool_alloc(struct pool *pp, int flags)
253 {
254
255 return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
256 UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
257 }
258
/*
 * Pool page back-end free routine: release an NCARGS-sized chunk
 * obtained from exec_pool_alloc().
 */
259 static void
260 exec_pool_free(struct pool *pp, void *addr)
261 {
262 
263 	uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
264 }
265
/* Pool of NCARGS-sized argument buffers used during exec. */
266 static struct pool exec_pool;
267 
/*
 * Custom pool allocator backing exec_pool with pageable kernel VA
 * via the two routines above; page size is one whole NCARGS buffer.
 */
268 static struct pool_allocator exec_palloc = {
269 	.pa_alloc = exec_pool_alloc,
270 	.pa_free = exec_pool_free,
271 	.pa_pagesz = NCARGS
272 };
273
274 /*
275 * check exec:
276 * given an "executable" described in the exec package's namei info,
277 * see what we can do with it.
278 *
279 * ON ENTRY:
280 * exec package with appropriate namei info
281 * lwp pointer of exec'ing lwp
282 * NO SELF-LOCKED VNODES
283 *
284 * ON EXIT:
285 * error: nothing held, etc. exec header still allocated.
286 * ok: filled exec package, executable's vnode (unlocked).
287 *
288 * EXEC SWITCH ENTRY:
289 * Locked vnode to check, exec package, proc.
290 *
291 * EXEC SWITCH EXIT:
292 * ok: return 0, filled exec package, executable's vnode (unlocked).
293 * error: destructive:
294 * everything deallocated except exec header.
295 * non-destructive:
296 * error code, executable's vnode (unlocked),
297 * exec header unmodified.
298 */
299 int
300 /*ARGSUSED*/
301 check_exec(struct lwp *l, struct exec_package *epp)
302 {
303 	int error, i;
304 	struct vnode *vp;
305 	struct nameidata *ndp;
306 	size_t resid;
307 
308 	ndp = epp->ep_ndp;
309 	ndp->ni_cnd.cn_nameiop = LOOKUP;
310 	ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | SAVENAME | TRYEMULROOT;
311 	/* first get the vnode; LOCKLEAF means ni_vp comes back locked */
312 	if ((error = namei(ndp)) != 0)
313 		return error;
314 	epp->ep_vp = vp = ndp->ni_vp;
315 
316 	/* check access and type */
317 	if (vp->v_type != VREG) {
318 		error = EACCES;
319 		goto bad1;
320 	}
321 	if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
322 		goto bad1;
323 
324 	/* get attributes into *ep_vap for later use by exec packages */
325 	if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
326 		goto bad1;
327 
328 	/* Check mount point */
329 	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
330 		error = EACCES;
331 		goto bad1;
332 	}
	/* a nosuid mount quietly strips the set-id bits */
333 	if (vp->v_mount->mnt_flag & MNT_NOSUID)
334 		epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
335 
336 	/* try to open it */
337 	if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
338 		goto bad1;
339 
340 	/* unlock vp, since we need it unlocked from here on out. */
	/* NB: once the vnode is open, errors must unwind via bad2,
	 * which re-locks and VOP_CLOSE()s it; bad1 only vput()s. */
341 	VOP_UNLOCK(vp, 0);
342 
343 #if NVERIEXEC > 0
344 	error = veriexec_verify(l, vp, ndp->ni_cnd.cn_pnbuf,
345 	    epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
346 	    NULL);
347 	if (error)
348 		goto bad2;
349 #endif /* NVERIEXEC > 0 */
350 
351 #ifdef PAX_SEGVGUARD
352 	error = pax_segvguard(l, vp, ndp->ni_cnd.cn_pnbuf, false);
353 	if (error)
354 		goto bad2;
355 #endif /* PAX_SEGVGUARD */
356 
357 	/* now we have the file, get the exec header */
358 	error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
359 			UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
360 	if (error)
361 		goto bad2;
362 	epp->ep_hdrvalid = epp->ep_hdrlen - resid; /* bytes actually read */
363 
364 	/*
365 	 * Set up default address space limits. Can be overridden
366 	 * by individual exec packages.
367 	 *
368 	 * XXX probably should be all done in the exec packages.
369 	 */
370 	epp->ep_vm_minaddr = VM_MIN_ADDRESS;
371 	epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
372 	/*
373 	 * set up the vmcmds for creation of the process
374 	 * address space
375 	 */
	/*
	 * Walk the exec switch.  Each es_makecmds() either claims the
	 * image (returns 0 with the vmcmds filled in) or rejects it;
	 * the first error more interesting than ENOEXEC is preserved.
	 */
376 	error = ENOEXEC;
377 	for (i = 0; i < nexecs; i++) {
378 		int newerror;
379 
380 		epp->ep_esch = execsw[i];
381 		newerror = (*execsw[i]->es_makecmds)(l, epp);
382 
383 		if (!newerror) {
384 			/* Seems ok: check that entry point is sane */
385 			if (epp->ep_entry > VM_MAXUSER_ADDRESS) {
386 				error = ENOEXEC;
387 				break;
388 			}
389 
390 			/* check limits */
391 			if ((epp->ep_tsize > MAXTSIZ) ||
392 			    (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
393 						    [RLIMIT_DATA].rlim_cur)) {
394 				error = ENOMEM;
395 				break;
396 			}
397 			return 0;
398 		}
399 
		/* drop any references a failed es_makecmds() left behind */
400 		if (epp->ep_emul_root != NULL) {
401 			vrele(epp->ep_emul_root);
402 			epp->ep_emul_root = NULL;
403 		}
404 		if (epp->ep_interp != NULL) {
405 			vrele(epp->ep_interp);
406 			epp->ep_interp = NULL;
407 		}
408 
409 		/* make sure the first "interesting" error code is saved. */
410 		if (error == ENOEXEC)
411 			error = newerror;
412 
413 		if (epp->ep_flags & EXEC_DESTR)
414 			/* Error from "#!" code, tidied up by recursive call */
415 			return error;
416 	}
417 
418 	/* not found, error */
419 
420 	/*
421 	 * free any vmspace-creation commands,
422 	 * and release their references
423 	 */
424 	kill_vmcmds(&epp->ep_vmcmds);
425 
426 bad2:
427 	/*
428 	 * close and release the vnode, restore the old one, free the
429 	 * pathname buf, and punt.
430 	 */
431 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
432 	VOP_CLOSE(vp, FREAD, l->l_cred);
433 	vput(vp);
434 	PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
435 	return error;
436 
437 bad1:
438 	/*
439 	 * free the namei pathname buffer, and put the vnode
440 	 * (which we don't yet have open).
441 	 */
442 	vput(vp);			/* was still locked */
443 	PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
444 	return error;
445 }
446
/*
 * Extra space reserved at the base of the new user stack, next to the
 * ps_strings area (one page on stack-grows-up machines, none
 * otherwise).  NOTE(review): presumably reserved for libpthread's
 * benefit, as the name suggests -- confirm against libpthread.
 */
447 #ifdef __MACHINE_STACK_GROWS_UP
448 #define STACK_PTHREADSPACE NBPG
449 #else
450 #define STACK_PTHREADSPACE 0
451 #endif
452
453 static int
454 execve_fetch_element(char * const *array, size_t index, char **value)
455 {
456 return copyin(array + index, value, sizeof(*value));
457 }
458
459 /*
460 * exec system call
461 */
462 /* ARGSUSED */
463 int
464 sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
465 {
466 /* {
467 syscallarg(const char *) path;
468 syscallarg(char * const *) argp;
469 syscallarg(char * const *) envp;
470 } */
471
472 return execve1(l, SCARG(uap, path), SCARG(uap, argp),
473 SCARG(uap, envp), execve_fetch_element);
474 }
475
476 int
477 execve1(struct lwp *l, const char *path, char * const *args,
478 char * const *envs, execve_fetch_element_t fetch_element)
479 {
480 int error;
481 struct exec_package pack;
482 struct nameidata nid;
483 struct vattr attr;
484 struct proc *p;
485 char *argp;
486 char *dp, *sp;
487 long argc, envc;
488 size_t i, len;
489 char *stack;
490 struct ps_strings arginfo;
491 struct ps_strings *aip = &arginfo;
492 struct vmspace *vm;
493 struct exec_fakearg *tmpfap;
494 int szsigcode;
495 struct exec_vmcmd *base_vcp;
496 int oldlwpflags;
497 ksiginfo_t ksi;
498 ksiginfoq_t kq;
499 char *pathbuf;
500 size_t pathbuflen;
501 uid_t uid;
502
503 p = l->l_proc;
504
505 /*
506 * Check if we have exceeded our number of processes limit.
507 * This is so that we handle the case where a root daemon
508 * forked, ran setuid to become the desired user and is trying
509 * to exec. The obvious place to do the reference counting check
510 * is setuid(), but we don't do the reference counting check there
511 * like other OS's do because then all the programs that use setuid()
512 * must be modified to check the return code of setuid() and exit().
513 * It is dangerous to make setuid() fail, because it fails open and
514 * the program will continue to run as root. If we make it succeed
515 * and return an error code, again we are not enforcing the limit.
516 * The best place to enforce the limit is here, when the process tries
517 * to execute a new image, because eventually the process will need
518 * to call exec in order to do something useful.
519 */
520
521 if ((p->p_flag & PK_SUGID) && (uid = kauth_cred_getuid(l->l_cred)) != 0
522 && chgproccnt(uid, 0) > p->p_rlimit[RLIMIT_NPROC].rlim_cur)
523 return EAGAIN;
524
525 oldlwpflags = l->l_flag & (LW_SA | LW_SA_UPCALL);
526 if (l->l_flag & LW_SA) {
527 lwp_lock(l);
528 l->l_flag &= ~(LW_SA | LW_SA_UPCALL);
529 lwp_unlock(l);
530 }
531
532 /*
533 * Drain existing references and forbid new ones. The process
534 * should be left alone until we're done here. This is necessary
535 * to avoid race conditions - e.g. in ptrace() - that might allow
536 * a local user to illicitly obtain elevated privileges.
537 */
538 rw_enter(&p->p_reflock, RW_WRITER);
539
540 base_vcp = NULL;
541 /*
542 * Init the namei data to point the file user's program name.
543 * This is done here rather than in check_exec(), so that it's
544 * possible to override this settings if any of makecmd/probe
545 * functions call check_exec() recursively - for example,
546 * see exec_script_makecmds().
547 */
548 pathbuf = PNBUF_GET();
549 error = copyinstr(path, pathbuf, MAXPATHLEN, &pathbuflen);
550 if (error) {
551 DPRINTF(("execve: copyinstr path %d", error));
552 goto clrflg;
553 }
554
555 NDINIT(&nid, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_SYSSPACE, pathbuf);
556
557 /*
558 * initialize the fields of the exec package.
559 */
560 pack.ep_name = path;
561 pack.ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
562 pack.ep_hdrlen = exec_maxhdrsz;
563 pack.ep_hdrvalid = 0;
564 pack.ep_ndp = &nid;
565 pack.ep_emul_arg = NULL;
566 pack.ep_vmcmds.evs_cnt = 0;
567 pack.ep_vmcmds.evs_used = 0;
568 pack.ep_vap = &attr;
569 pack.ep_flags = 0;
570 pack.ep_emul_root = NULL;
571 pack.ep_interp = NULL;
572 pack.ep_esch = NULL;
573 pack.ep_pax_flags = 0;
574
575 rw_enter(&exec_lock, RW_READER);
576
577 /* see if we can run it. */
578 if ((error = check_exec(l, &pack)) != 0) {
579 if (error != ENOENT) {
580 DPRINTF(("execve: check exec failed %d\n", error));
581 }
582 goto freehdr;
583 }
584
585 /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
586
587 /* allocate an argument buffer */
588 argp = pool_get(&exec_pool, PR_WAITOK);
589 KASSERT(argp != NULL);
590 dp = argp;
591 argc = 0;
592
593 /* copy the fake args list, if there's one, freeing it as we go */
594 if (pack.ep_flags & EXEC_HASARGL) {
595 tmpfap = pack.ep_fa;
596 while (tmpfap->fa_arg != NULL) {
597 const char *cp;
598
599 cp = tmpfap->fa_arg;
600 while (*cp)
601 *dp++ = *cp++;
602 *dp++ = '\0';
603
604 kmem_free(tmpfap->fa_arg, tmpfap->fa_len);
605 tmpfap++; argc++;
606 }
607 kmem_free(pack.ep_fa, pack.ep_fa_len);
608 pack.ep_flags &= ~EXEC_HASARGL;
609 }
610
611 /* Now get argv & environment */
612 if (args == NULL) {
613 DPRINTF(("execve: null args\n"));
614 error = EINVAL;
615 goto bad;
616 }
617 /* 'i' will index the argp/envp element to be retrieved */
618 i = 0;
619 if (pack.ep_flags & EXEC_SKIPARG)
620 i++;
621
622 while (1) {
623 len = argp + ARG_MAX - dp;
624 if ((error = (*fetch_element)(args, i, &sp)) != 0) {
625 DPRINTF(("execve: fetch_element args %d\n", error));
626 goto bad;
627 }
628 if (!sp)
629 break;
630 if ((error = copyinstr(sp, dp, len, &len)) != 0) {
631 DPRINTF(("execve: copyinstr args %d\n", error));
632 if (error == ENAMETOOLONG)
633 error = E2BIG;
634 goto bad;
635 }
636 ktrexecarg(dp, len - 1);
637 dp += len;
638 i++;
639 argc++;
640 }
641
642 envc = 0;
643 /* environment need not be there */
644 if (envs != NULL) {
645 i = 0;
646 while (1) {
647 len = argp + ARG_MAX - dp;
648 if ((error = (*fetch_element)(envs, i, &sp)) != 0) {
649 DPRINTF(("execve: fetch_element env %d\n", error));
650 goto bad;
651 }
652 if (!sp)
653 break;
654 if ((error = copyinstr(sp, dp, len, &len)) != 0) {
655 DPRINTF(("execve: copyinstr env %d\n", error));
656 if (error == ENAMETOOLONG)
657 error = E2BIG;
658 goto bad;
659 }
660 ktrexecenv(dp, len - 1);
661 dp += len;
662 i++;
663 envc++;
664 }
665 }
666
667 dp = (char *) ALIGN(dp);
668
669 szsigcode = pack.ep_esch->es_emul->e_esigcode -
670 pack.ep_esch->es_emul->e_sigcode;
671
672 #ifdef __MACHINE_STACK_GROWS_UP
673 /* See big comment lower down */
674 #define RTLD_GAP 32
675 #else
676 #define RTLD_GAP 0
677 #endif
678
679 /* Now check if args & environ fit into new stack */
680 if (pack.ep_flags & EXEC_32)
681 len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
682 sizeof(int) + sizeof(int) + dp + RTLD_GAP +
683 szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
684 - argp;
685 else
686 len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
687 sizeof(char *) + sizeof(int) + dp + RTLD_GAP +
688 szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
689 - argp;
690
691 #ifdef PAX_ASLR
692 if (pax_aslr_active(l))
693 len += (arc4random() % PAGE_SIZE);
694 #endif /* PAX_ASLR */
695
696 #ifdef STACKLALIGN /* arm, etc. */
697 len = STACKALIGN(len); /* make the stack "safely" aligned */
698 #else
699 len = ALIGN(len); /* make the stack "safely" aligned */
700 #endif
701
702 if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
703 DPRINTF(("execve: stack limit exceeded %zu\n", len));
704 error = ENOMEM;
705 goto bad;
706 }
707
708 /* Get rid of other LWPs. */
709 if (p->p_sa || p->p_nlwps > 1) {
710 mutex_enter(p->p_lock);
711 exit_lwps(l);
712 mutex_exit(p->p_lock);
713 }
714 KDASSERT(p->p_nlwps == 1);
715
716 /* Destroy any lwpctl info. */
717 if (p->p_lwpctl != NULL)
718 lwp_ctl_exit();
719
720 /* This is now LWP 1 */
721 l->l_lid = 1;
722 p->p_nlwpid = 1;
723
724 #ifdef KERN_SA
725 /* Release any SA state. */
726 if (p->p_sa)
727 sa_release(p);
728 #endif /* KERN_SA */
729
730 /* Remove POSIX timers */
731 timers_free(p, TIMERS_POSIX);
732
733 /* adjust "active stack depth" for process VSZ */
734 pack.ep_ssize = len; /* maybe should go elsewhere, but... */
735
736 /*
737 * Do whatever is necessary to prepare the address space
738 * for remapping. Note that this might replace the current
739 * vmspace with another!
740 */
741 uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);
742
743 /* record proc's vnode, for use by procfs and others */
744 if (p->p_textvp)
745 vrele(p->p_textvp);
746 VREF(pack.ep_vp);
747 p->p_textvp = pack.ep_vp;
748
749 /* Now map address space */
750 vm = p->p_vmspace;
751 vm->vm_taddr = (void *)pack.ep_taddr;
752 vm->vm_tsize = btoc(pack.ep_tsize);
753 vm->vm_daddr = (void*)pack.ep_daddr;
754 vm->vm_dsize = btoc(pack.ep_dsize);
755 vm->vm_ssize = btoc(pack.ep_ssize);
756 vm->vm_issize = 0;
757 vm->vm_maxsaddr = (void *)pack.ep_maxsaddr;
758 vm->vm_minsaddr = (void *)pack.ep_minsaddr;
759
760 #ifdef PAX_ASLR
761 pax_aslr_init(l, vm);
762 #endif /* PAX_ASLR */
763
764 /* create the new process's VM space by running the vmcmds */
765 #ifdef DIAGNOSTIC
766 if (pack.ep_vmcmds.evs_used == 0)
767 panic("execve: no vmcmds");
768 #endif
769 for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
770 struct exec_vmcmd *vcp;
771
772 vcp = &pack.ep_vmcmds.evs_cmds[i];
773 if (vcp->ev_flags & VMCMD_RELATIVE) {
774 #ifdef DIAGNOSTIC
775 if (base_vcp == NULL)
776 panic("execve: relative vmcmd with no base");
777 if (vcp->ev_flags & VMCMD_BASE)
778 panic("execve: illegal base & relative vmcmd");
779 #endif
780 vcp->ev_addr += base_vcp->ev_addr;
781 }
782 error = (*vcp->ev_proc)(l, vcp);
783 #ifdef DEBUG_EXEC
784 if (error) {
785 size_t j;
786 struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
787 for (j = 0; j <= i; j++)
788 uprintf(
789 "vmcmd[%zu] = %#lx/%#lx fd@%#lx prot=0%o flags=%d\n",
790 j, vp[j].ev_addr, vp[j].ev_len,
791 vp[j].ev_offset, vp[j].ev_prot,
792 vp[j].ev_flags);
793 }
794 #endif /* DEBUG_EXEC */
795 if (vcp->ev_flags & VMCMD_BASE)
796 base_vcp = vcp;
797 }
798
799 /* free the vmspace-creation commands, and release their references */
800 kill_vmcmds(&pack.ep_vmcmds);
801
802 vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
803 VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
804 vput(pack.ep_vp);
805
806 /* if an error happened, deallocate and punt */
807 if (error) {
808 DPRINTF(("execve: vmcmd %zu failed: %d\n", i - 1, error));
809 goto exec_abort;
810 }
811
812 /* remember information about the process */
813 arginfo.ps_nargvstr = argc;
814 arginfo.ps_nenvstr = envc;
815
816 /* set command name & other accounting info */
817 i = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
818 (void)memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, i);
819 p->p_comm[i] = '\0';
820
821 dp = PNBUF_GET();
822 /*
823 * If the path starts with /, we don't need to do any work.
824 * This handles the majority of the cases.
825 * In the future perhaps we could canonicalize it?
826 */
827 if (pathbuf[0] == '/')
828 (void)strlcpy(pack.ep_path = dp, pathbuf, MAXPATHLEN);
829 #ifdef notyet
830 /*
831 * Although this works most of the time [since the entry was just
832 * entered in the cache] we don't use it because it theoretically
833 * can fail and it is not the cleanest interface, because there
834 * could be races. When the namei cache is re-written, this can
835 * be changed to use the appropriate function.
836 */
837 else if (!(error = vnode_to_path(dp, MAXPATHLEN, p->p_textvp, l, p)))
838 pack.ep_path = dp;
839 #endif
840 else {
841 #ifdef notyet
842 printf("Cannot get path for pid %d [%s] (error %d)",
843 (int)p->p_pid, p->p_comm, error);
844 #endif
845 pack.ep_path = NULL;
846 PNBUF_PUT(dp);
847 }
848
849 stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
850 STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
851 len - (sizeof(struct ps_strings) + szsigcode));
852
853 #ifdef __MACHINE_STACK_GROWS_UP
854 /*
855 * The copyargs call always copies into lower addresses
856 * first, moving towards higher addresses, starting with
857 * the stack pointer that we give. When the stack grows
858 * down, this puts argc/argv/envp very shallow on the
859 * stack, right at the first user stack pointer.
860 * When the stack grows up, the situation is reversed.
861 *
862 * Normally, this is no big deal. But the ld_elf.so _rtld()
863 * function expects to be called with a single pointer to
864 * a region that has a few words it can stash values into,
865 * followed by argc/argv/envp. When the stack grows down,
866 * it's easy to decrement the stack pointer a little bit to
867 * allocate the space for these few words and pass the new
868 * stack pointer to _rtld. When the stack grows up, however,
869 * a few words before argc is part of the signal trampoline, XXX
870 * so we have a problem.
871 *
872 * Instead of changing how _rtld works, we take the easy way
873 * out and steal 32 bytes before we call copyargs.
874 * This extra space was allowed for when 'len' was calculated.
875 */
876 stack += RTLD_GAP;
877 #endif /* __MACHINE_STACK_GROWS_UP */
878
879 /* Now copy argc, args & environ to new stack */
880 error = (*pack.ep_esch->es_copyargs)(l, &pack, &arginfo, &stack, argp);
881 if (pack.ep_path) {
882 PNBUF_PUT(pack.ep_path);
883 pack.ep_path = NULL;
884 }
885 if (error) {
886 DPRINTF(("execve: copyargs failed %d\n", error));
887 goto exec_abort;
888 }
889 /* Move the stack back to original point */
890 stack = (char *)STACK_GROW(vm->vm_minsaddr, len);
891
892 /* fill process ps_strings info */
893 p->p_psstr = (struct ps_strings *)
894 STACK_ALLOC(STACK_GROW(vm->vm_minsaddr, STACK_PTHREADSPACE),
895 sizeof(struct ps_strings));
896 p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
897 p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
898 p->p_psenv = offsetof(struct ps_strings, ps_envstr);
899 p->p_psnenv = offsetof(struct ps_strings, ps_nenvstr);
900
901 /* copy out the process's ps_strings structure */
902 if ((error = copyout(aip, (char *)p->p_psstr,
903 sizeof(arginfo))) != 0) {
904 DPRINTF(("execve: ps_strings copyout %p->%p size %ld failed\n",
905 aip, (char *)p->p_psstr, (long)sizeof(arginfo)));
906 goto exec_abort;
907 }
908
909 fd_closeexec(); /* handle close on exec */
910 execsigs(p); /* reset catched signals */
911
912 l->l_ctxlink = NULL; /* reset ucontext link */
913
914
915 p->p_acflag &= ~AFORK;
916 mutex_enter(p->p_lock);
917 p->p_flag |= PK_EXEC;
918 mutex_exit(p->p_lock);
919
920 /*
921 * Stop profiling.
922 */
923 if ((p->p_stflag & PST_PROFIL) != 0) {
924 mutex_spin_enter(&p->p_stmutex);
925 stopprofclock(p);
926 mutex_spin_exit(&p->p_stmutex);
927 }
928
929 /*
930 * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
931 * exited and exec()/exit() are the only places it will be cleared.
932 */
933 if ((p->p_lflag & PL_PPWAIT) != 0) {
934 mutex_enter(proc_lock);
935 p->p_lflag &= ~PL_PPWAIT;
936 cv_broadcast(&p->p_pptr->p_waitcv);
937 mutex_exit(proc_lock);
938 }
939
940 /*
941 * Deal with set[ug]id. MNT_NOSUID has already been used to disable
942 * s[ug]id. It's OK to check for PSL_TRACED here as we have blocked
943 * out additional references on the process for the moment.
944 */
945 if ((p->p_slflag & PSL_TRACED) == 0 &&
946
947 (((attr.va_mode & S_ISUID) != 0 &&
948 kauth_cred_geteuid(l->l_cred) != attr.va_uid) ||
949
950 ((attr.va_mode & S_ISGID) != 0 &&
951 kauth_cred_getegid(l->l_cred) != attr.va_gid))) {
952 /*
953 * Mark the process as SUGID before we do
954 * anything that might block.
955 */
956 proc_crmod_enter();
957 proc_crmod_leave(NULL, NULL, true);
958
959 /* Make sure file descriptors 0..2 are in use. */
960 if ((error = fd_checkstd()) != 0) {
961 DPRINTF(("execve: fdcheckstd failed %d\n", error));
962 goto exec_abort;
963 }
964
965 /*
966 * Copy the credential so other references don't see our
967 * changes.
968 */
969 l->l_cred = kauth_cred_copy(l->l_cred);
970 #ifdef KTRACE
971 /*
972 * If the persistent trace flag isn't set, turn off.
973 */
974 if (p->p_tracep) {
975 mutex_enter(&ktrace_lock);
976 if (!(p->p_traceflag & KTRFAC_PERSISTENT))
977 ktrderef(p);
978 mutex_exit(&ktrace_lock);
979 }
980 #endif
981 if (attr.va_mode & S_ISUID)
982 kauth_cred_seteuid(l->l_cred, attr.va_uid);
983 if (attr.va_mode & S_ISGID)
984 kauth_cred_setegid(l->l_cred, attr.va_gid);
985 } else {
986 if (kauth_cred_geteuid(l->l_cred) ==
987 kauth_cred_getuid(l->l_cred) &&
988 kauth_cred_getegid(l->l_cred) ==
989 kauth_cred_getgid(l->l_cred))
990 p->p_flag &= ~PK_SUGID;
991 }
992
993 /*
994 * Copy the credential so other references don't see our changes.
995 * Test to see if this is necessary first, since in the common case
996 * we won't need a private reference.
997 */
998 if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
999 kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
1000 l->l_cred = kauth_cred_copy(l->l_cred);
1001 kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
1002 kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
1003 }
1004
1005 /* Update the master credentials. */
1006 if (l->l_cred != p->p_cred) {
1007 kauth_cred_t ocred;
1008
1009 kauth_cred_hold(l->l_cred);
1010 mutex_enter(p->p_lock);
1011 ocred = p->p_cred;
1012 p->p_cred = l->l_cred;
1013 mutex_exit(p->p_lock);
1014 kauth_cred_free(ocred);
1015 }
1016
1017 #if defined(__HAVE_RAS)
1018 /*
1019 * Remove all RASs from the address space.
1020 */
1021 ras_purgeall();
1022 #endif
1023
1024 doexechooks(p);
1025
1026 /* setup new registers and do misc. setup. */
1027 (*pack.ep_esch->es_emul->e_setregs)(l, &pack, (u_long) stack);
1028 if (pack.ep_esch->es_setregs)
1029 (*pack.ep_esch->es_setregs)(l, &pack, (u_long) stack);
1030
1031 /* map the process's signal trampoline code */
1032 if (exec_sigcode_map(p, pack.ep_esch->es_emul)) {
1033 DPRINTF(("execve: map sigcode failed %d\n", error));
1034 goto exec_abort;
1035 }
1036
1037 pool_put(&exec_pool, argp);
1038
1039 PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1040
1041 /* notify others that we exec'd */
1042 KNOTE(&p->p_klist, NOTE_EXEC);
1043
1044 kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1045
1046 /* The emulation root will usually have been found when we looked
1047 * for the elf interpreter (or similar), if not look now. */
1048 if (pack.ep_esch->es_emul->e_path != NULL && pack.ep_emul_root == NULL)
1049 emul_find_root(l, &pack);
1050
1051 /* Any old emulation root got removed by fdcloseexec */
1052 rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
1053 p->p_cwdi->cwdi_edir = pack.ep_emul_root;
1054 rw_exit(&p->p_cwdi->cwdi_lock);
1055 pack.ep_emul_root = NULL;
1056 if (pack.ep_interp != NULL)
1057 vrele(pack.ep_interp);
1058
1059 /*
1060 * Call emulation specific exec hook. This can setup per-process
1061 * p->p_emuldata or do any other per-process stuff an emulation needs.
1062 *
1063 * If we are executing process of different emulation than the
1064 * original forked process, call e_proc_exit() of the old emulation
1065 * first, then e_proc_exec() of new emulation. If the emulation is
1066 * same, the exec hook code should deallocate any old emulation
1067 * resources held previously by this process.
1068 */
1069 if (p->p_emul && p->p_emul->e_proc_exit
1070 && p->p_emul != pack.ep_esch->es_emul)
1071 (*p->p_emul->e_proc_exit)(p);
1072
1073 /*
1074 * Call exec hook. Emulation code may NOT store reference to anything
1075 * from &pack.
1076 */
1077 if (pack.ep_esch->es_emul->e_proc_exec)
1078 (*pack.ep_esch->es_emul->e_proc_exec)(p, &pack);
1079
1080 /* update p_emul, the old value is no longer needed */
1081 p->p_emul = pack.ep_esch->es_emul;
1082
1083 /* ...and the same for p_execsw */
1084 p->p_execsw = pack.ep_esch;
1085
1086 #ifdef __HAVE_SYSCALL_INTERN
1087 (*p->p_emul->e_syscall_intern)(p);
1088 #endif
1089 ktremul();
1090
1091 /* Allow new references from the debugger/procfs. */
1092 rw_exit(&p->p_reflock);
1093 rw_exit(&exec_lock);
1094
1095 mutex_enter(proc_lock);
1096
1097 if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
1098 KSI_INIT_EMPTY(&ksi);
1099 ksi.ksi_signo = SIGTRAP;
1100 ksi.ksi_lid = l->l_lid;
1101 kpsignal(p, &ksi, NULL);
1102 }
1103
1104 if (p->p_sflag & PS_STOPEXEC) {
1105 KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
1106 p->p_pptr->p_nstopchild++;
1107 p->p_pptr->p_waited = 0;
1108 mutex_enter(p->p_lock);
1109 ksiginfo_queue_init(&kq);
1110 sigclearall(p, &contsigmask, &kq);
1111 lwp_lock(l);
1112 l->l_stat = LSSTOP;
1113 p->p_stat = SSTOP;
1114 p->p_nrlwps--;
1115 mutex_exit(p->p_lock);
1116 mutex_exit(proc_lock);
1117 mi_switch(l);
1118 ksiginfo_queue_drain(&kq);
1119 KERNEL_LOCK(l->l_biglocks, l);
1120 } else {
1121 mutex_exit(proc_lock);
1122 }
1123
1124 PNBUF_PUT(pathbuf);
1125 return (EJUSTRETURN);
1126
1127 bad:
1128 /* free the vmspace-creation commands, and release their references */
1129 kill_vmcmds(&pack.ep_vmcmds);
1130 /* kill any opened file descriptor, if necessary */
1131 if (pack.ep_flags & EXEC_HASFD) {
1132 pack.ep_flags &= ~EXEC_HASFD;
1133 fd_close(pack.ep_fd);
1134 }
1135 /* close and put the exec'd file */
1136 vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
1137 VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
1138 vput(pack.ep_vp);
1139 PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1140 pool_put(&exec_pool, argp);
1141
1142 freehdr:
1143 kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1144 if (pack.ep_emul_root != NULL)
1145 vrele(pack.ep_emul_root);
1146 if (pack.ep_interp != NULL)
1147 vrele(pack.ep_interp);
1148
1149 rw_exit(&exec_lock);
1150
1151 clrflg:
1152 lwp_lock(l);
1153 l->l_flag |= oldlwpflags;
1154 lwp_unlock(l);
1155 PNBUF_PUT(pathbuf);
1156 rw_exit(&p->p_reflock);
1157
1158 return error;
1159
1160 exec_abort:
1161 PNBUF_PUT(pathbuf);
1162 rw_exit(&p->p_reflock);
1163 rw_exit(&exec_lock);
1164
1165 /*
1166 * the old process doesn't exist anymore. exit gracefully.
1167 * get rid of the (new) address space we have created, if any, get rid
1168 * of our namei data and vnode, and exit noting failure
1169 */
1170 uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
1171 VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
1172 if (pack.ep_emul_arg)
1173 FREE(pack.ep_emul_arg, M_TEMP);
1174 PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1175 pool_put(&exec_pool, argp);
1176 kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1177 if (pack.ep_emul_root != NULL)
1178 vrele(pack.ep_emul_root);
1179 if (pack.ep_interp != NULL)
1180 vrele(pack.ep_interp);
1181
1182 /* Acquire the sched-state mutex (exit1() will release it). */
1183 mutex_enter(p->p_lock);
1184 exit1(l, W_EXITCODE(error, SIGABRT));
1185
1186 /* NOTREACHED */
1187 return 0;
1188 }
1189
1190
1191 int
1192 copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
1193 char **stackp, void *argp)
1194 {
1195 char **cpp, *dp, *sp;
1196 size_t len;
1197 void *nullp;
1198 long argc, envc;
1199 int error;
1200
1201 cpp = (char **)*stackp;
1202 nullp = NULL;
1203 argc = arginfo->ps_nargvstr;
1204 envc = arginfo->ps_nenvstr;
1205 if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0)
1206 return error;
1207
1208 dp = (char *) (cpp + argc + envc + 2 + pack->ep_esch->es_arglen);
1209 sp = argp;
1210
1211 /* XXX don't copy them out, remap them! */
1212 arginfo->ps_argvstr = cpp; /* remember location of argv for later */
1213
1214 for (; --argc >= 0; sp += len, dp += len)
1215 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
1216 (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
1217 return error;
1218
1219 if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
1220 return error;
1221
1222 arginfo->ps_envstr = cpp; /* remember location of envp for later */
1223
1224 for (; --envc >= 0; sp += len, dp += len)
1225 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
1226 (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
1227 return error;
1228
1229 if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
1230 return error;
1231
1232 *stackp = (char *)cpp;
1233 return 0;
1234 }
1235
1236 #ifdef LKM
1237 /*
1238 * Find an emulation of given name in list of emulations.
1239 * Needs to be called with the exec_lock held.
1240 */
1241 const struct emul *
1242 emul_search(const char *name)
1243 {
1244 struct emul_entry *it;
1245
1246 LIST_FOREACH(it, &el_head, el_list) {
1247 if (strcmp(name, it->el_emul->e_name) == 0)
1248 return it->el_emul;
1249 }
1250
1251 return NULL;
1252 }
1253
1254 /*
1255 * Add an emulation to list, if it's not there already.
1256 */
1257 int
1258 emul_register(const struct emul *emul, int ro_entry)
1259 {
1260 struct emul_entry *ee;
1261 int error;
1262
1263 error = 0;
1264 rw_enter(&exec_lock, RW_WRITER);
1265
1266 if (emul_search(emul->e_name)) {
1267 error = EEXIST;
1268 goto out;
1269 }
1270
1271 ee = kmem_alloc(sizeof(*ee), KM_SLEEP);
1272 ee->el_emul = emul;
1273 ee->ro_entry = ro_entry;
1274 LIST_INSERT_HEAD(&el_head, ee, el_list);
1275
1276 out:
1277 rw_exit(&exec_lock);
1278 return error;
1279 }
1280
1281 /*
1282 * Remove emulation with name 'name' from list of supported emulations.
1283 */
1284 int
1285 emul_unregister(const char *name)
1286 {
1287 const struct proclist_desc *pd;
1288 struct emul_entry *it;
1289 int i, error;
1290 struct proc *ptmp;
1291
1292 error = 0;
1293 rw_enter(&exec_lock, RW_WRITER);
1294
1295 LIST_FOREACH(it, &el_head, el_list) {
1296 if (strcmp(it->el_emul->e_name, name) == 0)
1297 break;
1298 }
1299
1300 if (!it) {
1301 error = ENOENT;
1302 goto out;
1303 }
1304
1305 if (it->ro_entry) {
1306 error = EBUSY;
1307 goto out;
1308 }
1309
1310 /* test if any execw[] entry is still using this */
1311 for(i=0; i < nexecs; i++) {
1312 if (execsw[i]->es_emul == it->el_emul) {
1313 error = EBUSY;
1314 goto out;
1315 }
1316 }
1317
1318 /*
1319 * Test if any process is running under this emulation - since
1320 * emul_unregister() is running quite sendomly, it's better
1321 * to do expensive check here than to use any locking.
1322 */
1323 mutex_enter(proc_lock);
1324 for (pd = proclists; pd->pd_list != NULL && !error; pd++) {
1325 PROCLIST_FOREACH(ptmp, pd->pd_list) {
1326 if (ptmp->p_emul == it->el_emul) {
1327 error = EBUSY;
1328 break;
1329 }
1330 }
1331 }
1332 mutex_exit(proc_lock);
1333
1334 if (error)
1335 goto out;
1336
1337
1338 /* entry is not used, remove it */
1339 LIST_REMOVE(it, el_list);
1340 kmem_free(it, sizeof(*it));
1341
1342 out:
1343 rw_exit(&exec_lock);
1344 return error;
1345 }
1346
1347 /*
1348 * Add execsw[] entry.
1349 */
1350 int
1351 exec_add(struct execsw *esp, const char *e_name)
1352 {
1353 struct exec_entry *it;
1354 int error;
1355
1356 error = 0;
1357 rw_enter(&exec_lock, RW_WRITER);
1358
1359 if (!esp->es_emul) {
1360 esp->es_emul = emul_search(e_name);
1361 if (!esp->es_emul) {
1362 error = ENOENT;
1363 goto out;
1364 }
1365 }
1366
1367 LIST_FOREACH(it, &ex_head, ex_list) {
1368 /* assume tuple (makecmds, probe_func, emulation) is unique */
1369 if (it->es->es_makecmds == esp->es_makecmds
1370 && it->es->u.elf_probe_func == esp->u.elf_probe_func
1371 && it->es->es_emul == esp->es_emul) {
1372 error = EEXIST;
1373 goto out;
1374 }
1375 }
1376
1377 /* if we got here, the entry doesn't exist yet */
1378 it = kmem_alloc(sizeof(*it), KM_SLEEP);
1379 it->es = esp;
1380 LIST_INSERT_HEAD(&ex_head, it, ex_list);
1381
1382 /* update execsw[] */
1383 exec_init(0);
1384
1385 out:
1386 rw_exit(&exec_lock);
1387 return error;
1388 }
1389
1390 /*
1391 * Remove execsw[] entry.
1392 */
1393 int
1394 exec_remove(const struct execsw *esp)
1395 {
1396 struct exec_entry *it;
1397 int error;
1398
1399 error = 0;
1400 rw_enter(&exec_lock, RW_WRITER);
1401
1402 LIST_FOREACH(it, &ex_head, ex_list) {
1403 /* assume tuple (makecmds, probe_func, emulation) is unique */
1404 if (it->es->es_makecmds == esp->es_makecmds
1405 && it->es->u.elf_probe_func == esp->u.elf_probe_func
1406 && it->es->es_emul == esp->es_emul)
1407 break;
1408 }
1409 if (!it) {
1410 error = ENOENT;
1411 goto out;
1412 }
1413
1414 /* remove item from list and free resources */
1415 LIST_REMOVE(it, ex_list);
1416 kmem_free(it, sizeof(*it));
1417
1418 /* update execsw[] */
1419 exec_init(0);
1420
1421 out:
1422 rw_exit(&exec_lock);
1423 return error;
1424 }
1425
1426 static void
1427 link_es(struct execsw_entry **listp, const struct execsw *esp)
1428 {
1429 struct execsw_entry *et, *e1;
1430
1431 et = (struct execsw_entry *) malloc(sizeof(struct execsw_entry),
1432 M_TEMP, M_WAITOK);
1433 et->next = NULL;
1434 et->es = esp;
1435 if (*listp == NULL) {
1436 *listp = et;
1437 return;
1438 }
1439
1440 switch(et->es->es_prio) {
1441 case EXECSW_PRIO_FIRST:
1442 /* put new entry as the first */
1443 et->next = *listp;
1444 *listp = et;
1445 break;
1446 case EXECSW_PRIO_ANY:
1447 /* put new entry after all *_FIRST and *_ANY entries */
1448 for(e1 = *listp; e1->next
1449 && e1->next->es->es_prio != EXECSW_PRIO_LAST;
1450 e1 = e1->next);
1451 et->next = e1->next;
1452 e1->next = et;
1453 break;
1454 case EXECSW_PRIO_LAST:
1455 /* put new entry as the last one */
1456 for(e1 = *listp; e1->next; e1 = e1->next);
1457 e1->next = et;
1458 break;
1459 default:
1460 #ifdef DIAGNOSTIC
1461 panic("execw[] entry with unknown priority %d found",
1462 et->es->es_prio);
1463 #else
1464 free(et, M_TEMP);
1465 #endif
1466 break;
1467 }
1468 }
1469
1470 /*
1471 * Initialize exec structures. If init_boot is true, also does necessary
1472 * one-time initialization (it's called from main() that way).
1473 * Once system is multiuser, this should be called with exec_lock held,
1474 * i.e. via exec_{add|remove}().
1475 */
1476 int
1477 exec_init(int init_boot)
1478 {
1479 const struct execsw **new_es, * const *old_es;
1480 struct execsw_entry *list, *e1;
1481 struct exec_entry *e2;
1482 int i, es_sz;
1483 
1484 if (init_boot) {
1485 /* do one-time initializations */
1486 rw_init(&exec_lock);
1487 mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
1488 pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
1489 "execargs", &exec_palloc, IPL_NONE);
1490 pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1491 
1492 /* register compiled-in emulations (marked read-only, ro_entry=1) */
1493 for(i=0; i < nexecs_builtin; i++) {
1494 if (execsw_builtin[i].es_emul)
1495 emul_register(execsw_builtin[i].es_emul, 1);
1496 }
1497 #ifdef DIAGNOSTIC
1498 if (i == 0)
1499 panic("no emulations found in execsw_builtin[]");
1500 #endif
1501 }
1502 
1503 /*
1504 * Build execsw[] array from builtin entries and entries added
1505 * at runtime.
1506 */
/* Merge everything into one priority-sorted temporary list; link_es()
 * allocates the list nodes with malloc(M_TEMP), freed below. */
1507 list = NULL;
1508 for(i=0; i < nexecs_builtin; i++)
1509 link_es(&list, &execsw_builtin[i]);
1510 
1511 /* Add dynamically loaded entries */
1512 es_sz = nexecs_builtin;
1513 LIST_FOREACH(e2, &ex_head, ex_list) {
1514 link_es(&list, e2->es);
1515 es_sz++;
1516 }
1517 
1518 /*
1519 * Now that we have sorted all execw entries, create new execsw[]
1520 * and free no longer needed memory in the process.
1521 */
1522 new_es = kmem_alloc(es_sz * sizeof(struct execsw *), KM_SLEEP);
/* Flatten the sorted list into the new array, freeing each node as
 * its payload is copied over. */
1523 for(i=0; list; i++) {
1524 new_es[i] = list->es;
1525 e1 = list->next;
1526 free(list, M_TEMP);
1527 list = e1;
1528 }
1529 
1530 /*
1531 * New execsw[] array built, now replace old execsw[] and free
1532 * used memory.
1533 */
/* old_es is NULL on the very first call (nothing to free then). */
1534 old_es = execsw;
1535 if (old_es)
1536 /*XXXUNCONST*/
1537 kmem_free(__UNCONST(old_es), nexecs * sizeof(struct execsw *));
1538 execsw = new_es;
1539 nexecs = es_sz;
1540 
1541 /*
1542 * Figure out the maximum size of an exec header.
1543 */
1544 exec_maxhdrsz = 0;
1545 for (i = 0; i < nexecs; i++) {
1546 if (execsw[i]->es_hdrsz > exec_maxhdrsz)
1547 exec_maxhdrsz = execsw[i]->es_hdrsz;
1548 }
1549 
1550 return 0;
1551 }
1552 #endif
1553
1554 #ifndef LKM
1555 /*
1556 * Simplified exec_init() for kernels without LKMs. Only initialize
1557 * exec_maxhdrsz and execsw[].
1558 */
1559 int
1560 exec_init(int init_boot)
1561 {
1562 int i;
1563
1564 #ifdef DIAGNOSTIC
1565 if (!init_boot)
1566 panic("exec_init(): called with init_boot == 0");
1567 #endif
1568
1569 /* do one-time initializations */
1570 nexecs = nexecs_builtin;
1571 execsw = kmem_alloc(nexecs * sizeof(struct execsw *), KM_SLEEP);
1572
1573 rw_init(&exec_lock);
1574 pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
1575 "execargs", &exec_palloc, IPL_NONE);
1576 pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1577
1578 /*
1579 * Fill in execsw[] and figure out the maximum size of an exec header.
1580 */
1581 exec_maxhdrsz = 0;
1582 for(i=0; i < nexecs; i++) {
1583 execsw[i] = &execsw_builtin[i];
1584 if (execsw_builtin[i].es_hdrsz > exec_maxhdrsz)
1585 exec_maxhdrsz = execsw_builtin[i].es_hdrsz;
1586 }
1587
1588 return 0;
1589
1590 }
1591 #endif /* !LKM */
1592
/*
 * exec_sigcode_map:
 *
 *	Map the emulation's signal trampoline code ("sigcode") into
 *	process p's address space with PROT_READ|PROT_EXEC.  On success
 *	returns 0 and records the user address in p->p_sigctx.ps_sigcode;
 *	on failure returns the error from uvm_map().  A no-op (returns 0)
 *	for emulations with no sigobject or empty sigcode.
 */
1593 static int
1594 exec_sigcode_map(struct proc *p, const struct emul *e)
1595 {
1596 vaddr_t va;
1597 vsize_t sz;
1598 int error;
1599 struct uvm_object *uobj;
1600 
/* Size of the sigcode is the distance between its start/end labels. */
1601 sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
1602 
1603 if (e->e_sigobject == NULL || sz == 0) {
1604 return 0;
1605 }
1606 
1607 /*
1608 * If we don't have a sigobject for this emulation, create one.
1609 *
1610 * sigobject is an anonymous memory object (just like SYSV shared
1611 * memory) that we keep a permanent reference to and that we map
1612 * in all processes that need this sigcode. The creation is simple,
1613 * we create an object, add a permanent reference to it, map it in
1614 * kernel space, copy out the sigcode to it and unmap it.
1615 * We map it with PROT_READ|PROT_EXEC into the process just
1616 * the way sys_mmap() would map it.
1617 */
1618 
/* Double-checked creation: test once unlocked, then re-test under
 * sigobject_lock before actually creating the shared object. */
1619 uobj = *e->e_sigobject;
1620 if (uobj == NULL) {
1621 mutex_enter(&sigobject_lock);
1622 if ((uobj = *e->e_sigobject) == NULL) {
1623 uobj = uao_create(sz, 0);
/* Permanent reference: keeps the object alive across all processes. */
1624 (*uobj->pgops->pgo_reference)(uobj);
/* Temporary RW kernel mapping so we can copy the sigcode in. */
1625 va = vm_map_min(kernel_map);
1626 if ((error = uvm_map(kernel_map, &va, round_page(sz),
1627 uobj, 0, 0,
1628 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
1629 UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
1630 printf("kernel mapping failed %d\n", error);
1631 (*uobj->pgops->pgo_detach)(uobj);
1632 mutex_exit(&sigobject_lock);
1633 return (error);
1634 }
1635 memcpy((void *)va, e->e_sigcode, sz);
1636 #ifdef PMAP_NEED_PROCWR
/* Some platforms need explicit I/D-cache synchronization after
 * writing code that will later be executed. */
1637 pmap_procwr(&proc0, va, sz);
1638 #endif
1639 uvm_unmap(kernel_map, va, va + round_page(sz));
1640 *e->e_sigobject = uobj;
1641 }
1642 mutex_exit(&sigobject_lock);
1643 }
1644 
1645 /* Just a hint to uvm_map where to put it. */
1646 va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
1647 round_page(sz));
1648 
1649 #ifdef __alpha__
1650 /*
1651 * Tru64 puts /sbin/loader at the end of user virtual memory,
1652 * which causes the above calculation to put the sigcode at
1653 * an invalid address. Put it just below the text instead.
1654 */
1655 if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1656 va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
1657 }
1658 #endif
1659 
/* Per-process reference for this mapping; dropped again on error. */
1660 (*uobj->pgops->pgo_reference)(uobj);
1661 error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
1662 uobj, 0, 0,
1663 UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
1664 UVM_ADV_RANDOM, 0));
1665 if (error) {
1666 (*uobj->pgops->pgo_detach)(uobj);
1667 return (error);
1668 }
1669 p->p_sigctx.ps_sigcode = (void *)va;
1670 return (0);
1671 }
Cache object: ab9a9b47eb5ef49be007c9320c0bbb50
|