FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_exec.c
1 /*
2 * Copyright (c) 1993, David Greenman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD: releng/5.2/sys/kern/kern_exec.c 122524 2003-11-12 03:14:31Z rwatson $");
29
30 #include "opt_ktrace.h"
31 #include "opt_mac.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/eventhandler.h>
36 #include <sys/lock.h>
37 #include <sys/mutex.h>
38 #include <sys/sysproto.h>
39 #include <sys/signalvar.h>
40 #include <sys/kernel.h>
41 #include <sys/mac.h>
42 #include <sys/mount.h>
43 #include <sys/filedesc.h>
44 #include <sys/fcntl.h>
45 #include <sys/acct.h>
46 #include <sys/exec.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/wait.h>
50 #include <sys/malloc.h>
51 #include <sys/proc.h>
52 #include <sys/pioctl.h>
53 #include <sys/namei.h>
54 #include <sys/sysent.h>
55 #include <sys/shm.h>
56 #include <sys/sysctl.h>
57 #include <sys/user.h>
58 #include <sys/vnode.h>
59 #ifdef KTRACE
60 #include <sys/ktrace.h>
61 #endif
62
63 #include <vm/vm.h>
64 #include <vm/vm_param.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_page.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_kern.h>
69 #include <vm/vm_extern.h>
70 #include <vm/vm_object.h>
71 #include <vm/vm_pager.h>
72
73 #include <machine/reg.h>
74
75 MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
76
77 static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
78 static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
79 static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);
80 static int kern_execve(struct thread *td, char *fname, char **argv,
81 char **envv, struct mac *mac_p);
82
83 /* XXX This should be vm_size_t. */
84 SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD,
85 NULL, 0, sysctl_kern_ps_strings, "LU", "");
86
87 /* XXX This should be vm_size_t. */
88 SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD,
89 NULL, 0, sysctl_kern_usrstack, "LU", "");
90
91 SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD,
92 NULL, 0, sysctl_kern_stackprot, "I", "");
93
94 u_long ps_arg_cache_limit = PAGE_SIZE / 16;
95 SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW,
96 &ps_arg_cache_limit, 0, "");
97
98 int ps_argsopen = 1;
99 SYSCTL_INT(_kern, OID_AUTO, ps_argsopen, CTLFLAG_RW, &ps_argsopen, 0, "");
100
101 static int
102 sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
103 {
104 struct proc *p;
105
106 p = curproc;
107 return (SYSCTL_OUT(req, &p->p_sysent->sv_psstrings,
108 sizeof(p->p_sysent->sv_psstrings)));
109 }
110
111 static int
112 sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
113 {
114 struct proc *p;
115
116 p = curproc;
117 return (SYSCTL_OUT(req, &p->p_sysent->sv_usrstack,
118 sizeof(p->p_sysent->sv_usrstack)));
119 }
120
121 static int
122 sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS)
123 {
124 struct proc *p;
125
126 p = curproc;
127 return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot,
128 sizeof(p->p_sysent->sv_stackprot)));
129 }
130
/*
 * Table of registered image activators.  Each of the items is a pointer to
 * a `const struct execsw', hence the double pointer here.
 *
 * The table is a heap-allocated, NULL-terminated array that is replaced
 * wholesale by exec_register() and exec_unregister(); it stays NULL until
 * the first activator is registered.  kern_execve() walks it in order,
 * stopping at the terminating NULL.
 */
static const struct execsw **execsw;
136
137 /*
138 * In-kernel implementation of execve(). All arguments are assumed to be
139 * userspace pointers from the passed thread.
140 *
141 * MPSAFE
142 */
143 static int
144 kern_execve(td, fname, argv, envv, mac_p)
145 struct thread *td;
146 char *fname;
147 char **argv;
148 char **envv;
149 struct mac *mac_p;
150 {
151 struct proc *p = td->td_proc;
152 struct nameidata nd, *ndp;
153 struct ucred *newcred = NULL, *oldcred;
154 struct uidinfo *euip;
155 register_t *stack_base;
156 int error, len, i;
157 struct image_params image_params, *imgp;
158 struct vattr attr;
159 int (*img_first)(struct image_params *);
160 struct pargs *oldargs = NULL, *newargs = NULL;
161 struct sigacts *oldsigacts, *newsigacts;
162 #ifdef KTRACE
163 struct vnode *tracevp = NULL;
164 struct ucred *tracecred = NULL;
165 #endif
166 struct vnode *textvp = NULL;
167 int credential_changing;
168 int textset;
169 #ifdef MAC
170 struct label *interplabel = NULL;
171 int will_transition;
172 #endif
173
174 imgp = &image_params;
175
176 /*
177 * Lock the process and set the P_INEXEC flag to indicate that
178 * it should be left alone until we're done here. This is
179 * necessary to avoid race conditions - e.g. in ptrace() -
180 * that might allow a local user to illicitly obtain elevated
181 * privileges.
182 */
183 PROC_LOCK(p);
184 KASSERT((p->p_flag & P_INEXEC) == 0,
185 ("%s(): process already has P_INEXEC flag", __func__));
186 if (p->p_flag & P_SA || p->p_numthreads > 1) {
187 if (thread_single(SINGLE_EXIT)) {
188 PROC_UNLOCK(p);
189 return (ERESTART); /* Try again later. */
190 }
191 /*
192 * If we get here all other threads are dead,
193 * so unset the associated flags and lose KSE mode.
194 */
195 p->p_flag &= ~P_SA;
196 td->td_mailbox = NULL;
197 thread_single_end();
198 }
199 p->p_flag |= P_INEXEC;
200 PROC_UNLOCK(p);
201
202 /*
203 * Initialize part of the common data
204 */
205 imgp->proc = p;
206 imgp->userspace_argv = argv;
207 imgp->userspace_envv = envv;
208 imgp->execlabel = NULL;
209 imgp->attr = &attr;
210 imgp->argc = imgp->envc = 0;
211 imgp->argv0 = NULL;
212 imgp->entry_addr = 0;
213 imgp->vmspace_destroyed = 0;
214 imgp->interpreted = 0;
215 imgp->interpreter_name[0] = '\0';
216 imgp->auxargs = NULL;
217 imgp->vp = NULL;
218 imgp->object = NULL;
219 imgp->firstpage = NULL;
220 imgp->ps_strings = 0;
221 imgp->auxarg_size = 0;
222
223 #ifdef MAC
224 error = mac_execve_enter(imgp, mac_p);
225 if (error) {
226 mtx_lock(&Giant);
227 goto exec_fail;
228 }
229 #endif
230
231 /*
232 * Allocate temporary demand zeroed space for argument and
233 * environment strings
234 */
235 imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX +
236 PAGE_SIZE);
237 if (imgp->stringbase == NULL) {
238 error = ENOMEM;
239 mtx_lock(&Giant);
240 goto exec_fail;
241 }
242 imgp->stringp = imgp->stringbase;
243 imgp->stringspace = ARG_MAX;
244 imgp->image_header = imgp->stringbase + ARG_MAX;
245
246 /*
247 * Translate the file name. namei() returns a vnode pointer
248 * in ni_vp amoung other things.
249 */
250 ndp = &nd;
251 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
252 UIO_USERSPACE, fname, td);
253
254 mtx_lock(&Giant);
255 interpret:
256
257 error = namei(ndp);
258 if (error) {
259 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
260 ARG_MAX + PAGE_SIZE);
261 goto exec_fail;
262 }
263
264 imgp->vp = ndp->ni_vp;
265 imgp->fname = fname;
266
267 /*
268 * Check file permissions (also 'opens' file)
269 */
270 error = exec_check_permissions(imgp);
271 if (error)
272 goto exec_fail_dealloc;
273
274 if (VOP_GETVOBJECT(imgp->vp, &imgp->object) == 0)
275 vm_object_reference(imgp->object);
276
277 /*
278 * Set VV_TEXT now so no one can write to the executable while we're
279 * activating it.
280 *
281 * Remember if this was set before and unset it in case this is not
282 * actually an executable image.
283 */
284 textset = imgp->vp->v_vflag & VV_TEXT;
285 imgp->vp->v_vflag |= VV_TEXT;
286
287 error = exec_map_first_page(imgp);
288 if (error)
289 goto exec_fail_dealloc;
290
291 /*
292 * If the current process has a special image activator it
293 * wants to try first, call it. For example, emulating shell
294 * scripts differently.
295 */
296 error = -1;
297 if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
298 error = img_first(imgp);
299
300 /*
301 * Loop through the list of image activators, calling each one.
302 * An activator returns -1 if there is no match, 0 on success,
303 * and an error otherwise.
304 */
305 for (i = 0; error == -1 && execsw[i]; ++i) {
306 if (execsw[i]->ex_imgact == NULL ||
307 execsw[i]->ex_imgact == img_first) {
308 continue;
309 }
310 error = (*execsw[i]->ex_imgact)(imgp);
311 }
312
313 if (error) {
314 if (error == -1) {
315 if (textset == 0)
316 imgp->vp->v_vflag &= ~VV_TEXT;
317 error = ENOEXEC;
318 }
319 goto exec_fail_dealloc;
320 }
321
322 /*
323 * Special interpreter operation, cleanup and loop up to try to
324 * activate the interpreter.
325 */
326 if (imgp->interpreted) {
327 exec_unmap_first_page(imgp);
328 /*
329 * VV_TEXT needs to be unset for scripts. There is a short
330 * period before we determine that something is a script where
331 * VV_TEXT will be set. The vnode lock is held over this
332 * entire period so nothing should illegitimately be blocked.
333 */
334 imgp->vp->v_vflag &= ~VV_TEXT;
335 /* free name buffer and old vnode */
336 NDFREE(ndp, NDF_ONLY_PNBUF);
337 #ifdef MAC
338 interplabel = mac_vnode_label_alloc();
339 mac_copy_vnode_label(ndp->ni_vp->v_label, interplabel);
340 #endif
341 vput(ndp->ni_vp);
342 vm_object_deallocate(imgp->object);
343 imgp->object = NULL;
344 /* set new name to that of the interpreter */
345 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
346 UIO_SYSSPACE, imgp->interpreter_name, td);
347 goto interpret;
348 }
349
350 /*
351 * Copy out strings (args and env) and initialize stack base
352 */
353 if (p->p_sysent->sv_copyout_strings)
354 stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);
355 else
356 stack_base = exec_copyout_strings(imgp);
357
358 /*
359 * If custom stack fixup routine present for this process
360 * let it do the stack setup.
361 * Else stuff argument count as first item on stack
362 */
363 if (p->p_sysent->sv_fixup)
364 (*p->p_sysent->sv_fixup)(&stack_base, imgp);
365 else
366 suword(--stack_base, imgp->argc);
367
368 /*
369 * For security and other reasons, the file descriptor table cannot
370 * be shared after an exec.
371 */
372 FILEDESC_LOCK(p->p_fd);
373 if (p->p_fd->fd_refcnt > 1) {
374 struct filedesc *tmp;
375
376 tmp = fdcopy(td->td_proc->p_fd);
377 FILEDESC_UNLOCK(p->p_fd);
378 fdfree(td);
379 p->p_fd = tmp;
380 } else
381 FILEDESC_UNLOCK(p->p_fd);
382
383 /*
384 * Malloc things before we need locks.
385 */
386 newcred = crget();
387 euip = uifind(attr.va_uid);
388 i = imgp->endargs - imgp->stringbase;
389 if (ps_arg_cache_limit >= i + sizeof(struct pargs))
390 newargs = pargs_alloc(i);
391
392 /* close files on exec */
393 fdcloseexec(td);
394
395 /* Get a reference to the vnode prior to locking the proc */
396 VREF(ndp->ni_vp);
397
398 /*
399 * For security and other reasons, signal handlers cannot
400 * be shared after an exec. The new process gets a copy of the old
401 * handlers. In execsigs(), the new process will have its signals
402 * reset.
403 */
404 PROC_LOCK(p);
405 if (sigacts_shared(p->p_sigacts)) {
406 oldsigacts = p->p_sigacts;
407 PROC_UNLOCK(p);
408 newsigacts = sigacts_alloc();
409 sigacts_copy(newsigacts, oldsigacts);
410 PROC_LOCK(p);
411 p->p_sigacts = newsigacts;
412 } else
413 oldsigacts = NULL;
414
415 /* Stop profiling */
416 stopprofclock(p);
417
418 /* reset caught signals */
419 execsigs(p);
420
421 /* name this process - nameiexec(p, ndp) */
422 len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN);
423 bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len);
424 p->p_comm[len] = 0;
425
426 /*
427 * mark as execed, wakeup the process that vforked (if any) and tell
428 * it that it now has its own resources back
429 */
430 p->p_flag |= P_EXEC;
431 if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
432 p->p_flag &= ~P_PPWAIT;
433 wakeup(p->p_pptr);
434 }
435
436 /*
437 * Implement image setuid/setgid.
438 *
439 * Don't honor setuid/setgid if the filesystem prohibits it or if
440 * the process is being traced.
441 *
442 * XXXMAC: For the time being, use NOSUID to also prohibit
443 * transitions on the file system.
444 */
445 oldcred = p->p_ucred;
446 credential_changing = 0;
447 credential_changing |= (attr.va_mode & VSUID) && oldcred->cr_uid !=
448 attr.va_uid;
449 credential_changing |= (attr.va_mode & VSGID) && oldcred->cr_gid !=
450 attr.va_gid;
451 #ifdef MAC
452 will_transition = mac_execve_will_transition(oldcred, imgp->vp,
453 interplabel, imgp);
454 credential_changing |= will_transition;
455 #endif
456
457 if (credential_changing &&
458 (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
459 (p->p_flag & P_TRACED) == 0) {
460 /*
461 * Turn off syscall tracing for set-id programs, except for
462 * root. Record any set-id flags first to make sure that
463 * we do not regain any tracing during a possible block.
464 */
465 setsugid(p);
466 #ifdef KTRACE
467 if (p->p_tracevp != NULL && suser_cred(oldcred, PRISON_ROOT)) {
468 mtx_lock(&ktrace_mtx);
469 p->p_traceflag = 0;
470 tracevp = p->p_tracevp;
471 p->p_tracevp = NULL;
472 tracecred = p->p_tracecred;
473 p->p_tracecred = NULL;
474 mtx_unlock(&ktrace_mtx);
475 }
476 #endif
477 /*
478 * Close any file descriptors 0..2 that reference procfs,
479 * then make sure file descriptors 0..2 are in use.
480 *
481 * setugidsafety() may call closef() and then pfind()
482 * which may grab the process lock.
483 * fdcheckstd() may call falloc() which may block to
484 * allocate memory, so temporarily drop the process lock.
485 */
486 PROC_UNLOCK(p);
487 setugidsafety(td);
488 error = fdcheckstd(td);
489 if (error != 0)
490 goto done1;
491 PROC_LOCK(p);
492 /*
493 * Set the new credentials.
494 */
495 crcopy(newcred, oldcred);
496 if (attr.va_mode & VSUID)
497 change_euid(newcred, euip);
498 if (attr.va_mode & VSGID)
499 change_egid(newcred, attr.va_gid);
500 #ifdef MAC
501 if (will_transition) {
502 mac_execve_transition(oldcred, newcred, imgp->vp,
503 interplabel, imgp);
504 }
505 #endif
506 /*
507 * Implement correct POSIX saved-id behavior.
508 *
509 * XXXMAC: Note that the current logic will save the
510 * uid and gid if a MAC domain transition occurs, even
511 * though maybe it shouldn't.
512 */
513 change_svuid(newcred, newcred->cr_uid);
514 change_svgid(newcred, newcred->cr_gid);
515 p->p_ucred = newcred;
516 newcred = NULL;
517 } else {
518 if (oldcred->cr_uid == oldcred->cr_ruid &&
519 oldcred->cr_gid == oldcred->cr_rgid)
520 p->p_flag &= ~P_SUGID;
521 /*
522 * Implement correct POSIX saved-id behavior.
523 *
524 * XXX: It's not clear that the existing behavior is
525 * POSIX-compliant. A number of sources indicate that the
526 * saved uid/gid should only be updated if the new ruid is
527 * not equal to the old ruid, or the new euid is not equal
528 * to the old euid and the new euid is not equal to the old
529 * ruid. The FreeBSD code always updates the saved uid/gid.
530 * Also, this code uses the new (replaced) euid and egid as
531 * the source, which may or may not be the right ones to use.
532 */
533 if (oldcred->cr_svuid != oldcred->cr_uid ||
534 oldcred->cr_svgid != oldcred->cr_gid) {
535 crcopy(newcred, oldcred);
536 change_svuid(newcred, newcred->cr_uid);
537 change_svgid(newcred, newcred->cr_gid);
538 p->p_ucred = newcred;
539 newcred = NULL;
540 }
541 }
542
543 /*
544 * Store the vp for use in procfs. This vnode was referenced prior
545 * to locking the proc lock.
546 */
547 textvp = p->p_textvp;
548 p->p_textvp = ndp->ni_vp;
549
550 /*
551 * Notify others that we exec'd, and clear the P_INEXEC flag
552 * as we're now a bona fide freshly-execed process.
553 */
554 KNOTE(&p->p_klist, NOTE_EXEC);
555 p->p_flag &= ~P_INEXEC;
556
557 /*
558 * If tracing the process, trap to debugger so breakpoints
559 * can be set before the program executes.
560 */
561 if (p->p_flag & P_TRACED)
562 psignal(p, SIGTRAP);
563
564 /* clear "fork but no exec" flag, as we _are_ execing */
565 p->p_acflag &= ~AFORK;
566
567 /* Free any previous argument cache */
568 oldargs = p->p_args;
569 p->p_args = NULL;
570
571 /* Cache arguments if they fit inside our allowance */
572 if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
573 bcopy(imgp->stringbase, newargs->ar_args, i);
574 p->p_args = newargs;
575 newargs = NULL;
576 }
577 PROC_UNLOCK(p);
578
579 /* Set values passed into the program in registers. */
580 if (p->p_sysent->sv_setregs)
581 (*p->p_sysent->sv_setregs)(td, imgp->entry_addr,
582 (u_long)(uintptr_t)stack_base, imgp->ps_strings);
583 else
584 exec_setregs(td, imgp->entry_addr,
585 (u_long)(uintptr_t)stack_base, imgp->ps_strings);
586
587 done1:
588 /*
589 * Free any resources malloc'd earlier that we didn't use.
590 */
591 uifree(euip);
592 if (newcred == NULL)
593 crfree(oldcred);
594 else
595 crfree(newcred);
596 /*
597 * Handle deferred decrement of ref counts.
598 */
599 if (textvp != NULL)
600 vrele(textvp);
601 if (ndp->ni_vp && error != 0)
602 vrele(ndp->ni_vp);
603 #ifdef KTRACE
604 if (tracevp != NULL)
605 vrele(tracevp);
606 if (tracecred != NULL)
607 crfree(tracecred);
608 #endif
609 if (oldargs != NULL)
610 pargs_drop(oldargs);
611 if (newargs != NULL)
612 pargs_drop(newargs);
613 if (oldsigacts != NULL)
614 sigacts_free(oldsigacts);
615
616 exec_fail_dealloc:
617
618 /*
619 * free various allocated resources
620 */
621 if (imgp->firstpage)
622 exec_unmap_first_page(imgp);
623
624 if (imgp->vp) {
625 NDFREE(ndp, NDF_ONLY_PNBUF);
626 vput(imgp->vp);
627 }
628
629 if (imgp->stringbase != NULL)
630 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
631 ARG_MAX + PAGE_SIZE);
632
633 if (imgp->object)
634 vm_object_deallocate(imgp->object);
635
636 if (error == 0) {
637 /*
638 * Stop the process here if its stop event mask has
639 * the S_EXEC bit set.
640 */
641 STOPEVENT(p, S_EXEC, 0);
642 goto done2;
643 }
644
645 exec_fail:
646 /* we're done here, clear P_INEXEC */
647 PROC_LOCK(p);
648 p->p_flag &= ~P_INEXEC;
649 PROC_UNLOCK(p);
650
651 if (imgp->vmspace_destroyed) {
652 /* sorry, no more process anymore. exit gracefully */
653 #ifdef MAC
654 mac_execve_exit(imgp);
655 if (interplabel != NULL)
656 mac_vnode_label_free(interplabel);
657 #endif
658 exit1(td, W_EXITCODE(0, SIGABRT));
659 /* NOT REACHED */
660 error = 0;
661 }
662 done2:
663 #ifdef MAC
664 mac_execve_exit(imgp);
665 if (interplabel != NULL)
666 mac_vnode_label_free(interplabel);
667 #endif
668 mtx_unlock(&Giant);
669 return (error);
670 }
671
672 #ifndef _SYS_SYSPROTO_H_
673 struct execve_args {
674 char *fname;
675 char **argv;
676 char **envv;
677 };
678 #endif
679
680 /*
681 * MPSAFE
682 */
683 int
684 execve(td, uap)
685 struct thread *td;
686 struct execve_args /* {
687 char *fname;
688 char **argv;
689 char **envv;
690 } */ *uap;
691 {
692
693 return (kern_execve(td, uap->fname, uap->argv, uap->envv, NULL));
694 }
695
696 #ifndef _SYS_SYSPROTO_H_
697 struct __mac_execve_args {
698 char *fname;
699 char **argv;
700 char **envv;
701 struct mac *mac_p;
702 };
703 #endif
704
705 /*
706 * MPSAFE
707 */
708 int
709 __mac_execve(td, uap)
710 struct thread *td;
711 struct __mac_execve_args /* {
712 char *fname;
713 char **argv;
714 char **envv;
715 struct mac *mac_p;
716 } */ *uap;
717 {
718
719 #ifdef MAC
720 return (kern_execve(td, uap->fname, uap->argv, uap->envv,
721 uap->mac_p));
722 #else
723 return (ENOSYS);
724 #endif
725 }
726
727 int
728 exec_map_first_page(imgp)
729 struct image_params *imgp;
730 {
731 int rv, i;
732 int initial_pagein;
733 vm_page_t ma[VM_INITIAL_PAGEIN];
734 vm_object_t object;
735
736 GIANT_REQUIRED;
737
738 if (imgp->firstpage) {
739 exec_unmap_first_page(imgp);
740 }
741
742 VOP_GETVOBJECT(imgp->vp, &object);
743 VM_OBJECT_LOCK(object);
744 ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
745 if ((ma[0]->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
746 initial_pagein = VM_INITIAL_PAGEIN;
747 if (initial_pagein > object->size)
748 initial_pagein = object->size;
749 for (i = 1; i < initial_pagein; i++) {
750 if ((ma[i] = vm_page_lookup(object, i)) != NULL) {
751 if (ma[i]->valid)
752 break;
753 vm_page_lock_queues();
754 if ((ma[i]->flags & PG_BUSY) || ma[i]->busy) {
755 vm_page_unlock_queues();
756 break;
757 }
758 vm_page_busy(ma[i]);
759 vm_page_unlock_queues();
760 } else {
761 ma[i] = vm_page_alloc(object, i,
762 VM_ALLOC_NORMAL);
763 if (ma[i] == NULL)
764 break;
765 }
766 }
767 initial_pagein = i;
768 rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
769 ma[0] = vm_page_lookup(object, 0);
770 if ((rv != VM_PAGER_OK) || (ma[0] == NULL) ||
771 (ma[0]->valid == 0)) {
772 if (ma[0]) {
773 vm_page_lock_queues();
774 pmap_remove_all(ma[0]);
775 vm_page_free(ma[0]);
776 vm_page_unlock_queues();
777 }
778 VM_OBJECT_UNLOCK(object);
779 return (EIO);
780 }
781 }
782 vm_page_lock_queues();
783 vm_page_wire(ma[0]);
784 vm_page_wakeup(ma[0]);
785 vm_page_unlock_queues();
786 VM_OBJECT_UNLOCK(object);
787
788 pmap_qenter((vm_offset_t)imgp->image_header, ma, 1);
789 imgp->firstpage = ma[0];
790
791 return (0);
792 }
793
794 void
795 exec_unmap_first_page(imgp)
796 struct image_params *imgp;
797 {
798 GIANT_REQUIRED;
799
800 if (imgp->firstpage) {
801 pmap_qremove((vm_offset_t)imgp->image_header, 1);
802 vm_page_lock_queues();
803 vm_page_unwire(imgp->firstpage, 1);
804 vm_page_unlock_queues();
805 imgp->firstpage = NULL;
806 }
807 }
808
809 /*
810 * Destroy old address space, and allocate a new stack
811 * The new stack is only SGROWSIZ large because it is grown
812 * automatically in trap.c.
813 */
814 int
815 exec_new_vmspace(imgp, sv)
816 struct image_params *imgp;
817 struct sysentvec *sv;
818 {
819 int error;
820 struct proc *p = imgp->proc;
821 struct vmspace *vmspace = p->p_vmspace;
822 vm_offset_t stack_addr;
823 vm_map_t map;
824
825 GIANT_REQUIRED;
826
827 imgp->vmspace_destroyed = 1;
828
829 EVENTHANDLER_INVOKE(process_exec, p);
830
831 /*
832 * Here is as good a place as any to do any resource limit cleanups.
833 * This is needed if a 64 bit binary exec's a 32 bit binary - the
834 * data size limit may need to be changed to a value that makes
835 * sense for the 32 bit binary.
836 */
837 if (sv->sv_fixlimits)
838 sv->sv_fixlimits(imgp);
839
840 /*
841 * Blow away entire process VM, if address space not shared,
842 * otherwise, create a new VM space so that other threads are
843 * not disrupted
844 */
845 map = &vmspace->vm_map;
846 if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv->sv_minuser &&
847 vm_map_max(map) == sv->sv_maxuser) {
848 shmexit(vmspace);
849 vm_page_lock_queues();
850 pmap_remove_pages(vmspace_pmap(vmspace), vm_map_min(map),
851 vm_map_max(map));
852 vm_page_unlock_queues();
853 vm_map_remove(map, vm_map_min(map), vm_map_max(map));
854 } else {
855 vmspace_exec(p, sv->sv_minuser, sv->sv_maxuser);
856 vmspace = p->p_vmspace;
857 map = &vmspace->vm_map;
858 }
859
860 /* Allocate a new stack */
861 stack_addr = sv->sv_usrstack - maxssiz;
862 error = vm_map_stack(map, stack_addr, (vm_size_t)maxssiz,
863 sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
864 if (error)
865 return (error);
866
867 #ifdef __ia64__
868 /* Allocate a new register stack */
869 stack_addr = IA64_BACKINGSTORE;
870 error = vm_map_stack(map, stack_addr, (vm_size_t)maxssiz,
871 sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_UP);
872 if (error)
873 return (error);
874 #endif
875
876 /* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the
877 * VM_STACK case, but they are still used to monitor the size of the
878 * process stack so we can check the stack rlimit.
879 */
880 vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
881 vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - maxssiz;
882
883 return (0);
884 }
885
886 /*
887 * Copy out argument and environment strings from the old process
888 * address space into the temporary string buffer.
889 */
890 int
891 exec_extract_strings(imgp)
892 struct image_params *imgp;
893 {
894 char **argv, **envv;
895 char *argp, *envp;
896 int error;
897 size_t length;
898
899 /*
900 * extract arguments first
901 */
902
903 argv = imgp->userspace_argv;
904
905 if (argv) {
906 argp = (caddr_t)(intptr_t)fuword(argv);
907 if (argp == (caddr_t)-1)
908 return (EFAULT);
909 if (argp)
910 argv++;
911 if (imgp->argv0)
912 argp = imgp->argv0;
913 if (argp) {
914 do {
915 if (argp == (caddr_t)-1)
916 return (EFAULT);
917 if ((error = copyinstr(argp, imgp->stringp,
918 imgp->stringspace, &length))) {
919 if (error == ENAMETOOLONG)
920 return (E2BIG);
921 return (error);
922 }
923 imgp->stringspace -= length;
924 imgp->stringp += length;
925 imgp->argc++;
926 } while ((argp = (caddr_t)(intptr_t)fuword(argv++)));
927 }
928 }
929
930 imgp->endargs = imgp->stringp;
931
932 /*
933 * extract environment strings
934 */
935
936 envv = imgp->userspace_envv;
937
938 if (envv) {
939 while ((envp = (caddr_t)(intptr_t)fuword(envv++))) {
940 if (envp == (caddr_t)-1)
941 return (EFAULT);
942 if ((error = copyinstr(envp, imgp->stringp,
943 imgp->stringspace, &length))) {
944 if (error == ENAMETOOLONG)
945 return (E2BIG);
946 return (error);
947 }
948 imgp->stringspace -= length;
949 imgp->stringp += length;
950 imgp->envc++;
951 }
952 }
953
954 return (0);
955 }
956
957 /*
958 * Copy strings out to the new process address space, constructing
959 * new arg and env vector tables. Return a pointer to the base
960 * so that it can be used as the initial stack pointer.
961 */
962 register_t *
963 exec_copyout_strings(imgp)
964 struct image_params *imgp;
965 {
966 int argc, envc;
967 char **vectp;
968 char *stringp, *destp;
969 register_t *stack_base;
970 struct ps_strings *arginfo;
971 struct proc *p;
972 int szsigcode;
973
974 /*
975 * Calculate string base and vector table pointers.
976 * Also deal with signal trampoline code for this exec type.
977 */
978 p = imgp->proc;
979 szsigcode = 0;
980 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
981 if (p->p_sysent->sv_szsigcode != NULL)
982 szsigcode = *(p->p_sysent->sv_szsigcode);
983 destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
984 roundup((ARG_MAX - imgp->stringspace), sizeof(char *));
985
986 /*
987 * install sigcode
988 */
989 if (szsigcode)
990 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
991 szsigcode), szsigcode);
992
993 /*
994 * If we have a valid auxargs ptr, prepare some room
995 * on the stack.
996 */
997 if (imgp->auxargs) {
998 /*
999 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
1000 * lower compatibility.
1001 */
1002 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
1003 (AT_COUNT * 2);
1004 /*
1005 * The '+ 2' is for the null pointers at the end of each of
1006 * the arg and env vector sets,and imgp->auxarg_size is room
1007 * for argument of Runtime loader.
1008 */
1009 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2 +
1010 imgp->auxarg_size) * sizeof(char *));
1011
1012 } else
1013 /*
1014 * The '+ 2' is for the null pointers at the end of each of
1015 * the arg and env vector sets
1016 */
1017 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2) *
1018 sizeof(char *));
1019
1020 /*
1021 * vectp also becomes our initial stack base
1022 */
1023 stack_base = (register_t *)vectp;
1024
1025 stringp = imgp->stringbase;
1026 argc = imgp->argc;
1027 envc = imgp->envc;
1028
1029 /*
1030 * Copy out strings - arguments and environment.
1031 */
1032 copyout(stringp, destp, ARG_MAX - imgp->stringspace);
1033
1034 /*
1035 * Fill in "ps_strings" struct for ps, w, etc.
1036 */
1037 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
1038 suword(&arginfo->ps_nargvstr, argc);
1039
1040 /*
1041 * Fill in argument portion of vector table.
1042 */
1043 for (; argc > 0; --argc) {
1044 suword(vectp++, (long)(intptr_t)destp);
1045 while (*stringp++ != 0)
1046 destp++;
1047 destp++;
1048 }
1049
1050 /* a null vector table pointer separates the argp's from the envp's */
1051 suword(vectp++, 0);
1052
1053 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
1054 suword(&arginfo->ps_nenvstr, envc);
1055
1056 /*
1057 * Fill in environment portion of vector table.
1058 */
1059 for (; envc > 0; --envc) {
1060 suword(vectp++, (long)(intptr_t)destp);
1061 while (*stringp++ != 0)
1062 destp++;
1063 destp++;
1064 }
1065
1066 /* end of vector table is a null pointer */
1067 suword(vectp, 0);
1068
1069 return (stack_base);
1070 }
1071
1072 /*
1073 * Check permissions of file to execute.
1074 * Called with imgp->vp locked.
1075 * Return 0 for success or error code on failure.
1076 */
1077 int
1078 exec_check_permissions(imgp)
1079 struct image_params *imgp;
1080 {
1081 struct vnode *vp = imgp->vp;
1082 struct vattr *attr = imgp->attr;
1083 struct thread *td;
1084 int error;
1085
1086 td = curthread; /* XXXKSE */
1087
1088 /* Get file attributes */
1089 error = VOP_GETATTR(vp, attr, td->td_ucred, td);
1090 if (error)
1091 return (error);
1092
1093 #ifdef MAC
1094 error = mac_check_vnode_exec(td->td_ucred, imgp->vp, imgp);
1095 if (error)
1096 return (error);
1097 #endif
1098
1099 /*
1100 * 1) Check if file execution is disabled for the filesystem that this
1101 * file resides on.
1102 * 2) Insure that at least one execute bit is on - otherwise root
1103 * will always succeed, and we don't want to happen unless the
1104 * file really is executable.
1105 * 3) Insure that the file is a regular file.
1106 */
1107 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
1108 ((attr->va_mode & 0111) == 0) ||
1109 (attr->va_type != VREG))
1110 return (EACCES);
1111
1112 /*
1113 * Zero length files can't be exec'd
1114 */
1115 if (attr->va_size == 0)
1116 return (ENOEXEC);
1117
1118 /*
1119 * Check for execute permission to file based on current credentials.
1120 */
1121 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1122 if (error)
1123 return (error);
1124
1125 /*
1126 * Check number of open-for-writes on the file and deny execution
1127 * if there are any.
1128 */
1129 if (vp->v_writecount)
1130 return (ETXTBSY);
1131
1132 /*
1133 * Call filesystem specific open routine (which does nothing in the
1134 * general case).
1135 */
1136 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1);
1137 return (error);
1138 }
1139
1140 /*
1141 * Exec handler registration
1142 */
1143 int
1144 exec_register(execsw_arg)
1145 const struct execsw *execsw_arg;
1146 {
1147 const struct execsw **es, **xs, **newexecsw;
1148 int count = 2; /* New slot and trailing NULL */
1149
1150 if (execsw)
1151 for (es = execsw; *es; es++)
1152 count++;
1153 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1154 if (newexecsw == NULL)
1155 return (ENOMEM);
1156 xs = newexecsw;
1157 if (execsw)
1158 for (es = execsw; *es; es++)
1159 *xs++ = *es;
1160 *xs++ = execsw_arg;
1161 *xs = NULL;
1162 if (execsw)
1163 free(execsw, M_TEMP);
1164 execsw = newexecsw;
1165 return (0);
1166 }
1167
1168 int
1169 exec_unregister(execsw_arg)
1170 const struct execsw *execsw_arg;
1171 {
1172 const struct execsw **es, **xs, **newexecsw;
1173 int count = 1;
1174
1175 if (execsw == NULL)
1176 panic("unregister with no handlers left?\n");
1177
1178 for (es = execsw; *es; es++) {
1179 if (*es == execsw_arg)
1180 break;
1181 }
1182 if (*es == NULL)
1183 return (ENOENT);
1184 for (es = execsw; *es; es++)
1185 if (*es != execsw_arg)
1186 count++;
1187 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1188 if (newexecsw == NULL)
1189 return (ENOMEM);
1190 xs = newexecsw;
1191 for (es = execsw; *es; es++)
1192 if (*es != execsw_arg)
1193 *xs++ = *es;
1194 *xs = NULL;
1195 if (execsw)
1196 free(execsw, M_TEMP);
1197 execsw = newexecsw;
1198 return (0);
1199 }
Cache object: 74df381c629709c065741882ff8a6f44
|