FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_exec.c
1 /*
2 * Copyright (c) 1993, David Greenman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: src/sys/kern/kern_exec.c,v 1.47.2.15 2001/06/16 23:43:32 peter Exp $
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/sysproto.h>
32 #include <sys/signalvar.h>
33 #include <sys/kernel.h>
34 #include <sys/mount.h>
35 #include <sys/filedesc.h>
36 #include <sys/fcntl.h>
37 #include <sys/acct.h>
38 #include <sys/exec.h>
39 #include <sys/imgact.h>
40 #include <sys/imgact_elf.h>
41 #include <sys/wait.h>
42 #include <sys/proc.h>
43 #include <sys/malloc.h>
44 #include <sys/namei.h>
45 #include <sys/sysent.h>
46 #include <sys/syslog.h>
47 #include <sys/shm.h>
48 #include <sys/sysctl.h>
49 #include <sys/vnode.h>
50 #include <sys/buf.h>
51
52 #include <vm/vm.h>
53 #include <vm/vm_param.h>
54 #include <vm/vm_prot.h>
55 #include <vm/lock.h>
56 #include <vm/pmap.h>
57 #include <vm/vm_map.h>
58 #include <vm/vm_kern.h>
59 #include <vm/vm_extern.h>
60 #include <vm/vm_object.h>
61
62 #include <machine/reg.h>
63
64 static int *exec_copyout_strings __P((struct image_params *));
65
66 static int exec_check_permissions(struct image_params *);
67
68 /*
69 * XXX trouble here if sizeof(caddr_t) != sizeof(int), other parts
70 * of the sysctl code also assumes this, and sizeof(int) == sizeof(long).
71 */
72 static struct ps_strings *ps_strings = PS_STRINGS;
73 SYSCTL_INT(_kern, KERN_PS_STRINGS, ps_strings, 0, &ps_strings, 0, "");
74
75 static caddr_t usrstack = (caddr_t)USRSTACK;
76 SYSCTL_INT(_kern, KERN_USRSTACK, usrstack, 0, &usrstack, 0, "");
77
78 /*
79 * execsw_set is constructed for us by the linker. Each of the items
80 * is a pointer to a `const struct execsw', hence the double pointer here.
81 */
82 static const struct execsw **execsw =
83 (const struct execsw **)&execsw_set.ls_items[0];
84
85 #ifndef _SYS_SYSPROTO_H_
86 struct execve_args {
87 char *fname;
88 char **argv;
89 char **envv;
90 };
91 #endif
92
93 /*
94 * execve() system call.
95 */
96 int
97 execve(p, uap, retval)
98 struct proc *p;
99 register struct execve_args *uap;
100 int *retval;
101 {
102 struct nameidata nd, *ndp;
103 int *stack_base;
104 int error, len, i;
105 struct image_params image_params, *imgp;
106 struct vattr attr;
107 struct buf *bp = NULL;
108
109 imgp = &image_params;
110
111 /*
112 * Initialize part of the common data
113 */
114 imgp->proc = p;
115 imgp->uap = uap;
116 imgp->attr = &attr;
117 imgp->image_header = NULL;
118 imgp->argc = imgp->envc = 0;
119 imgp->argv0 = NULL;
120 imgp->entry_addr = 0;
121 imgp->vmspace_destroyed = 0;
122 imgp->interpreted = 0;
123 imgp->interpreter_name[0] = '\0';
124 imgp->auxargs = NULL;
125
126 /*
127 * Allocate temporary demand zeroed space for argument and
128 * environment strings
129 */
130 imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX);
131 if (imgp->stringbase == NULL) {
132 error = ENOMEM;
133 goto exec_fail;
134 }
135 imgp->stringp = imgp->stringbase;
136 imgp->stringspace = ARG_MAX;
137
138 /*
139 * Translate the file name. namei() returns a vnode pointer
140 * in ni_vp amoung other things.
141 */
142 ndp = &nd;
143 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
144 UIO_USERSPACE, uap->fname, p);
145
146 interpret:
147
148 error = namei(ndp);
149 if (error) {
150 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX);
151 goto exec_fail;
152 }
153
154 imgp->vp = ndp->ni_vp;
155
156 /*
157 * Check file permissions (also 'opens' file)
158 */
159 error = exec_check_permissions(imgp);
160 if (error) {
161 VOP_UNLOCK(imgp->vp);
162 goto exec_fail_dealloc;
163 }
164
165 /* XXX temporary hack for CODA filesystem XXX */
166 #ifndef CFS
167 /*
168 * Get the image header, which we define here as meaning the first
169 * page of the executable.
170 */
171 if (imgp->vp->v_object && imgp->vp->v_mount &&
172 imgp->vp->v_mount->mnt_stat.f_iosize >= PAGE_SIZE &&
173 imgp->vp->v_object->un_pager.vnp.vnp_size >=
174 imgp->vp->v_mount->mnt_stat.f_iosize) {
175 /*
176 * Get a buffer with (at least) the first page.
177 */
178 error = bread(imgp->vp, 0, imgp->vp->v_mount->mnt_stat.f_iosize,
179 p->p_ucred, &bp);
180 imgp->image_header = bp->b_data;
181 } else
182 #endif
183 {
184 int resid;
185
186 /*
187 * The filesystem block size is too small, so do this the hard
188 * way. Malloc some space and read PAGE_SIZE worth of the image
189 * header into it.
190 */
191 imgp->image_header = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
192 error = vn_rdwr(UIO_READ, imgp->vp, (void *)imgp->image_header, PAGE_SIZE, 0,
193 UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid, p);
194 /*
195 * Clear out any remaining junk.
196 */
197 if (!error && resid)
198 bzero((char *)imgp->image_header + PAGE_SIZE - resid, resid);
199 }
200 VOP_UNLOCK(imgp->vp);
201 if (error)
202 goto exec_fail_dealloc;
203
204 /*
205 * Loop through list of image activators, calling each one.
206 * If there is no match, the activator returns -1. If there
207 * is a match, but there was an error during the activation,
208 * the error is returned. Otherwise 0 means success. If the
209 * image is interpreted, loop back up and try activating
210 * the interpreter.
211 */
212 for (i = 0; execsw[i]; ++i) {
213 if (execsw[i]->ex_imgact)
214 error = (*execsw[i]->ex_imgact)(imgp);
215 else
216 continue;
217 if (error == -1)
218 continue;
219 if (error)
220 goto exec_fail_dealloc;
221 if (imgp->interpreted) {
222 /* free old bp/image_header */
223 if (bp != NULL) {
224 brelse(bp);
225 bp = NULL;
226 } else
227 free((void *)imgp->image_header, M_TEMP);
228 imgp->image_header = NULL;
229 /* free old vnode and name buffer */
230 vrele(ndp->ni_vp);
231 FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI);
232 /* set new name to that of the interpreter */
233 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
234 UIO_SYSSPACE, imgp->interpreter_name, p);
235 goto interpret;
236 }
237 break;
238 }
239 /* If we made it through all the activators and none matched, exit. */
240 if (error == -1) {
241 error = ENOEXEC;
242 goto exec_fail_dealloc;
243 }
244
245 /*
246 * Copy out strings (args and env) and initialize stack base
247 */
248 stack_base = exec_copyout_strings(imgp);
249 p->p_vmspace->vm_minsaddr = (char *)stack_base;
250
251 /*
252 * If custom stack fixup routine present for this process
253 * let it do the stack setup.
254 * Else stuff argument count as first item on stack
255 */
256 if (p->p_sysent->sv_fixup)
257 (*p->p_sysent->sv_fixup)(&stack_base, imgp);
258 else
259 suword(--stack_base, imgp->argc);
260
261 /*
262 * For security and other reasons, the file descriptor table cannot
263 * be shared after an exec.
264 */
265 if (p->p_fd->fd_refcnt > 1) {
266 struct filedesc *tmp;
267
268 tmp = fdcopy(p);
269 fdfree(p);
270 p->p_fd = tmp;
271 }
272
273 /* close files on exec */
274 fdcloseexec(p);
275
276 /* reset caught signals */
277 execsigs(p);
278
279 /* name this process - nameiexec(p, ndp) */
280 len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN);
281 bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len);
282 p->p_comm[len] = 0;
283
284 /*
285 * mark as execed, wakeup the process that vforked (if any) and tell
286 * it that it now has it's own resources back
287 */
288 p->p_flag |= P_EXEC;
289 if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
290 p->p_flag &= ~P_PPWAIT;
291 wakeup((caddr_t)p->p_pptr);
292 }
293
294 /*
295 * Implement image setuid/setgid.
296 *
297 * Don't honor setuid/setgid if the filesystem prohibits it or if
298 * the process is being traced.
299 */
300 if ((attr.va_mode & VSUID && p->p_ucred->cr_uid != attr.va_uid ||
301 attr.va_mode & VSGID && p->p_ucred->cr_gid != attr.va_gid) &&
302 (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
303 (p->p_flag & P_TRACED) == 0) {
304 /*
305 * Turn off syscall tracing for set-id programs, except for
306 * root. Record any set-id flags first to make sure that
307 * we do not regain any tracing during a possible block.
308 */
309 p->p_flag |= P_SUGID;
310 if (p->p_tracep && suser(p->p_ucred, &p->p_acflag)) {
311 p->p_traceflag = 0;
312 vrele(p->p_tracep);
313 p->p_tracep = NULL;
314 }
315 /*
316 * Set the new credentials.
317 */
318 p->p_ucred = crcopy(p->p_ucred);
319 if (attr.va_mode & VSUID)
320 p->p_ucred->cr_uid = attr.va_uid;
321 if (attr.va_mode & VSGID)
322 p->p_ucred->cr_gid = attr.va_gid;
323 setugidsafety(p);
324 } else {
325 if (p->p_ucred->cr_uid == p->p_cred->p_ruid &&
326 p->p_ucred->cr_gid == p->p_cred->p_rgid)
327 p->p_flag &= ~P_SUGID;
328 }
329
330 /*
331 * Implement correct POSIX saved-id behavior.
332 */
333 p->p_cred->p_svuid = p->p_ucred->cr_uid;
334 p->p_cred->p_svgid = p->p_ucred->cr_gid;
335
336 /*
337 * Store the vp for use in procfs
338 */
339 if (p->p_textvp) /* release old reference */
340 vrele(p->p_textvp);
341 VREF(ndp->ni_vp);
342 p->p_textvp = ndp->ni_vp;
343
344 /*
345 * If tracing the process, trap to debugger so breakpoints
346 * can be set before the program executes.
347 */
348 if (p->p_flag & P_TRACED)
349 psignal(p, SIGTRAP);
350
351 /* clear "fork but no exec" flag, as we _are_ execing */
352 p->p_acflag &= ~AFORK;
353
354 /* Set entry address */
355 setregs(p, imgp->entry_addr, (u_long)stack_base);
356
357 /*
358 * free various allocated resources
359 */
360 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX);
361 if (bp != NULL)
362 brelse(bp);
363 else if (imgp->image_header != NULL)
364 free((void *)imgp->image_header, M_TEMP);
365 vrele(ndp->ni_vp);
366 FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI);
367
368 return (0);
369
370 exec_fail_dealloc:
371 if (imgp->stringbase != NULL)
372 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX);
373 if (bp != NULL)
374 brelse(bp);
375 else if (imgp->image_header != NULL)
376 free((void *)imgp->image_header, M_TEMP);
377 if (ndp->ni_vp) {
378 vrele(ndp->ni_vp);
379 FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI);
380 }
381
382 exec_fail:
383 if (imgp->vmspace_destroyed) {
384 /* sorry, no more process anymore. exit gracefully */
385 exit1(p, W_EXITCODE(0, SIGABRT));
386 /* NOT REACHED */
387 return(0);
388 } else {
389 return(error);
390 }
391 }
392
393 /*
394 * Destroy old address space, and allocate a new stack
395 * The new stack is only SGROWSIZ large because it is grown
396 * automatically in trap.c.
397 */
398 int
399 exec_new_vmspace(imgp)
400 struct image_params *imgp;
401 {
402 int error;
403 struct vmspace *vmspace = imgp->proc->p_vmspace;
404 caddr_t stack_addr = (caddr_t) (USRSTACK - SGROWSIZ);
405
406 imgp->vmspace_destroyed = 1;
407
408 /* Blow away entire process VM */
409 if (vmspace->vm_shm)
410 shmexit(imgp->proc);
411 pmap_remove_pages(&vmspace->vm_pmap, 0, USRSTACK);
412 vm_map_remove(&vmspace->vm_map, 0, USRSTACK);
413
414 /* Allocate a new stack */
415 error = vm_map_find(&vmspace->vm_map, NULL, 0, (vm_offset_t *)&stack_addr,
416 SGROWSIZ, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0);
417 if (error)
418 return(error);
419
420 vmspace->vm_ssize = SGROWSIZ >> PAGE_SHIFT;
421
422 /* Initialize maximum stack address */
423 vmspace->vm_maxsaddr = (char *)USRSTACK - MAXSSIZ;
424
425 return(0);
426 }
427
428 /*
429 * Copy out argument and environment strings from the old process
430 * address space into the temporary string buffer.
431 */
432 int
433 exec_extract_strings(imgp)
434 struct image_params *imgp;
435 {
436 char **argv, **envv;
437 char *argp, *envp;
438 int error, length;
439
440 /*
441 * extract arguments first
442 */
443
444 argv = imgp->uap->argv;
445
446 if (argv) {
447 argp = (caddr_t) fuword(argv);
448 if (argp == (caddr_t) -1)
449 return (EFAULT);
450 if (argp)
451 argv++;
452 if (imgp->argv0)
453 argp = imgp->argv0;
454 if (argp) {
455 do {
456 if (argp == (caddr_t) -1)
457 return (EFAULT);
458 if ((error = copyinstr(argp, imgp->stringp,
459 imgp->stringspace, &length))) {
460 if (error == ENAMETOOLONG)
461 return(E2BIG);
462 return (error);
463 }
464 imgp->stringspace -= length;
465 imgp->stringp += length;
466 imgp->argc++;
467 } while ((argp = (caddr_t) fuword(argv++)));
468 }
469 }
470
471 /*
472 * extract environment strings
473 */
474
475 envv = imgp->uap->envv;
476
477 if (envv) {
478 while ((envp = (caddr_t) fuword(envv++))) {
479 if (envp == (caddr_t) -1)
480 return (EFAULT);
481 if ((error = copyinstr(envp, imgp->stringp,
482 imgp->stringspace, &length))) {
483 if (error == ENAMETOOLONG)
484 return(E2BIG);
485 return (error);
486 }
487 imgp->stringspace -= length;
488 imgp->stringp += length;
489 imgp->envc++;
490 }
491 }
492
493 return (0);
494 }
495
496 /*
497 * Copy strings out to the new process address space, constructing
498 * new arg and env vector tables. Return a pointer to the base
499 * so that it can be used as the initial stack pointer.
500 */
501 int *
502 exec_copyout_strings(imgp)
503 struct image_params *imgp;
504 {
505 int argc, envc;
506 char **vectp;
507 char *stringp, *destp;
508 int *stack_base;
509 struct ps_strings *arginfo;
510 int szsigcode;
511
512 /*
513 * Calculate string base and vector table pointers.
514 * Also deal with signal trampoline code for this exec type.
515 */
516 arginfo = PS_STRINGS;
517 szsigcode = *(imgp->proc->p_sysent->sv_szsigcode);
518 destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
519 roundup((ARG_MAX - imgp->stringspace), sizeof(char *));
520
521 /*
522 * install sigcode
523 */
524 if (szsigcode)
525 copyout(imgp->proc->p_sysent->sv_sigcode,
526 ((caddr_t)arginfo - szsigcode), szsigcode);
527
528 /*
529 * If we have a valid auxargs ptr, prepare some room
530 * on the stack.
531 */
532 if (imgp->auxargs)
533 /*
534 * The '+ 2' is for the null pointers at the end of each of the
535 * arg and env vector sets, and 'AT_COUNT*2' is room for the
536 * ELF Auxargs data.
537 */
538 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2 +
539 AT_COUNT*2) * sizeof(char*));
540 else
541 /*
542 * The '+ 2' is for the null pointers at the end of each of the
543 * arg and env vector sets
544 */
545 vectp = (char **)
546 (destp - (imgp->argc + imgp->envc + 2) * sizeof(char*));
547
548 /*
549 * vectp also becomes our initial stack base
550 */
551 stack_base = (int *)vectp;
552
553 stringp = imgp->stringbase;
554 argc = imgp->argc;
555 envc = imgp->envc;
556
557 /*
558 * Copy out strings - arguments and environment.
559 */
560 copyout(stringp, destp, ARG_MAX - imgp->stringspace);
561
562 /*
563 * Fill in "ps_strings" struct for ps, w, etc.
564 */
565 suword(&arginfo->ps_argvstr, (int)vectp);
566 suword(&arginfo->ps_nargvstr, argc);
567
568 /*
569 * Fill in argument portion of vector table.
570 */
571 for (; argc > 0; --argc) {
572 suword(vectp++, (int)destp);
573 while (*stringp++ != 0)
574 destp++;
575 destp++;
576 }
577
578 /* a null vector table pointer seperates the argp's from the envp's */
579 suword(vectp++, 0);
580
581 suword(&arginfo->ps_envstr, (int)vectp);
582 suword(&arginfo->ps_nenvstr, envc);
583
584 /*
585 * Fill in environment portion of vector table.
586 */
587 for (; envc > 0; --envc) {
588 suword(vectp++, (int)destp);
589 while (*stringp++ != 0)
590 destp++;
591 destp++;
592 }
593
594 /* end of vector table is a null pointer */
595 suword(vectp, 0);
596
597 return (stack_base);
598 }
599
600 /*
601 * Check permissions of file to execute.
602 * Return 0 for success or error code on failure.
603 */
604 static int
605 exec_check_permissions(imgp)
606 struct image_params *imgp;
607 {
608 struct proc *p = imgp->proc;
609 struct vnode *vp = imgp->vp;
610 struct vattr *attr = imgp->attr;
611 int error;
612
613 /* Get file attributes */
614 error = VOP_GETATTR(vp, attr, p->p_ucred, p);
615 if (error)
616 return (error);
617
618 /*
619 * 1) Check if file execution is disabled for the filesystem that this
620 * file resides on.
621 * 2) Insure that at least one execute bit is on - otherwise root
622 * will always succeed, and we don't want to happen unless the
623 * file really is executable.
624 * 3) Insure that the file is a regular file.
625 */
626 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
627 ((attr->va_mode & 0111) == 0) ||
628 (attr->va_type != VREG)) {
629 return (EACCES);
630 }
631
632 /*
633 * Zero length files can't be exec'd
634 */
635 if (attr->va_size == 0)
636 return (ENOEXEC);
637
638 /*
639 * Check for execute permission to file based on current credentials.
640 */
641 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
642 if (error)
643 return (error);
644
645 /*
646 * Check number of open-for-writes on the file and deny execution
647 * if there are any.
648 */
649 if (vp->v_writecount)
650 return (ETXTBSY);
651
652 /*
653 * Call filesystem specific open routine (which does nothing in the
654 * general case).
655 */
656 error = VOP_OPEN(vp, FREAD, p->p_ucred, p);
657 if (error)
658 return (error);
659
660 return (0);
661 }
Cache object: d12814b5c06439306cd269ce6928e70a
|