1 /*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: releng/5.1/sys/i386/linux/linux_sysvec.c 114983 2003-05-13 20:36:02Z jhb $
29 */
30
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/sysent.h>
50 #include <sys/sysproto.h>
51 #include <sys/user.h>
52 #include <sys/vnode.h>
53
54 #include <vm/vm.h>
55 #include <vm/vm_param.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_extern.h>
58 #include <sys/exec.h>
59 #include <sys/kernel.h>
60 #include <sys/module.h>
61 #include <machine/cpu.h>
62 #include <machine/md_var.h>
63 #include <sys/mutex.h>
64
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/pmap.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_object.h>
70
71 #include <i386/linux/linux.h>
72 #include <i386/linux/linux_proto.h>
73 #include <compat/linux/linux_mib.h>
74 #include <compat/linux/linux_signal.h>
75 #include <compat/linux/linux_util.h>
76
/*
 * Module version and the SysV IPC modules (message queues, semaphores,
 * shared memory) that this module is declared to depend on.
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* malloc(9) type M_LINUX; not otherwise referenced in this file. */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * The two bytes "#!" read as one 16-bit value in host byte order.
 * exec_linux_imgact_try() compares the start of the image header to it.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC 0x2123 /* #! */
#else
#define SHELLMAGIC 0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define LINUX_SYS_linux_rt_sendsig 0
#define LINUX_SYS_linux_sendsig 0

/*
 * Signal code and its size, defined outside this file.  Both are installed
 * in the sysentvecs below, and the sendsig routines point %eip at
 * PS_STRINGS - *sv_szsigcode.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker set of ioctl handlers walked by linux_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations for the static routines referenced by the sysentvecs. */
static int linux_fixup(register_t **stack_base,
    struct image_params *iparams);
static int elf_linux_fixup(register_t **stack_base,
    struct image_params *iparams);
static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
    caddr_t *params);
static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
    u_long code);
static void exec_linux_setregs(struct thread *td, u_long entry,
    u_long stack, u_long ps_strings);
116
/*
 * Linux syscalls return negative errno's, we do positive and map them
 *
 * The table has ELAST + 1 entries and is installed as the errno
 * translation table of both sysentvecs below.  Entry N is the (negative)
 * value used for BSD errno N; each row below holds ten entries, the last
 * row seven (indices 80-86).
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	/*  0- 9 */ -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
	/* 10-19 */ -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	/* 20-29 */ -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	/* 30-39 */ -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	/* 40-49 */ -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	/* 50-59 */ -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	/* 60-69 */ -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	/* 70-79 */ -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
	/* 80-86 */ -6, -6, -43, -42, -75, -6, -84
};
131
/*
 * BSD-to-Linux signal number translation.  Installed as the signal table
 * of both sysentvecs below; the sendsig routines index it with
 * _SIG_IDX(sig) when sig <= sv_sigsize.
 *
 * NOTE(review): the two 0 entries presumably mark BSD signals with no Linux
 * equivalent -- confirm against the BSD signal numbering.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
142
/*
 * Linux-to-BSD signal number translation, the reverse of
 * bsd_to_linux_signal.  Not referenced in this file; it has external
 * linkage.
 *
 * NOTE(review): presumably indexed by (Linux signal number - 1) by the
 * shared compat/linux signal code -- confirm against its users.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
153
/* Value reported when a FreeBSD trap code has no entry in the table below. */
#define LINUX_T_UNKNOWN 255

/*
 * Trap number stored in the Linux signal context (sc_trapno), indexed by
 * FreeBSD trap code.  The table is only ever read, so it is const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1 T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3 T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6 T_ARITHTRAP */
	254,			/* 7 T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9 T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap code; codes past the end of the table yield
 * LINUX_T_UNKNOWN.  The argument is evaluated twice, so it must be free
 * of side effects.
 */
#define bsd_to_linux_trapcode(code) \
	((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
	_bsd_to_linux_trapcode[(code)]: \
	LINUX_T_UNKNOWN)
192
193 /*
194 * If FreeBSD & Linux have a difference of opinion about what a trap
195 * means, deal with it here.
196 *
197 * MPSAFE
198 */
199 static int
200 translate_traps(int signal, int trap_code)
201 {
202 if (signal != SIGBUS)
203 return signal;
204 switch (trap_code) {
205 case T_PROTFLT:
206 case T_TSSFLT:
207 case T_DOUBLEFLT:
208 case T_PAGEFLT:
209 return SIGSEGV;
210 default:
211 return signal;
212 }
213 }
214
215 static int
216 linux_fixup(register_t **stack_base, struct image_params *imgp)
217 {
218 register_t *argv, *envp;
219
220 argv = *stack_base;
221 envp = *stack_base + (imgp->argc + 1);
222 (*stack_base)--;
223 **stack_base = (intptr_t)(void *)envp;
224 (*stack_base)--;
225 **stack_base = (intptr_t)(void *)argv;
226 (*stack_base)--;
227 **stack_base = imgp->argc;
228 return 0;
229 }
230
231 static int
232 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
233 {
234 Elf32_Auxargs *args;
235 register_t *pos;
236
237 KASSERT(curthread->td_proc == imgp->proc &&
238 (curthread->td_proc->p_flag & P_THREADED) == 0,
239 ("unsafe elf_linux_fixup(), should be curproc"));
240 args = (Elf32_Auxargs *)imgp->auxargs;
241 pos = *stack_base + (imgp->argc + imgp->envc + 2);
242
243 if (args->trace)
244 AUXARGS_ENTRY(pos, AT_DEBUG, 1);
245 if (args->execfd != -1)
246 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
247 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
248 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
249 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
250 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
251 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
252 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
253 AUXARGS_ENTRY(pos, AT_BASE, args->base);
254 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
255 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
256 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
257 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
258 AUXARGS_ENTRY(pos, AT_NULL, 0);
259
260 free(imgp->auxargs, M_TEMP);
261 imgp->auxargs = NULL;
262
263 (*stack_base)--;
264 **stack_base = (register_t)imgp->argc;
265 return 0;
266 }
267
268 extern int _ucodesel, _udatasel;
269 extern unsigned long linux_sznonrtsigcode;
270
271 static void
272 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
273 {
274 struct thread *td = curthread;
275 struct proc *p = td->td_proc;
276 struct sigacts *psp;
277 struct trapframe *regs;
278 struct l_rt_sigframe *fp, frame;
279 int oonstack;
280
281 PROC_LOCK_ASSERT(p, MA_OWNED);
282 psp = p->p_sigacts;
283 mtx_assert(&psp->ps_mtx, MA_OWNED);
284 regs = td->td_frame;
285 oonstack = sigonstack(regs->tf_esp);
286
287 #ifdef DEBUG
288 if (ldebug(rt_sendsig))
289 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
290 catcher, sig, (void*)mask, code);
291 #endif
292 /*
293 * Allocate space for the signal handler context.
294 */
295 if ((p->p_flag & P_ALTSTACK) && !oonstack &&
296 SIGISMEMBER(psp->ps_sigonstack, sig)) {
297 fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
298 p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
299 } else
300 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
301 mtx_unlock(&psp->ps_mtx);
302
303 /*
304 * Build the argument list for the signal handler.
305 */
306 if (p->p_sysent->sv_sigtbl)
307 if (sig <= p->p_sysent->sv_sigsize)
308 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
309
310 bzero(&frame, sizeof(frame));
311
312 frame.sf_handler = catcher;
313 frame.sf_sig = sig;
314 frame.sf_siginfo = &fp->sf_si;
315 frame.sf_ucontext = &fp->sf_sc;
316
317 /* Fill in POSIX parts */
318 frame.sf_si.lsi_signo = sig;
319 frame.sf_si.lsi_code = code;
320 frame.sf_si.lsi_addr = (void *)regs->tf_err;
321
322 /*
323 * Build the signal context to be used by sigreturn.
324 */
325 frame.sf_sc.uc_flags = 0; /* XXX ??? */
326 frame.sf_sc.uc_link = NULL; /* XXX ??? */
327
328 frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
329 frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
330 frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
331 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
332 PROC_UNLOCK(p);
333
334 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
335
336 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
337 frame.sf_sc.uc_mcontext.sc_gs = rgs();
338 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
339 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
340 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
341 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
342 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
343 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
344 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
345 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
346 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
347 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
348 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
349 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
350 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
351 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
352 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
353 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
354 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
355
356 #ifdef DEBUG
357 if (ldebug(rt_sendsig))
358 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
359 frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
360 p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
361 #endif
362
363 if (copyout(&frame, fp, sizeof(frame)) != 0) {
364 /*
365 * Process has trashed its stack; give it an illegal
366 * instruction to halt it in its tracks.
367 */
368 #ifdef DEBUG
369 if (ldebug(rt_sendsig))
370 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
371 fp, oonstack);
372 #endif
373 PROC_LOCK(p);
374 sigexit(td, SIGILL);
375 }
376
377 /*
378 * Build context to run handler in.
379 */
380 regs->tf_esp = (int)fp;
381 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
382 linux_sznonrtsigcode;
383 regs->tf_eflags &= ~(PSL_T | PSL_VM);
384 regs->tf_cs = _ucodesel;
385 regs->tf_ds = _udatasel;
386 regs->tf_es = _udatasel;
387 regs->tf_fs = _udatasel;
388 regs->tf_ss = _udatasel;
389 PROC_LOCK(p);
390 mtx_lock(&psp->ps_mtx);
391 }
392
393
394 /*
395 * Send an interrupt to process.
396 *
397 * Stack is set up to allow sigcode stored
398 * in u. to call routine, followed by kcall
399 * to sigreturn routine below. After sigreturn
400 * resets the signal mask, the stack, and the
401 * frame pointer, it returns to the user
402 * specified pc, psl.
403 */
404 static void
405 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
406 {
407 struct thread *td = curthread;
408 struct proc *p = td->td_proc;
409 struct sigacts *psp;
410 struct trapframe *regs;
411 struct l_sigframe *fp, frame;
412 l_sigset_t lmask;
413 int oonstack, i;
414
415 PROC_LOCK_ASSERT(p, MA_OWNED);
416 psp = p->p_sigacts;
417 mtx_assert(&psp->ps_mtx, MA_OWNED);
418 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
419 /* Signal handler installed with SA_SIGINFO. */
420 linux_rt_sendsig(catcher, sig, mask, code);
421 return;
422 }
423
424 regs = td->td_frame;
425 oonstack = sigonstack(regs->tf_esp);
426
427 #ifdef DEBUG
428 if (ldebug(sendsig))
429 printf(ARGS(sendsig, "%p, %d, %p, %lu"),
430 catcher, sig, (void*)mask, code);
431 #endif
432
433 /*
434 * Allocate space for the signal handler context.
435 */
436 if ((p->p_flag & P_ALTSTACK) && !oonstack &&
437 SIGISMEMBER(psp->ps_sigonstack, sig)) {
438 fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
439 p->p_sigstk.ss_size - sizeof(struct l_sigframe));
440 } else
441 fp = (struct l_sigframe *)regs->tf_esp - 1;
442 mtx_unlock(&psp->ps_mtx);
443 PROC_UNLOCK(p);
444
445 /*
446 * Build the argument list for the signal handler.
447 */
448 if (p->p_sysent->sv_sigtbl)
449 if (sig <= p->p_sysent->sv_sigsize)
450 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
451
452 bzero(&frame, sizeof(frame));
453
454 frame.sf_handler = catcher;
455 frame.sf_sig = sig;
456
457 bsd_to_linux_sigset(mask, &lmask);
458
459 /*
460 * Build the signal context to be used by sigreturn.
461 */
462 frame.sf_sc.sc_mask = lmask.__bits[0];
463 frame.sf_sc.sc_gs = rgs();
464 frame.sf_sc.sc_fs = regs->tf_fs;
465 frame.sf_sc.sc_es = regs->tf_es;
466 frame.sf_sc.sc_ds = regs->tf_ds;
467 frame.sf_sc.sc_edi = regs->tf_edi;
468 frame.sf_sc.sc_esi = regs->tf_esi;
469 frame.sf_sc.sc_ebp = regs->tf_ebp;
470 frame.sf_sc.sc_ebx = regs->tf_ebx;
471 frame.sf_sc.sc_edx = regs->tf_edx;
472 frame.sf_sc.sc_ecx = regs->tf_ecx;
473 frame.sf_sc.sc_eax = regs->tf_eax;
474 frame.sf_sc.sc_eip = regs->tf_eip;
475 frame.sf_sc.sc_cs = regs->tf_cs;
476 frame.sf_sc.sc_eflags = regs->tf_eflags;
477 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
478 frame.sf_sc.sc_ss = regs->tf_ss;
479 frame.sf_sc.sc_err = regs->tf_err;
480 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
481
482 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
483 frame.sf_extramask[i] = lmask.__bits[i+1];
484
485 if (copyout(&frame, fp, sizeof(frame)) != 0) {
486 /*
487 * Process has trashed its stack; give it an illegal
488 * instruction to halt it in its tracks.
489 */
490 PROC_LOCK(p);
491 sigexit(td, SIGILL);
492 }
493
494 /*
495 * Build context to run handler in.
496 */
497 regs->tf_esp = (int)fp;
498 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
499 regs->tf_eflags &= ~(PSL_T | PSL_VM);
500 regs->tf_cs = _ucodesel;
501 regs->tf_ds = _udatasel;
502 regs->tf_es = _udatasel;
503 regs->tf_fs = _udatasel;
504 regs->tf_ss = _udatasel;
505 PROC_LOCK(p);
506 mtx_lock(&psp->ps_mtx);
507 }
508
509 /*
510 * System call to cleanup state after a signal
511 * has been taken. Reset signal mask and
512 * stack state from context left by sendsig (above).
513 * Return to previous pc and psl as specified by
514 * context left by sendsig. Check carefully to
515 * make sure that the user has not modified the
516 * psl to gain improper privileges or to cause
517 * a machine fault.
518 */
519 int
520 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
521 {
522 struct proc *p = td->td_proc;
523 struct l_sigframe frame;
524 struct trapframe *regs;
525 l_sigset_t lmask;
526 int eflags, i;
527
528 regs = td->td_frame;
529
530 #ifdef DEBUG
531 if (ldebug(sigreturn))
532 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
533 #endif
534 /*
535 * The trampoline code hands us the sigframe.
536 * It is unsafe to keep track of it ourselves, in the event that a
537 * program jumps out of a signal handler.
538 */
539 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
540 return (EFAULT);
541
542 /*
543 * Check for security violations.
544 */
545 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
546 eflags = frame.sf_sc.sc_eflags;
547 /*
548 * XXX do allow users to change the privileged flag PSL_RF. The
549 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
550 * sometimes set it there too. tf_eflags is kept in the signal
551 * context during signal handling and there is no other place
552 * to remember it, so the PSL_RF bit may be corrupted by the
553 * signal handler without us knowing. Corruption of the PSL_RF
554 * bit at worst causes one more or one less debugger trap, so
555 * allowing it is fairly harmless.
556 */
557 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
558 return(EINVAL);
559
560 /*
561 * Don't allow users to load a valid privileged %cs. Let the
562 * hardware check for invalid selectors, excess privilege in
563 * other selectors, invalid %eip's and invalid %esp's.
564 */
565 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
566 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
567 trapsignal(td, SIGBUS, T_PROTFLT);
568 return(EINVAL);
569 }
570
571 lmask.__bits[0] = frame.sf_sc.sc_mask;
572 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
573 lmask.__bits[i+1] = frame.sf_extramask[i];
574 PROC_LOCK(p);
575 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
576 SIG_CANTMASK(td->td_sigmask);
577 signotify(td);
578 PROC_UNLOCK(p);
579
580 /*
581 * Restore signal context.
582 */
583 /* %gs was restored by the trampoline. */
584 regs->tf_fs = frame.sf_sc.sc_fs;
585 regs->tf_es = frame.sf_sc.sc_es;
586 regs->tf_ds = frame.sf_sc.sc_ds;
587 regs->tf_edi = frame.sf_sc.sc_edi;
588 regs->tf_esi = frame.sf_sc.sc_esi;
589 regs->tf_ebp = frame.sf_sc.sc_ebp;
590 regs->tf_ebx = frame.sf_sc.sc_ebx;
591 regs->tf_edx = frame.sf_sc.sc_edx;
592 regs->tf_ecx = frame.sf_sc.sc_ecx;
593 regs->tf_eax = frame.sf_sc.sc_eax;
594 regs->tf_eip = frame.sf_sc.sc_eip;
595 regs->tf_cs = frame.sf_sc.sc_cs;
596 regs->tf_eflags = eflags;
597 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
598 regs->tf_ss = frame.sf_sc.sc_ss;
599
600 return (EJUSTRETURN);
601 }
602
603 /*
604 * System call to cleanup state after a signal
605 * has been taken. Reset signal mask and
606 * stack state from context left by rt_sendsig (above).
607 * Return to previous pc and psl as specified by
608 * context left by sendsig. Check carefully to
609 * make sure that the user has not modified the
610 * psl to gain improper privileges or to cause
611 * a machine fault.
612 */
613 int
614 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
615 {
616 struct proc *p = td->td_proc;
617 struct l_ucontext uc;
618 struct l_sigcontext *context;
619 l_stack_t *lss;
620 stack_t ss;
621 struct trapframe *regs;
622 int eflags;
623
624 regs = td->td_frame;
625
626 #ifdef DEBUG
627 if (ldebug(rt_sigreturn))
628 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
629 #endif
630 /*
631 * The trampoline code hands us the ucontext.
632 * It is unsafe to keep track of it ourselves, in the event that a
633 * program jumps out of a signal handler.
634 */
635 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
636 return (EFAULT);
637
638 context = &uc.uc_mcontext;
639
640 /*
641 * Check for security violations.
642 */
643 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
644 eflags = context->sc_eflags;
645 /*
646 * XXX do allow users to change the privileged flag PSL_RF. The
647 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
648 * sometimes set it there too. tf_eflags is kept in the signal
649 * context during signal handling and there is no other place
650 * to remember it, so the PSL_RF bit may be corrupted by the
651 * signal handler without us knowing. Corruption of the PSL_RF
652 * bit at worst causes one more or one less debugger trap, so
653 * allowing it is fairly harmless.
654 */
655 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
656 return(EINVAL);
657
658 /*
659 * Don't allow users to load a valid privileged %cs. Let the
660 * hardware check for invalid selectors, excess privilege in
661 * other selectors, invalid %eip's and invalid %esp's.
662 */
663 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
664 if (!CS_SECURE(context->sc_cs)) {
665 trapsignal(td, SIGBUS, T_PROTFLT);
666 return(EINVAL);
667 }
668
669 PROC_LOCK(p);
670 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
671 SIG_CANTMASK(td->td_sigmask);
672 signotify(td);
673 PROC_UNLOCK(p);
674
675 /*
676 * Restore signal context
677 */
678 /* %gs was restored by the trampoline. */
679 regs->tf_fs = context->sc_fs;
680 regs->tf_es = context->sc_es;
681 regs->tf_ds = context->sc_ds;
682 regs->tf_edi = context->sc_edi;
683 regs->tf_esi = context->sc_esi;
684 regs->tf_ebp = context->sc_ebp;
685 regs->tf_ebx = context->sc_ebx;
686 regs->tf_edx = context->sc_edx;
687 regs->tf_ecx = context->sc_ecx;
688 regs->tf_eax = context->sc_eax;
689 regs->tf_eip = context->sc_eip;
690 regs->tf_cs = context->sc_cs;
691 regs->tf_eflags = eflags;
692 regs->tf_esp = context->sc_esp_at_signal;
693 regs->tf_ss = context->sc_ss;
694
695 /*
696 * call sigaltstack & ignore results..
697 */
698 lss = &uc.uc_stack;
699 ss.ss_sp = lss->ss_sp;
700 ss.ss_size = lss->ss_size;
701 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
702
703 #ifdef DEBUG
704 if (ldebug(rt_sigreturn))
705 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
706 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
707 #endif
708 (void)kern_sigaltstack(td, &ss, NULL);
709
710 return (EJUSTRETURN);
711 }
712
713 /*
714 * MPSAFE
715 */
716 static void
717 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
718 {
719 args[0] = tf->tf_ebx;
720 args[1] = tf->tf_ecx;
721 args[2] = tf->tf_edx;
722 args[3] = tf->tf_esi;
723 args[4] = tf->tf_edi;
724 args[5] = tf->tf_ebp; /* Unconfirmed */
725 *params = NULL; /* no copyin */
726 }
727
728
729
730 /*
731 * Dump core, into a file named as described in the comments for
732 * expand_name(), unless the process was setuid/setgid.
733 */
734 static int
735 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
736 {
737 struct proc *p = td->td_proc;
738 struct ucred *cred = td->td_ucred;
739 struct vmspace *vm = p->p_vmspace;
740 char *tempuser;
741 int error;
742
743 if (ctob((uarea_pages + kstack_pages) +
744 vm->vm_dsize + vm->vm_ssize) >= limit)
745 return (EFAULT);
746 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
747 M_WAITOK | M_ZERO);
748 if (tempuser == NULL)
749 return (ENOMEM);
750 PROC_LOCK(p);
751 fill_kinfo_proc(p, &p->p_uarea->u_kproc);
752 PROC_UNLOCK(p);
753 bcopy(p->p_uarea, tempuser, sizeof(struct user));
754 bcopy(td->td_frame,
755 tempuser + ctob(uarea_pages) +
756 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
757 sizeof(struct trapframe));
758 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
759 ctob(uarea_pages + kstack_pages),
760 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
761 (int *)NULL, td);
762 free(tempuser, M_TEMP);
763 if (error == 0)
764 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
765 (int)ctob(vm->vm_dsize),
766 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
767 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
768 if (error == 0)
769 error = vn_rdwr_inchunks(UIO_WRITE, vp,
770 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
771 round_page(ctob(vm->vm_ssize)),
772 (off_t)ctob(uarea_pages + kstack_pages) +
773 ctob(vm->vm_dsize), UIO_USERSPACE,
774 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
775 return (error);
776 }
777 /*
778 * If a linux binary is exec'ing something, try this image activator
779 * first. We override standard shell script execution in order to
780 * be able to modify the interpreter path. We only do this if a linux
781 * binary is doing the exec, so we do not create an EXEC module for it.
782 */
783 static int exec_linux_imgact_try(struct image_params *iparams);
784
785 static int
786 exec_linux_imgact_try(struct image_params *imgp)
787 {
788 const char *head = (const char *)imgp->image_header;
789 int error = -1;
790
791 /*
792 * The interpreter for shell scripts run from a linux binary needs
793 * to be located in /compat/linux if possible in order to recursively
794 * maintain linux path emulation.
795 */
796 if (((const short *)head)[0] == SHELLMAGIC) {
797 /*
798 * Run our normal shell image activator. If it succeeds attempt
799 * to use the alternate path for the interpreter. If an alternate
800 * path is found, use our stringspace to store it.
801 */
802 if ((error = exec_shell_imgact(imgp)) == 0) {
803 char *rpath = NULL;
804
805 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
806 imgp->interpreter_name, &rpath, 0);
807 if (rpath != imgp->interpreter_name) {
808 int len = strlen(rpath) + 1;
809
810 if (len <= MAXSHELLCMDLEN) {
811 memcpy(imgp->interpreter_name, rpath, len);
812 }
813 free(rpath, M_TEMP);
814 }
815 }
816 }
817 return(error);
818 }
819
820 /*
821 * exec_setregs may initialize some registers differently than Linux
822 * does, thus potentially confusing Linux binaries. If necessary, we
823 * override the exec_setregs default(s) here.
824 */
825 static void
826 exec_linux_setregs(struct thread *td, u_long entry,
827 u_long stack, u_long ps_strings)
828 {
829 struct pcb *pcb = td->td_pcb;
830
831 exec_setregs(td, entry, stack, ps_strings);
832
833 /* Linux sets %gs to 0, we default to _udatasel */
834 pcb->pcb_gs = 0; load_gs(0);
835 }
836
/*
 * System entry vector for Linux a.out images.  It differs from
 * elf_linux_sysvec only in its name, stack fixup routine and core dump
 * routine.
 *
 * NOTE(review): the initializer is positional; the field names in the
 * comments follow struct sysentvec in <sys/sysent.h> -- confirm against
 * the header if the structure changes.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* sv_size: number of syscall entries */
	linux_sysent,		/* sv_table: syscall table */
	0xff,			/* sv_mask */
	LINUX_SIGTBLSZ,		/* sv_sigsize: signal table size */
	bsd_to_linux_signal,	/* sv_sigtbl: signal translation table */
	ELAST + 1,		/* sv_errsize: errno table size */
	bsd_to_linux_errno,	/* sv_errtbl: errno translation table */
	translate_traps,	/* sv_transtrap */
	linux_fixup,		/* sv_fixup: stack fixup */
	linux_sendsig,		/* sv_sendsig */
	linux_sigcode,		/* sv_sigcode */
	&linux_szsigcode,	/* sv_szsigcode */
	linux_prepsyscall,	/* sv_prepsyscall */
	"Linux a.out",		/* sv_name */
	linux_aout_coredump,	/* sv_coredump */
	exec_linux_imgact_try,	/* sv_imgact_try */
	LINUX_MINSIGSTKSZ,	/* sv_minsigstksz */
	PAGE_SIZE,		/* sv_pagesize */
	VM_MIN_ADDRESS,		/* sv_minuser */
	VM_MAXUSER_ADDRESS,	/* sv_maxuser */
	USRSTACK,		/* sv_usrstack */
	PS_STRINGS,		/* sv_psstrings */
	VM_PROT_ALL,		/* sv_stackprot */
	exec_copyout_strings,	/* sv_copyout_strings */
	exec_linux_setregs	/* sv_setregs */
};
864
/*
 * System entry vector for Linux ELF images; referenced by the brand
 * entries below.  It differs from linux_sysvec only in its name, stack
 * fixup routine (elf_linux_fixup) and core dump routine (elf32_coredump).
 *
 * NOTE(review): the initializer is positional; the field names in the
 * comments follow struct sysentvec in <sys/sysent.h> -- confirm against
 * the header if the structure changes.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* sv_size: number of syscall entries */
	linux_sysent,		/* sv_table: syscall table */
	0xff,			/* sv_mask */
	LINUX_SIGTBLSZ,		/* sv_sigsize: signal table size */
	bsd_to_linux_signal,	/* sv_sigtbl: signal translation table */
	ELAST + 1,		/* sv_errsize: errno table size */
	bsd_to_linux_errno,	/* sv_errtbl: errno translation table */
	translate_traps,	/* sv_transtrap */
	elf_linux_fixup,	/* sv_fixup: stack fixup */
	linux_sendsig,		/* sv_sendsig */
	linux_sigcode,		/* sv_sigcode */
	&linux_szsigcode,	/* sv_szsigcode */
	linux_prepsyscall,	/* sv_prepsyscall */
	"Linux ELF",		/* sv_name */
	elf32_coredump,		/* sv_coredump */
	exec_linux_imgact_try,	/* sv_imgact_try */
	LINUX_MINSIGSTKSZ,	/* sv_minsigstksz */
	PAGE_SIZE,		/* sv_pagesize */
	VM_MIN_ADDRESS,		/* sv_minuser */
	VM_MAXUSER_ADDRESS,	/* sv_maxuser */
	USRSTACK,		/* sv_usrstack */
	PS_STRINGS,		/* sv_psstrings */
	VM_PROT_ALL,		/* sv_stackprot */
	exec_copyout_strings,	/* sv_copyout_strings */
	exec_linux_setregs	/* sv_setregs */
};
892
/*
 * ELF brand entries for Linux/i386 binaries.  The two entries are
 * identical except for the dynamic linker path (ld-linux.so.1 versus
 * ld-linux.so.2); both use "/compat/linux" and elf_linux_sysvec.
 *
 * NOTE(review): the initializers are positional; the roles in the
 * comments are inferred from the values -- confirm against the
 * Elf32_Brandinfo definition.
 */
static Elf32_Brandinfo linux_brand = {
	ELFOSABI_LINUX,		/* OS ABI brand */
	EM_386,			/* machine */
	"Linux",		/* brand string */
	"/compat/linux",	/* emulation root path */
	"/lib/ld-linux.so.1",	/* interpreter path */
	&elf_linux_sysvec	/* sysentvec used for matching images */
};

static Elf32_Brandinfo linux_glibc2brand = {
	ELFOSABI_LINUX,		/* OS ABI brand */
	EM_386,			/* machine */
	"Linux",		/* brand string */
	"/compat/linux",	/* emulation root path */
	"/lib/ld-linux.so.2",	/* interpreter path */
	&elf_linux_sysvec	/* sysentvec used for matching images */
};

/* NULL-terminated list of the brands handled by linux_elf_modevent(). */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	NULL
};
916
917 static int
918 linux_elf_modevent(module_t mod, int type, void *data)
919 {
920 Elf32_Brandinfo **brandinfo;
921 int error;
922 struct linux_ioctl_handler **lihp;
923
924 error = 0;
925
926 switch(type) {
927 case MOD_LOAD:
928 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
929 ++brandinfo)
930 if (elf32_insert_brand_entry(*brandinfo) < 0)
931 error = EINVAL;
932 if (error == 0) {
933 SET_FOREACH(lihp, linux_ioctl_handler_set)
934 linux_ioctl_register_handler(*lihp);
935 if (bootverbose)
936 printf("Linux ELF exec handler installed\n");
937 } else
938 printf("cannot insert Linux ELF brand handler\n");
939 break;
940 case MOD_UNLOAD:
941 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
942 ++brandinfo)
943 if (elf32_brand_inuse(*brandinfo))
944 error = EBUSY;
945 if (error == 0) {
946 for (brandinfo = &linux_brandlist[0];
947 *brandinfo != NULL; ++brandinfo)
948 if (elf32_remove_brand_entry(*brandinfo) < 0)
949 error = EINVAL;
950 }
951 if (error == 0) {
952 SET_FOREACH(lihp, linux_ioctl_handler_set)
953 linux_ioctl_unregister_handler(*lihp);
954 if (bootverbose)
955 printf("Linux ELF exec handler removed\n");
956 linux_mib_destroy();
957 } else
958 printf("Could not deinstall ELF interpreter entry\n");
959 break;
960 default:
961 break;
962 }
963 return error;
964 }
965
/*
 * Module description: name "linuxelf", event handler linux_elf_modevent,
 * and 0 as the third (last) initializer.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Register the module with the kernel at SI_SUB_EXEC, SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Cache object: 7d624f6f246173d04cbd49d12efe9447
|