1 /*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/imgact.h>
36 #include <sys/imgact_aout.h>
37 #include <sys/imgact_elf.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/signalvar.h>
45 #include <sys/syscallsubr.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/vnode.h>
49
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 #include <vm/vm_param.h>
57
58 #include <machine/cpu.h>
59 #include <machine/md_var.h>
60 #include <machine/pcb.h>
61
62 #include <i386/linux/linux.h>
63 #include <i386/linux/linux_proto.h>
64 #include <compat/linux/linux_mib.h>
65 #include <compat/linux/linux_signal.h>
66 #include <compat/linux/linux_util.h>
67
68 MODULE_VERSION(linux, 1);
69 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
70 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
71 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
72
73 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
74
/*
 * The two bytes "#!" that start an interpreter script, expressed as the
 * 16-bit value they form when loaded in host byte order.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define	SHELLMAGIC	0x2321
#else
#define	SHELLMAGIC	0x2123	/* #! */
#endif

/*
 * The sendsig routines are not system calls, but they still want to use
 * the ldebug() facility.  Give them the pseudo syscall number 0, which
 * is slightly less bogus than borrowing ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/* Load the x87 FPU control word from the memory operand *addr. */
#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
/* FPU control word loaded for Linux binaries (see exec_linux_setregs()). */
#define	__LINUX_NPXCW__		0x37f
92
93 extern char linux_sigcode[];
94 extern int linux_szsigcode;
95
96 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
97
98 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
100
101 static int linux_fixup(register_t **stack_base,
102 struct image_params *iparams);
103 static int elf_linux_fixup(register_t **stack_base,
104 struct image_params *iparams);
105 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
106 caddr_t *params);
107 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
108 u_long code);
109 static void exec_linux_setregs(struct thread *td, u_long entry,
110 u_long stack, u_long ps_strings);
111
112 /*
113 * Linux syscalls return negative errno's, we do positive and map them
114 * Reference:
115 * FreeBSD: src/sys/sys/errno.h
116 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
117 * linux-2.6.17.8/include/asm-generic/errno.h
118 */
119 static int bsd_to_linux_errno[ELAST + 1] = {
120 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
121 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
122 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
123 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
124 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
125 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
126 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
127 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
128 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
129 -72, -67, -71
130 };
131
132 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
133 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
134 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
135 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
136 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
137 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
138 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
139 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
140 0, LINUX_SIGUSR1, LINUX_SIGUSR2
141 };
142
143 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
144 SIGHUP, SIGINT, SIGQUIT, SIGILL,
145 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
146 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
147 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
148 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
149 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
150 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
151 SIGIO, SIGURG, SIGSYS
152 };
153
/* Value reported when a FreeBSD trap code has no Linux counterpart. */
#define	LINUX_T_UNKNOWN		255

/*
 * FreeBSD trap code -> Linux trap number.  The array index is the FreeBSD
 * trap code; the trailing comment names the FreeBSD trap where one is
 * assigned to that code.
 */
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15			/* T_RESERVED */
};

/*
 * Bounds-checked lookup: codes past the end of the table translate to
 * LINUX_T_UNKNOWN.  Note that the argument is evaluated twice.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
192
193 /*
194 * If FreeBSD & Linux have a difference of opinion about what a trap
195 * means, deal with it here.
196 *
197 * MPSAFE
198 */
199 static int
200 translate_traps(int signal, int trap_code)
201 {
202 if (signal != SIGBUS)
203 return signal;
204 switch (trap_code) {
205 case T_PROTFLT:
206 case T_TSSFLT:
207 case T_DOUBLEFLT:
208 case T_PAGEFLT:
209 return SIGSEGV;
210 default:
211 return signal;
212 }
213 }
214
215 static int
216 linux_fixup(register_t **stack_base, struct image_params *imgp)
217 {
218 register_t *argv, *envp;
219
220 argv = *stack_base;
221 envp = *stack_base + (imgp->args->argc + 1);
222 (*stack_base)--;
223 **stack_base = (intptr_t)(void *)envp;
224 (*stack_base)--;
225 **stack_base = (intptr_t)(void *)argv;
226 (*stack_base)--;
227 **stack_base = imgp->args->argc;
228 return 0;
229 }
230
231 static int
232 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
233 {
234 Elf32_Auxargs *args;
235 register_t *pos;
236
237 KASSERT(curthread->td_proc == imgp->proc &&
238 (curthread->td_proc->p_flag & P_SA) == 0,
239 ("unsafe elf_linux_fixup(), should be curproc"));
240 args = (Elf32_Auxargs *)imgp->auxargs;
241 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
242
243 if (args->trace)
244 AUXARGS_ENTRY(pos, AT_DEBUG, 1);
245 if (args->execfd != -1)
246 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
247 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
248 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
249 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
250 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
251 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
252 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
253 AUXARGS_ENTRY(pos, AT_BASE, args->base);
254 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
255 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
256 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
257 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
258 AUXARGS_ENTRY(pos, AT_NULL, 0);
259
260 free(imgp->auxargs, M_TEMP);
261 imgp->auxargs = NULL;
262
263 (*stack_base)--;
264 **stack_base = (register_t)imgp->args->argc;
265 return 0;
266 }
267
268 extern int _ucodesel, _udatasel;
269 extern unsigned long linux_sznonrtsigcode;
270
271 static void
272 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
273 {
274 struct thread *td = curthread;
275 struct proc *p = td->td_proc;
276 struct sigacts *psp;
277 struct trapframe *regs;
278 struct l_rt_sigframe *fp, frame;
279 int oonstack;
280
281 PROC_LOCK_ASSERT(p, MA_OWNED);
282 psp = p->p_sigacts;
283 mtx_assert(&psp->ps_mtx, MA_OWNED);
284 regs = td->td_frame;
285 oonstack = sigonstack(regs->tf_esp);
286
287 #ifdef DEBUG
288 if (ldebug(rt_sendsig))
289 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
290 catcher, sig, (void*)mask, code);
291 #endif
292 /*
293 * Allocate space for the signal handler context.
294 */
295 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
296 SIGISMEMBER(psp->ps_sigonstack, sig)) {
297 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
298 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
299 } else
300 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
301 mtx_unlock(&psp->ps_mtx);
302
303 /*
304 * Build the argument list for the signal handler.
305 */
306 if (p->p_sysent->sv_sigtbl)
307 if (sig <= p->p_sysent->sv_sigsize)
308 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
309
310 bzero(&frame, sizeof(frame));
311
312 frame.sf_handler = catcher;
313 frame.sf_sig = sig;
314 frame.sf_siginfo = &fp->sf_si;
315 frame.sf_ucontext = &fp->sf_sc;
316
317 /* Fill in POSIX parts */
318 frame.sf_si.lsi_signo = sig;
319 frame.sf_si.lsi_code = code;
320 frame.sf_si.lsi_addr = (void *)td->td_md.md_fault_addr;
321
322 /*
323 * Build the signal context to be used by sigreturn.
324 */
325 frame.sf_sc.uc_flags = 0; /* XXX ??? */
326 frame.sf_sc.uc_link = NULL; /* XXX ??? */
327
328 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
329 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
330 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
331 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
332 PROC_UNLOCK(p);
333
334 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
335
336 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
337 frame.sf_sc.uc_mcontext.sc_gs = rgs();
338 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
339 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
340 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
341 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
342 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
343 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
344 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
345 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
346 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
347 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
348 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
349 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
350 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
351 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
352 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
353 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
354 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
355
356 #ifdef DEBUG
357 if (ldebug(rt_sendsig))
358 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
359 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
360 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
361 #endif
362
363 if (copyout(&frame, fp, sizeof(frame)) != 0) {
364 /*
365 * Process has trashed its stack; give it an illegal
366 * instruction to halt it in its tracks.
367 */
368 #ifdef DEBUG
369 if (ldebug(rt_sendsig))
370 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
371 fp, oonstack);
372 #endif
373 PROC_LOCK(p);
374 sigexit(td, SIGILL);
375 }
376
377 /*
378 * Build context to run handler in.
379 */
380 regs->tf_esp = (int)fp;
381 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
382 linux_sznonrtsigcode;
383 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
384 regs->tf_cs = _ucodesel;
385 regs->tf_ds = _udatasel;
386 regs->tf_es = _udatasel;
387 regs->tf_fs = _udatasel;
388 regs->tf_ss = _udatasel;
389 PROC_LOCK(p);
390 mtx_lock(&psp->ps_mtx);
391 }
392
393
394 /*
395 * Send an interrupt to process.
396 *
397 * Stack is set up to allow sigcode stored
398 * in u. to call routine, followed by kcall
399 * to sigreturn routine below. After sigreturn
400 * resets the signal mask, the stack, and the
401 * frame pointer, it returns to the user
402 * specified pc, psl.
403 */
404 static void
405 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
406 {
407 struct thread *td = curthread;
408 struct proc *p = td->td_proc;
409 struct sigacts *psp;
410 struct trapframe *regs;
411 struct l_sigframe *fp, frame;
412 l_sigset_t lmask;
413 int oonstack, i;
414
415 PROC_LOCK_ASSERT(p, MA_OWNED);
416 psp = p->p_sigacts;
417 mtx_assert(&psp->ps_mtx, MA_OWNED);
418 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
419 /* Signal handler installed with SA_SIGINFO. */
420 linux_rt_sendsig(catcher, sig, mask, code);
421 return;
422 }
423
424 regs = td->td_frame;
425 oonstack = sigonstack(regs->tf_esp);
426
427 #ifdef DEBUG
428 if (ldebug(sendsig))
429 printf(ARGS(sendsig, "%p, %d, %p, %lu"),
430 catcher, sig, (void*)mask, code);
431 #endif
432
433 /*
434 * Allocate space for the signal handler context.
435 */
436 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
437 SIGISMEMBER(psp->ps_sigonstack, sig)) {
438 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
439 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
440 } else
441 fp = (struct l_sigframe *)regs->tf_esp - 1;
442 mtx_unlock(&psp->ps_mtx);
443 PROC_UNLOCK(p);
444
445 /*
446 * Build the argument list for the signal handler.
447 */
448 if (p->p_sysent->sv_sigtbl)
449 if (sig <= p->p_sysent->sv_sigsize)
450 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
451
452 bzero(&frame, sizeof(frame));
453
454 frame.sf_handler = catcher;
455 frame.sf_sig = sig;
456
457 bsd_to_linux_sigset(mask, &lmask);
458
459 /*
460 * Build the signal context to be used by sigreturn.
461 */
462 frame.sf_sc.sc_mask = lmask.__bits[0];
463 frame.sf_sc.sc_gs = rgs();
464 frame.sf_sc.sc_fs = regs->tf_fs;
465 frame.sf_sc.sc_es = regs->tf_es;
466 frame.sf_sc.sc_ds = regs->tf_ds;
467 frame.sf_sc.sc_edi = regs->tf_edi;
468 frame.sf_sc.sc_esi = regs->tf_esi;
469 frame.sf_sc.sc_ebp = regs->tf_ebp;
470 frame.sf_sc.sc_ebx = regs->tf_ebx;
471 frame.sf_sc.sc_edx = regs->tf_edx;
472 frame.sf_sc.sc_ecx = regs->tf_ecx;
473 frame.sf_sc.sc_eax = regs->tf_eax;
474 frame.sf_sc.sc_eip = regs->tf_eip;
475 frame.sf_sc.sc_cs = regs->tf_cs;
476 frame.sf_sc.sc_eflags = regs->tf_eflags;
477 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
478 frame.sf_sc.sc_ss = regs->tf_ss;
479 frame.sf_sc.sc_err = regs->tf_err;
480 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
481
482 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
483 frame.sf_extramask[i] = lmask.__bits[i+1];
484
485 if (copyout(&frame, fp, sizeof(frame)) != 0) {
486 /*
487 * Process has trashed its stack; give it an illegal
488 * instruction to halt it in its tracks.
489 */
490 PROC_LOCK(p);
491 sigexit(td, SIGILL);
492 }
493
494 /*
495 * Build context to run handler in.
496 */
497 regs->tf_esp = (int)fp;
498 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
499 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
500 regs->tf_cs = _ucodesel;
501 regs->tf_ds = _udatasel;
502 regs->tf_es = _udatasel;
503 regs->tf_fs = _udatasel;
504 regs->tf_ss = _udatasel;
505 PROC_LOCK(p);
506 mtx_lock(&psp->ps_mtx);
507 }
508
509 /*
510 * System call to cleanup state after a signal
511 * has been taken. Reset signal mask and
512 * stack state from context left by sendsig (above).
513 * Return to previous pc and psl as specified by
514 * context left by sendsig. Check carefully to
515 * make sure that the user has not modified the
516 * psl to gain improper privileges or to cause
517 * a machine fault.
518 */
519 int
520 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
521 {
522 struct proc *p = td->td_proc;
523 struct l_sigframe frame;
524 struct trapframe *regs;
525 l_sigset_t lmask;
526 int eflags, i;
527
528 regs = td->td_frame;
529
530 #ifdef DEBUG
531 if (ldebug(sigreturn))
532 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
533 #endif
534 /*
535 * The trampoline code hands us the sigframe.
536 * It is unsafe to keep track of it ourselves, in the event that a
537 * program jumps out of a signal handler.
538 */
539 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
540 return (EFAULT);
541
542 /*
543 * Check for security violations.
544 */
545 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
546 eflags = frame.sf_sc.sc_eflags;
547 /*
548 * XXX do allow users to change the privileged flag PSL_RF. The
549 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
550 * sometimes set it there too. tf_eflags is kept in the signal
551 * context during signal handling and there is no other place
552 * to remember it, so the PSL_RF bit may be corrupted by the
553 * signal handler without us knowing. Corruption of the PSL_RF
554 * bit at worst causes one more or one less debugger trap, so
555 * allowing it is fairly harmless.
556 */
557 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
558 return(EINVAL);
559
560 /*
561 * Don't allow users to load a valid privileged %cs. Let the
562 * hardware check for invalid selectors, excess privilege in
563 * other selectors, invalid %eip's and invalid %esp's.
564 */
565 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
566 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
567 trapsignal(td, SIGBUS, T_PROTFLT);
568 return(EINVAL);
569 }
570
571 lmask.__bits[0] = frame.sf_sc.sc_mask;
572 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
573 lmask.__bits[i+1] = frame.sf_extramask[i];
574 PROC_LOCK(p);
575 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
576 SIG_CANTMASK(td->td_sigmask);
577 signotify(td);
578 PROC_UNLOCK(p);
579
580 /*
581 * Restore signal context.
582 */
583 /* %gs was restored by the trampoline. */
584 regs->tf_fs = frame.sf_sc.sc_fs;
585 regs->tf_es = frame.sf_sc.sc_es;
586 regs->tf_ds = frame.sf_sc.sc_ds;
587 regs->tf_edi = frame.sf_sc.sc_edi;
588 regs->tf_esi = frame.sf_sc.sc_esi;
589 regs->tf_ebp = frame.sf_sc.sc_ebp;
590 regs->tf_ebx = frame.sf_sc.sc_ebx;
591 regs->tf_edx = frame.sf_sc.sc_edx;
592 regs->tf_ecx = frame.sf_sc.sc_ecx;
593 regs->tf_eax = frame.sf_sc.sc_eax;
594 regs->tf_eip = frame.sf_sc.sc_eip;
595 regs->tf_cs = frame.sf_sc.sc_cs;
596 regs->tf_eflags = eflags;
597 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
598 regs->tf_ss = frame.sf_sc.sc_ss;
599
600 return (EJUSTRETURN);
601 }
602
603 /*
604 * System call to cleanup state after a signal
605 * has been taken. Reset signal mask and
606 * stack state from context left by rt_sendsig (above).
607 * Return to previous pc and psl as specified by
608 * context left by sendsig. Check carefully to
609 * make sure that the user has not modified the
610 * psl to gain improper privileges or to cause
611 * a machine fault.
612 */
613 int
614 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
615 {
616 struct proc *p = td->td_proc;
617 struct l_ucontext uc;
618 struct l_sigcontext *context;
619 l_stack_t *lss;
620 stack_t ss;
621 struct trapframe *regs;
622 int eflags;
623
624 regs = td->td_frame;
625
626 #ifdef DEBUG
627 if (ldebug(rt_sigreturn))
628 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
629 #endif
630 /*
631 * The trampoline code hands us the ucontext.
632 * It is unsafe to keep track of it ourselves, in the event that a
633 * program jumps out of a signal handler.
634 */
635 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
636 return (EFAULT);
637
638 context = &uc.uc_mcontext;
639
640 /*
641 * Check for security violations.
642 */
643 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
644 eflags = context->sc_eflags;
645 /*
646 * XXX do allow users to change the privileged flag PSL_RF. The
647 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
648 * sometimes set it there too. tf_eflags is kept in the signal
649 * context during signal handling and there is no other place
650 * to remember it, so the PSL_RF bit may be corrupted by the
651 * signal handler without us knowing. Corruption of the PSL_RF
652 * bit at worst causes one more or one less debugger trap, so
653 * allowing it is fairly harmless.
654 */
655 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
656 return(EINVAL);
657
658 /*
659 * Don't allow users to load a valid privileged %cs. Let the
660 * hardware check for invalid selectors, excess privilege in
661 * other selectors, invalid %eip's and invalid %esp's.
662 */
663 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
664 if (!CS_SECURE(context->sc_cs)) {
665 trapsignal(td, SIGBUS, T_PROTFLT);
666 return(EINVAL);
667 }
668
669 PROC_LOCK(p);
670 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
671 SIG_CANTMASK(td->td_sigmask);
672 signotify(td);
673 PROC_UNLOCK(p);
674
675 /*
676 * Restore signal context
677 */
678 /* %gs was restored by the trampoline. */
679 regs->tf_fs = context->sc_fs;
680 regs->tf_es = context->sc_es;
681 regs->tf_ds = context->sc_ds;
682 regs->tf_edi = context->sc_edi;
683 regs->tf_esi = context->sc_esi;
684 regs->tf_ebp = context->sc_ebp;
685 regs->tf_ebx = context->sc_ebx;
686 regs->tf_edx = context->sc_edx;
687 regs->tf_ecx = context->sc_ecx;
688 regs->tf_eax = context->sc_eax;
689 regs->tf_eip = context->sc_eip;
690 regs->tf_cs = context->sc_cs;
691 regs->tf_eflags = eflags;
692 regs->tf_esp = context->sc_esp_at_signal;
693 regs->tf_ss = context->sc_ss;
694
695 /*
696 * call sigaltstack & ignore results..
697 */
698 lss = &uc.uc_stack;
699 ss.ss_sp = lss->ss_sp;
700 ss.ss_size = lss->ss_size;
701 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
702
703 #ifdef DEBUG
704 if (ldebug(rt_sigreturn))
705 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
706 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
707 #endif
708 (void)kern_sigaltstack(td, &ss, NULL);
709
710 return (EJUSTRETURN);
711 }
712
713 /*
714 * MPSAFE
715 */
716 static void
717 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
718 {
719 args[0] = tf->tf_ebx;
720 args[1] = tf->tf_ecx;
721 args[2] = tf->tf_edx;
722 args[3] = tf->tf_esi;
723 args[4] = tf->tf_edi;
724 args[5] = tf->tf_ebp; /* Unconfirmed */
725 *params = NULL; /* no copyin */
726 }
727
728 /*
729 * If a linux binary is exec'ing something, try this image activator
730 * first. We override standard shell script execution in order to
731 * be able to modify the interpreter path. We only do this if a linux
732 * binary is doing the exec, so we do not create an EXEC module for it.
733 */
734 static int exec_linux_imgact_try(struct image_params *iparams);
735
736 static int
737 exec_linux_imgact_try(struct image_params *imgp)
738 {
739 const char *head = (const char *)imgp->image_header;
740 char *rpath;
741 int error = -1, len;
742
743 /*
744 * The interpreter for shell scripts run from a linux binary needs
745 * to be located in /compat/linux if possible in order to recursively
746 * maintain linux path emulation.
747 */
748 if (((const short *)head)[0] == SHELLMAGIC) {
749 /*
750 * Run our normal shell image activator. If it succeeds attempt
751 * to use the alternate path for the interpreter. If an alternate
752 * path is found, use our stringspace to store it.
753 */
754 if ((error = exec_shell_imgact(imgp)) == 0) {
755 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
756 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
757 if (rpath != NULL) {
758 len = strlen(rpath) + 1;
759
760 if (len <= MAXSHELLCMDLEN) {
761 memcpy(imgp->interpreter_name, rpath, len);
762 }
763 free(rpath, M_TEMP);
764 }
765 }
766 }
767 return(error);
768 }
769
770 /*
771 * exec_setregs may initialize some registers differently than Linux
772 * does, thus potentially confusing Linux binaries. If necessary, we
773 * override the exec_setregs default(s) here.
774 */
775 static void
776 exec_linux_setregs(struct thread *td, u_long entry,
777 u_long stack, u_long ps_strings)
778 {
779 static const u_short control = __LINUX_NPXCW__;
780 struct pcb *pcb = td->td_pcb;
781
782 exec_setregs(td, entry, stack, ps_strings);
783
784 /* Linux sets %gs to 0, we default to _udatasel */
785 pcb->pcb_gs = 0; load_gs(0);
786
787 /* Linux sets the i387 to extended precision. */
788 fldcw(&control);
789 }
790
791 struct sysentvec linux_sysvec = {
792 LINUX_SYS_MAXSYSCALL,
793 linux_sysent,
794 0,
795 LINUX_SIGTBLSZ,
796 bsd_to_linux_signal,
797 ELAST + 1,
798 bsd_to_linux_errno,
799 translate_traps,
800 linux_fixup,
801 linux_sendsig,
802 linux_sigcode,
803 &linux_szsigcode,
804 linux_prepsyscall,
805 "Linux a.out",
806 NULL,
807 exec_linux_imgact_try,
808 LINUX_MINSIGSTKSZ,
809 PAGE_SIZE,
810 VM_MIN_ADDRESS,
811 VM_MAXUSER_ADDRESS,
812 USRSTACK,
813 PS_STRINGS,
814 VM_PROT_ALL,
815 exec_copyout_strings,
816 exec_linux_setregs,
817 NULL
818 };
819
820 struct sysentvec elf_linux_sysvec = {
821 LINUX_SYS_MAXSYSCALL,
822 linux_sysent,
823 0,
824 LINUX_SIGTBLSZ,
825 bsd_to_linux_signal,
826 ELAST + 1,
827 bsd_to_linux_errno,
828 translate_traps,
829 elf_linux_fixup,
830 linux_sendsig,
831 linux_sigcode,
832 &linux_szsigcode,
833 linux_prepsyscall,
834 "Linux ELF",
835 elf32_coredump,
836 exec_linux_imgact_try,
837 LINUX_MINSIGSTKSZ,
838 PAGE_SIZE,
839 VM_MIN_ADDRESS,
840 VM_MAXUSER_ADDRESS,
841 USRSTACK,
842 PS_STRINGS,
843 VM_PROT_ALL,
844 exec_copyout_strings,
845 exec_linux_setregs,
846 NULL
847 };
848
849 static Elf32_Brandinfo linux_brand = {
850 ELFOSABI_LINUX,
851 EM_386,
852 "Linux",
853 "/compat/linux",
854 "/lib/ld-linux.so.1",
855 &elf_linux_sysvec,
856 NULL,
857 };
858
859 static Elf32_Brandinfo linux_glibc2brand = {
860 ELFOSABI_LINUX,
861 EM_386,
862 "Linux",
863 "/compat/linux",
864 "/lib/ld-linux.so.2",
865 &elf_linux_sysvec,
866 NULL,
867 };
868
869 Elf32_Brandinfo *linux_brandlist[] = {
870 &linux_brand,
871 &linux_glibc2brand,
872 NULL
873 };
874
875 static int
876 linux_elf_modevent(module_t mod, int type, void *data)
877 {
878 Elf32_Brandinfo **brandinfo;
879 int error;
880 struct linux_ioctl_handler **lihp;
881 struct linux_device_handler **ldhp;
882
883 error = 0;
884
885 switch(type) {
886 case MOD_LOAD:
887 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
888 ++brandinfo)
889 if (elf32_insert_brand_entry(*brandinfo) < 0)
890 error = EINVAL;
891 if (error == 0) {
892 SET_FOREACH(lihp, linux_ioctl_handler_set)
893 linux_ioctl_register_handler(*lihp);
894 SET_FOREACH(ldhp, linux_device_handler_set)
895 linux_device_register_handler(*ldhp);
896 if (bootverbose)
897 printf("Linux ELF exec handler installed\n");
898 } else
899 printf("cannot insert Linux ELF brand handler\n");
900 break;
901 case MOD_UNLOAD:
902 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
903 ++brandinfo)
904 if (elf32_brand_inuse(*brandinfo))
905 error = EBUSY;
906 if (error == 0) {
907 for (brandinfo = &linux_brandlist[0];
908 *brandinfo != NULL; ++brandinfo)
909 if (elf32_remove_brand_entry(*brandinfo) < 0)
910 error = EINVAL;
911 }
912 if (error == 0) {
913 SET_FOREACH(lihp, linux_ioctl_handler_set)
914 linux_ioctl_unregister_handler(*lihp);
915 SET_FOREACH(ldhp, linux_device_handler_set)
916 linux_device_unregister_handler(*ldhp);
917 if (bootverbose)
918 printf("Linux ELF exec handler removed\n");
919 } else
920 printf("Could not deinstall ELF interpreter entry\n");
921 break;
922 default:
923 return EOPNOTSUPP;
924 }
925 return error;
926 }
927
928 static moduledata_t linux_elf_mod = {
929 "linuxelf",
930 linux_elf_modevent,
931 0
932 };
933
934 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Cache object: 91a2804f3faa0067ae248dadaa6a85bc
|