1 /*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD: releng/6.2/sys/i386/linux/linux_sysvec.c 159642 2006-06-15 15:52:05Z ambrisko $");
31
32 /* XXX we use functions that might not exist. */
33 #include "opt_compat.h"
34
35 #ifndef COMPAT_43
36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/exec.h>
42 #include <sys/imgact.h>
43 #include <sys/imgact_aout.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
50 #include <sys/proc.h>
51 #include <sys/signalvar.h>
52 #include <sys/syscallsubr.h>
53 #include <sys/sysent.h>
54 #include <sys/sysproto.h>
55 #include <sys/vnode.h>
56
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_extern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_object.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_param.h>
64
65 #include <machine/cpu.h>
66 #include <machine/md_var.h>
67 #include <machine/pcb.h>
68
69 #include <i386/linux/linux.h>
70 #include <i386/linux/linux_proto.h>
71 #include <compat/linux/linux_mib.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
74
/*
 * Module metadata: this file provides version 1 of the "linux" module and
 * declares dependencies on version 1 of sysvmsg, sysvsem and sysvshm.
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* malloc(9) type M_LINUX, described as "Linux mode structures". */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
81
/*
 * The two bytes "#!" that start a shell script, as they appear when the
 * first 16 bits of the image header are read as one integer; the value
 * therefore depends on the host byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define	SHELLMAGIC	0x2123 /* #! */
#else
#define	SHELLMAGIC	0x2321
#endif
87
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/* Execute "fldcw" with the object at *addr as its memory operand. */
#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
/* Control word that exec_linux_setregs() loads with fldcw(). */
#define	__LINUX_NPXCW__		0x37f
99
/*
 * Signal code image and its size, defined outside this file; both are
 * installed into the sysentvec initializers further down.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Handler sets walked at module load/unload time by linux_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations for the sysentvec hooks implemented in this file. */
static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void	linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
		    u_long code);
static void	exec_linux_setregs(struct thread *td, u_long entry,
		    u_long stack, u_long ps_strings);
118
119 /*
120 * Linux syscalls return negative errno's, we do positive and map them
121 */
122 static int bsd_to_linux_errno[ELAST + 1] = {
123 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
124 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
125 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
126 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
127 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
128 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
129 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
130 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
131 -6, -6, -43, -42, -75, -6, -84
132 };
133
/*
 * FreeBSD -> Linux signal number translation.
 *
 * Installed as the signal table of both sysentvecs below and looked up by
 * the sendsig routines with _SIG_IDX(sig), so the first entry corresponds
 * to the first FreeBSD signal.  Two slots hold 0 instead of a LINUX_SIG*
 * constant (presumably FreeBSD signals with no Linux equivalent -- verify
 * against <sys/signal.h>).
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
144
/*
 * Linux -> FreeBSD signal number translation, the counterpart of
 * bsd_to_linux_signal[] above.  Not referenced elsewhere in this file;
 * presumably indexed the same way (signal number minus one) by its users
 * -- verify against the callers.  Note that SIGBUS and SIGURG each appear
 * twice, i.e. two Linux signals share one FreeBSD signal.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
155
/*
 * FreeBSD trap number -> Linux trap number.
 *
 * The array index is the FreeBSD T_* value named in each trailing comment;
 * slots without a named FreeBSD trap yield LINUX_T_UNKNOWN.  The sendsig
 * routines store the result in the sc_trapno field of the signal context.
 */
#define	LINUX_T_UNKNOWN	255
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1 T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3 T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6 T_ARITHTRAP */
	254,			/* 7 T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9 T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup: any code at or beyond the end of the table maps
 * to LINUX_T_UNKNOWN.  The argument is evaluated more than once.
 */
#define	bsd_to_linux_trapcode(code) \
	((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
	_bsd_to_linux_trapcode[(code)]: \
	LINUX_T_UNKNOWN)
194
195 /*
196 * If FreeBSD & Linux have a difference of opinion about what a trap
197 * means, deal with it here.
198 *
199 * MPSAFE
200 */
201 static int
202 translate_traps(int signal, int trap_code)
203 {
204 if (signal != SIGBUS)
205 return signal;
206 switch (trap_code) {
207 case T_PROTFLT:
208 case T_TSSFLT:
209 case T_DOUBLEFLT:
210 case T_PAGEFLT:
211 return SIGSEGV;
212 default:
213 return signal;
214 }
215 }
216
217 static int
218 linux_fixup(register_t **stack_base, struct image_params *imgp)
219 {
220 register_t *argv, *envp;
221
222 argv = *stack_base;
223 envp = *stack_base + (imgp->args->argc + 1);
224 (*stack_base)--;
225 **stack_base = (intptr_t)(void *)envp;
226 (*stack_base)--;
227 **stack_base = (intptr_t)(void *)argv;
228 (*stack_base)--;
229 **stack_base = imgp->args->argc;
230 return 0;
231 }
232
233 static int
234 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
235 {
236 Elf32_Auxargs *args;
237 register_t *pos;
238
239 KASSERT(curthread->td_proc == imgp->proc &&
240 (curthread->td_proc->p_flag & P_SA) == 0,
241 ("unsafe elf_linux_fixup(), should be curproc"));
242 args = (Elf32_Auxargs *)imgp->auxargs;
243 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
244
245 if (args->trace)
246 AUXARGS_ENTRY(pos, AT_DEBUG, 1);
247 if (args->execfd != -1)
248 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
249 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
250 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
251 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
252 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
253 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
254 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
255 AUXARGS_ENTRY(pos, AT_BASE, args->base);
256 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
257 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
258 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
259 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
260 AUXARGS_ENTRY(pos, AT_NULL, 0);
261
262 free(imgp->auxargs, M_TEMP);
263 imgp->auxargs = NULL;
264
265 (*stack_base)--;
266 **stack_base = (register_t)imgp->args->argc;
267 return 0;
268 }
269
270 extern int _ucodesel, _udatasel;
271 extern unsigned long linux_sznonrtsigcode;
272
273 static void
274 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
275 {
276 struct thread *td = curthread;
277 struct proc *p = td->td_proc;
278 struct sigacts *psp;
279 struct trapframe *regs;
280 struct l_rt_sigframe *fp, frame;
281 int oonstack;
282
283 PROC_LOCK_ASSERT(p, MA_OWNED);
284 psp = p->p_sigacts;
285 mtx_assert(&psp->ps_mtx, MA_OWNED);
286 regs = td->td_frame;
287 oonstack = sigonstack(regs->tf_esp);
288
289 #ifdef DEBUG
290 if (ldebug(rt_sendsig))
291 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
292 catcher, sig, (void*)mask, code);
293 #endif
294 /*
295 * Allocate space for the signal handler context.
296 */
297 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
298 SIGISMEMBER(psp->ps_sigonstack, sig)) {
299 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
300 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
301 } else
302 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
303 mtx_unlock(&psp->ps_mtx);
304
305 /*
306 * Build the argument list for the signal handler.
307 */
308 if (p->p_sysent->sv_sigtbl)
309 if (sig <= p->p_sysent->sv_sigsize)
310 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
311
312 bzero(&frame, sizeof(frame));
313
314 frame.sf_handler = catcher;
315 frame.sf_sig = sig;
316 frame.sf_siginfo = &fp->sf_si;
317 frame.sf_ucontext = &fp->sf_sc;
318
319 /* Fill in POSIX parts */
320 frame.sf_si.lsi_signo = sig;
321 frame.sf_si.lsi_code = code;
322 frame.sf_si.lsi_addr = (void *)regs->tf_err;
323
324 /*
325 * Build the signal context to be used by sigreturn.
326 */
327 frame.sf_sc.uc_flags = 0; /* XXX ??? */
328 frame.sf_sc.uc_link = NULL; /* XXX ??? */
329
330 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
331 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
332 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
333 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
334 PROC_UNLOCK(p);
335
336 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
337
338 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
339 frame.sf_sc.uc_mcontext.sc_gs = rgs();
340 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
341 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
342 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
343 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
344 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
345 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
346 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
347 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
348 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
349 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
350 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
351 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
352 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
353 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
354 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
355 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
356 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
357
358 #ifdef DEBUG
359 if (ldebug(rt_sendsig))
360 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
361 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
362 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
363 #endif
364
365 if (copyout(&frame, fp, sizeof(frame)) != 0) {
366 /*
367 * Process has trashed its stack; give it an illegal
368 * instruction to halt it in its tracks.
369 */
370 #ifdef DEBUG
371 if (ldebug(rt_sendsig))
372 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
373 fp, oonstack);
374 #endif
375 PROC_LOCK(p);
376 sigexit(td, SIGILL);
377 }
378
379 /*
380 * Build context to run handler in.
381 */
382 regs->tf_esp = (int)fp;
383 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
384 linux_sznonrtsigcode;
385 regs->tf_eflags &= ~(PSL_T | PSL_VM);
386 regs->tf_cs = _ucodesel;
387 regs->tf_ds = _udatasel;
388 regs->tf_es = _udatasel;
389 regs->tf_fs = _udatasel;
390 regs->tf_ss = _udatasel;
391 PROC_LOCK(p);
392 mtx_lock(&psp->ps_mtx);
393 }
394
395
396 /*
397 * Send an interrupt to process.
398 *
399 * Stack is set up to allow sigcode stored
400 * in u. to call routine, followed by kcall
401 * to sigreturn routine below. After sigreturn
402 * resets the signal mask, the stack, and the
403 * frame pointer, it returns to the user
404 * specified pc, psl.
405 */
406 static void
407 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
408 {
409 struct thread *td = curthread;
410 struct proc *p = td->td_proc;
411 struct sigacts *psp;
412 struct trapframe *regs;
413 struct l_sigframe *fp, frame;
414 l_sigset_t lmask;
415 int oonstack, i;
416
417 PROC_LOCK_ASSERT(p, MA_OWNED);
418 psp = p->p_sigacts;
419 mtx_assert(&psp->ps_mtx, MA_OWNED);
420 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
421 /* Signal handler installed with SA_SIGINFO. */
422 linux_rt_sendsig(catcher, sig, mask, code);
423 return;
424 }
425
426 regs = td->td_frame;
427 oonstack = sigonstack(regs->tf_esp);
428
429 #ifdef DEBUG
430 if (ldebug(sendsig))
431 printf(ARGS(sendsig, "%p, %d, %p, %lu"),
432 catcher, sig, (void*)mask, code);
433 #endif
434
435 /*
436 * Allocate space for the signal handler context.
437 */
438 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
439 SIGISMEMBER(psp->ps_sigonstack, sig)) {
440 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
441 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
442 } else
443 fp = (struct l_sigframe *)regs->tf_esp - 1;
444 mtx_unlock(&psp->ps_mtx);
445 PROC_UNLOCK(p);
446
447 /*
448 * Build the argument list for the signal handler.
449 */
450 if (p->p_sysent->sv_sigtbl)
451 if (sig <= p->p_sysent->sv_sigsize)
452 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
453
454 bzero(&frame, sizeof(frame));
455
456 frame.sf_handler = catcher;
457 frame.sf_sig = sig;
458
459 bsd_to_linux_sigset(mask, &lmask);
460
461 /*
462 * Build the signal context to be used by sigreturn.
463 */
464 frame.sf_sc.sc_mask = lmask.__bits[0];
465 frame.sf_sc.sc_gs = rgs();
466 frame.sf_sc.sc_fs = regs->tf_fs;
467 frame.sf_sc.sc_es = regs->tf_es;
468 frame.sf_sc.sc_ds = regs->tf_ds;
469 frame.sf_sc.sc_edi = regs->tf_edi;
470 frame.sf_sc.sc_esi = regs->tf_esi;
471 frame.sf_sc.sc_ebp = regs->tf_ebp;
472 frame.sf_sc.sc_ebx = regs->tf_ebx;
473 frame.sf_sc.sc_edx = regs->tf_edx;
474 frame.sf_sc.sc_ecx = regs->tf_ecx;
475 frame.sf_sc.sc_eax = regs->tf_eax;
476 frame.sf_sc.sc_eip = regs->tf_eip;
477 frame.sf_sc.sc_cs = regs->tf_cs;
478 frame.sf_sc.sc_eflags = regs->tf_eflags;
479 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
480 frame.sf_sc.sc_ss = regs->tf_ss;
481 frame.sf_sc.sc_err = regs->tf_err;
482 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
483
484 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
485 frame.sf_extramask[i] = lmask.__bits[i+1];
486
487 if (copyout(&frame, fp, sizeof(frame)) != 0) {
488 /*
489 * Process has trashed its stack; give it an illegal
490 * instruction to halt it in its tracks.
491 */
492 PROC_LOCK(p);
493 sigexit(td, SIGILL);
494 }
495
496 /*
497 * Build context to run handler in.
498 */
499 regs->tf_esp = (int)fp;
500 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
501 regs->tf_eflags &= ~(PSL_T | PSL_VM);
502 regs->tf_cs = _ucodesel;
503 regs->tf_ds = _udatasel;
504 regs->tf_es = _udatasel;
505 regs->tf_fs = _udatasel;
506 regs->tf_ss = _udatasel;
507 PROC_LOCK(p);
508 mtx_lock(&psp->ps_mtx);
509 }
510
511 /*
512 * System call to cleanup state after a signal
513 * has been taken. Reset signal mask and
514 * stack state from context left by sendsig (above).
515 * Return to previous pc and psl as specified by
516 * context left by sendsig. Check carefully to
517 * make sure that the user has not modified the
518 * psl to gain improper privileges or to cause
519 * a machine fault.
520 */
521 int
522 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
523 {
524 struct proc *p = td->td_proc;
525 struct l_sigframe frame;
526 struct trapframe *regs;
527 l_sigset_t lmask;
528 int eflags, i;
529
530 regs = td->td_frame;
531
532 #ifdef DEBUG
533 if (ldebug(sigreturn))
534 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
535 #endif
536 /*
537 * The trampoline code hands us the sigframe.
538 * It is unsafe to keep track of it ourselves, in the event that a
539 * program jumps out of a signal handler.
540 */
541 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
542 return (EFAULT);
543
544 /*
545 * Check for security violations.
546 */
547 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
548 eflags = frame.sf_sc.sc_eflags;
549 /*
550 * XXX do allow users to change the privileged flag PSL_RF. The
551 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
552 * sometimes set it there too. tf_eflags is kept in the signal
553 * context during signal handling and there is no other place
554 * to remember it, so the PSL_RF bit may be corrupted by the
555 * signal handler without us knowing. Corruption of the PSL_RF
556 * bit at worst causes one more or one less debugger trap, so
557 * allowing it is fairly harmless.
558 */
559 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
560 return(EINVAL);
561
562 /*
563 * Don't allow users to load a valid privileged %cs. Let the
564 * hardware check for invalid selectors, excess privilege in
565 * other selectors, invalid %eip's and invalid %esp's.
566 */
567 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
568 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
569 trapsignal(td, SIGBUS, T_PROTFLT);
570 return(EINVAL);
571 }
572
573 lmask.__bits[0] = frame.sf_sc.sc_mask;
574 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
575 lmask.__bits[i+1] = frame.sf_extramask[i];
576 PROC_LOCK(p);
577 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
578 SIG_CANTMASK(td->td_sigmask);
579 signotify(td);
580 PROC_UNLOCK(p);
581
582 /*
583 * Restore signal context.
584 */
585 /* %gs was restored by the trampoline. */
586 regs->tf_fs = frame.sf_sc.sc_fs;
587 regs->tf_es = frame.sf_sc.sc_es;
588 regs->tf_ds = frame.sf_sc.sc_ds;
589 regs->tf_edi = frame.sf_sc.sc_edi;
590 regs->tf_esi = frame.sf_sc.sc_esi;
591 regs->tf_ebp = frame.sf_sc.sc_ebp;
592 regs->tf_ebx = frame.sf_sc.sc_ebx;
593 regs->tf_edx = frame.sf_sc.sc_edx;
594 regs->tf_ecx = frame.sf_sc.sc_ecx;
595 regs->tf_eax = frame.sf_sc.sc_eax;
596 regs->tf_eip = frame.sf_sc.sc_eip;
597 regs->tf_cs = frame.sf_sc.sc_cs;
598 regs->tf_eflags = eflags;
599 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
600 regs->tf_ss = frame.sf_sc.sc_ss;
601
602 return (EJUSTRETURN);
603 }
604
605 /*
606 * System call to cleanup state after a signal
607 * has been taken. Reset signal mask and
608 * stack state from context left by rt_sendsig (above).
609 * Return to previous pc and psl as specified by
610 * context left by sendsig. Check carefully to
611 * make sure that the user has not modified the
612 * psl to gain improper privileges or to cause
613 * a machine fault.
614 */
615 int
616 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
617 {
618 struct proc *p = td->td_proc;
619 struct l_ucontext uc;
620 struct l_sigcontext *context;
621 l_stack_t *lss;
622 stack_t ss;
623 struct trapframe *regs;
624 int eflags;
625
626 regs = td->td_frame;
627
628 #ifdef DEBUG
629 if (ldebug(rt_sigreturn))
630 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
631 #endif
632 /*
633 * The trampoline code hands us the ucontext.
634 * It is unsafe to keep track of it ourselves, in the event that a
635 * program jumps out of a signal handler.
636 */
637 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
638 return (EFAULT);
639
640 context = &uc.uc_mcontext;
641
642 /*
643 * Check for security violations.
644 */
645 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
646 eflags = context->sc_eflags;
647 /*
648 * XXX do allow users to change the privileged flag PSL_RF. The
649 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
650 * sometimes set it there too. tf_eflags is kept in the signal
651 * context during signal handling and there is no other place
652 * to remember it, so the PSL_RF bit may be corrupted by the
653 * signal handler without us knowing. Corruption of the PSL_RF
654 * bit at worst causes one more or one less debugger trap, so
655 * allowing it is fairly harmless.
656 */
657 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
658 return(EINVAL);
659
660 /*
661 * Don't allow users to load a valid privileged %cs. Let the
662 * hardware check for invalid selectors, excess privilege in
663 * other selectors, invalid %eip's and invalid %esp's.
664 */
665 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
666 if (!CS_SECURE(context->sc_cs)) {
667 trapsignal(td, SIGBUS, T_PROTFLT);
668 return(EINVAL);
669 }
670
671 PROC_LOCK(p);
672 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
673 SIG_CANTMASK(td->td_sigmask);
674 signotify(td);
675 PROC_UNLOCK(p);
676
677 /*
678 * Restore signal context
679 */
680 /* %gs was restored by the trampoline. */
681 regs->tf_fs = context->sc_fs;
682 regs->tf_es = context->sc_es;
683 regs->tf_ds = context->sc_ds;
684 regs->tf_edi = context->sc_edi;
685 regs->tf_esi = context->sc_esi;
686 regs->tf_ebp = context->sc_ebp;
687 regs->tf_ebx = context->sc_ebx;
688 regs->tf_edx = context->sc_edx;
689 regs->tf_ecx = context->sc_ecx;
690 regs->tf_eax = context->sc_eax;
691 regs->tf_eip = context->sc_eip;
692 regs->tf_cs = context->sc_cs;
693 regs->tf_eflags = eflags;
694 regs->tf_esp = context->sc_esp_at_signal;
695 regs->tf_ss = context->sc_ss;
696
697 /*
698 * call sigaltstack & ignore results..
699 */
700 lss = &uc.uc_stack;
701 ss.ss_sp = lss->ss_sp;
702 ss.ss_size = lss->ss_size;
703 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
704
705 #ifdef DEBUG
706 if (ldebug(rt_sigreturn))
707 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
708 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
709 #endif
710 (void)kern_sigaltstack(td, &ss, NULL);
711
712 return (EJUSTRETURN);
713 }
714
715 /*
716 * MPSAFE
717 */
718 static void
719 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
720 {
721 args[0] = tf->tf_ebx;
722 args[1] = tf->tf_ecx;
723 args[2] = tf->tf_edx;
724 args[3] = tf->tf_esi;
725 args[4] = tf->tf_edi;
726 args[5] = tf->tf_ebp; /* Unconfirmed */
727 *params = NULL; /* no copyin */
728 }
729
730 /*
731 * If a linux binary is exec'ing something, try this image activator
732 * first. We override standard shell script execution in order to
733 * be able to modify the interpreter path. We only do this if a linux
734 * binary is doing the exec, so we do not create an EXEC module for it.
735 */
736 static int exec_linux_imgact_try(struct image_params *iparams);
737
738 static int
739 exec_linux_imgact_try(struct image_params *imgp)
740 {
741 const char *head = (const char *)imgp->image_header;
742 char *rpath;
743 int error = -1, len;
744
745 /*
746 * The interpreter for shell scripts run from a linux binary needs
747 * to be located in /compat/linux if possible in order to recursively
748 * maintain linux path emulation.
749 */
750 if (((const short *)head)[0] == SHELLMAGIC) {
751 /*
752 * Run our normal shell image activator. If it succeeds attempt
753 * to use the alternate path for the interpreter. If an alternate
754 * path is found, use our stringspace to store it.
755 */
756 if ((error = exec_shell_imgact(imgp)) == 0) {
757 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
758 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
759 if (rpath != NULL) {
760 len = strlen(rpath) + 1;
761
762 if (len <= MAXSHELLCMDLEN) {
763 memcpy(imgp->interpreter_name, rpath, len);
764 }
765 free(rpath, M_TEMP);
766 }
767 }
768 }
769 return(error);
770 }
771
772 /*
773 * exec_setregs may initialize some registers differently than Linux
774 * does, thus potentially confusing Linux binaries. If necessary, we
775 * override the exec_setregs default(s) here.
776 */
777 static void
778 exec_linux_setregs(struct thread *td, u_long entry,
779 u_long stack, u_long ps_strings)
780 {
781 static const u_short control = __LINUX_NPXCW__;
782 struct pcb *pcb = td->td_pcb;
783
784 exec_setregs(td, entry, stack, ps_strings);
785
786 /* Linux sets %gs to 0, we default to _udatasel */
787 pcb->pcb_gs = 0; load_gs(0);
788
789 /* Linux sets the i387 to extended precision. */
790 fldcw(&control);
791 }
792
/*
 * System-entry vector for Linux a.out images.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct sysentvec.  NOTE(review): the member names in the
 * trailing comments are assumed from <sys/sysent.h>, which is not visible
 * here -- confirm them against that header before relying on them.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,		/* sv_size */
	linux_sysent,			/* sv_table */
	0xff,				/* sv_mask */
	LINUX_SIGTBLSZ,			/* sv_sigsize */
	bsd_to_linux_signal,		/* sv_sigtbl */
	ELAST + 1,			/* sv_errsize */
	bsd_to_linux_errno,		/* sv_errtbl */
	translate_traps,		/* sv_transtrap */
	linux_fixup,			/* sv_fixup: a.out stack layout */
	linux_sendsig,			/* sv_sendsig */
	linux_sigcode,			/* sv_sigcode */
	&linux_szsigcode,		/* sv_szsigcode */
	linux_prepsyscall,		/* sv_prepsyscall */
	"Linux a.out",			/* sv_name */
	NULL,				/* sv_coredump: none for a.out */
	exec_linux_imgact_try,		/* sv_imgact_try */
	LINUX_MINSIGSTKSZ,		/* sv_minsigstksz */
	PAGE_SIZE,			/* sv_pagesize */
	VM_MIN_ADDRESS,			/* sv_minuser */
	VM_MAXUSER_ADDRESS,		/* sv_maxuser */
	USRSTACK,			/* sv_usrstack */
	PS_STRINGS,			/* sv_psstrings */
	VM_PROT_ALL,			/* sv_stackprot */
	exec_copyout_strings,		/* sv_copyout_strings */
	exec_linux_setregs,		/* sv_setregs */
	NULL				/* sv_fixlimit */
};
821
/*
 * System-entry vector for Linux ELF images; this is the vector the brand
 * descriptors below point at.  It differs from the a.out vector only in
 * the fixup routine, the name string and the core dump routine.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct sysentvec.  NOTE(review): the member names in the
 * trailing comments are assumed from <sys/sysent.h>, which is not visible
 * here -- confirm them against that header before relying on them.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,		/* sv_size */
	linux_sysent,			/* sv_table */
	0xff,				/* sv_mask */
	LINUX_SIGTBLSZ,			/* sv_sigsize */
	bsd_to_linux_signal,		/* sv_sigtbl */
	ELAST + 1,			/* sv_errsize */
	bsd_to_linux_errno,		/* sv_errtbl */
	translate_traps,		/* sv_transtrap */
	elf_linux_fixup,		/* sv_fixup: writes the ELF aux vector */
	linux_sendsig,			/* sv_sendsig */
	linux_sigcode,			/* sv_sigcode */
	&linux_szsigcode,		/* sv_szsigcode */
	linux_prepsyscall,		/* sv_prepsyscall */
	"Linux ELF",			/* sv_name */
	elf32_coredump,			/* sv_coredump */
	exec_linux_imgact_try,		/* sv_imgact_try */
	LINUX_MINSIGSTKSZ,		/* sv_minsigstksz */
	PAGE_SIZE,			/* sv_pagesize */
	VM_MIN_ADDRESS,			/* sv_minuser */
	VM_MAXUSER_ADDRESS,		/* sv_maxuser */
	USRSTACK,			/* sv_usrstack */
	PS_STRINGS,			/* sv_psstrings */
	VM_PROT_ALL,			/* sv_stackprot */
	exec_copyout_strings,		/* sv_copyout_strings */
	exec_linux_setregs,		/* sv_setregs */
	NULL				/* sv_fixlimit */
};
850
/*
 * ELF brand descriptors for Linux binaries.  The two entries are identical
 * except for the interpreter path (ld-linux.so.1 versus ld-linux.so.2);
 * both use /compat/linux as the emulation path and elf_linux_sysvec as
 * the system-entry vector.
 */
static Elf32_Brandinfo linux_brand = {
	ELFOSABI_LINUX,
	EM_386,
	"Linux",
	"/compat/linux",
	"/lib/ld-linux.so.1",
	&elf_linux_sysvec,
	NULL,
};

static Elf32_Brandinfo linux_glibc2brand = {
	ELFOSABI_LINUX,
	EM_386,
	"Linux",
	"/compat/linux",
	"/lib/ld-linux.so.2",
	&elf_linux_sysvec,
	NULL,
};

/* NULL-terminated list of the brands handled by linux_elf_modevent(). */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	NULL
};
876
877 static int
878 linux_elf_modevent(module_t mod, int type, void *data)
879 {
880 Elf32_Brandinfo **brandinfo;
881 int error;
882 struct linux_ioctl_handler **lihp;
883 struct linux_device_handler **ldhp;
884
885 error = 0;
886
887 switch(type) {
888 case MOD_LOAD:
889 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
890 ++brandinfo)
891 if (elf32_insert_brand_entry(*brandinfo) < 0)
892 error = EINVAL;
893 if (error == 0) {
894 SET_FOREACH(lihp, linux_ioctl_handler_set)
895 linux_ioctl_register_handler(*lihp);
896 SET_FOREACH(ldhp, linux_device_handler_set)
897 linux_device_register_handler(*ldhp);
898 if (bootverbose)
899 printf("Linux ELF exec handler installed\n");
900 } else
901 printf("cannot insert Linux ELF brand handler\n");
902 break;
903 case MOD_UNLOAD:
904 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
905 ++brandinfo)
906 if (elf32_brand_inuse(*brandinfo))
907 error = EBUSY;
908 if (error == 0) {
909 for (brandinfo = &linux_brandlist[0];
910 *brandinfo != NULL; ++brandinfo)
911 if (elf32_remove_brand_entry(*brandinfo) < 0)
912 error = EINVAL;
913 }
914 if (error == 0) {
915 SET_FOREACH(lihp, linux_ioctl_handler_set)
916 linux_ioctl_unregister_handler(*lihp);
917 SET_FOREACH(ldhp, linux_device_handler_set)
918 linux_device_unregister_handler(*ldhp);
919 if (bootverbose)
920 printf("Linux ELF exec handler removed\n");
921 } else
922 printf("Could not deinstall ELF interpreter entry\n");
923 break;
924 default:
925 return EOPNOTSUPP;
926 }
927 return error;
928 }
929
/*
 * Module description: name "linuxelf", events handled by
 * linux_elf_modevent(), no extra argument.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Register the module at SI_SUB_EXEC with order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
/* Cache object: d42f98104dd9b04d051f359890452bfa (trailer that is not C source; apparently left over from extraction) */