1 /*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: src/sys/i386/linux/linux_sysvec.c,v 1.55.2.9 2002/01/12 11:03:30 bde Exp $
29 */
30
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kern_syscall.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/proc.h>
43 #include <sys/signalvar.h>
44 #include <sys/sysent.h>
45 #include <sys/sysproto.h>
46 #include <sys/eventhandler.h>
47
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_extern.h>
52 #include <sys/exec.h>
53 #include <sys/kernel.h>
54 #include <sys/module.h>
55 #include <machine/cpu.h>
56
57 #include "linux.h"
58 #include "linux_proto.h"
59 #include "../linux_signal.h"
60 #include "../linux_util.h"
61 #include "../linux_futex.h"
62 #include "../linux_emuldata.h"
63
64 MODULE_VERSION(linux, 1);
65
66 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
67
68 #if BYTE_ORDER == LITTLE_ENDIAN
69 #define SHELLMAGIC 0x2123 /* #! */
70 #else
71 #define SHELLMAGIC 0x2321
72 #endif
73
74 /*
75 * Allow the sendsig functions to use the ldebug() facility
76 * even though they are not syscalls themselves. Map them
77 * to syscall 0. This is slightly less bogus than using
78 * ldebug(sigreturn).
79 */
80 #define LINUX_SYS_linux_rt_sendsig 0
81 #define LINUX_SYS_linux_sendsig 0
82
83 extern char linux_sigcode[];
84 extern int linux_szsigcode;
85
86 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
87
88 static int linux_fixup (register_t **stack_base,
89 struct image_params *iparams);
90 static int elf_linux_fixup (register_t **stack_base,
91 struct image_params *iparams);
92 static void linux_prepsyscall (struct trapframe *tf, int *args,
93 u_int *code, caddr_t *params);
94 static void linux_sendsig (sig_t catcher, int sig, sigset_t *mask,
95 u_long code);
96 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
97
98 static eventhandler_tag linux_exec_tag;
99 static eventhandler_tag linux_exit_tag;
100
101 /*
102 * Linux syscalls return negative errno's, we do positive and map them
103 */
104 static int bsd_to_linux_errno[ELAST + 1] = {
105 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
106 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
107 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
108 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
109 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
110 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
111 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
112 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
113 -6, -6, -43, -42, -75, -6, -84
114 };
115
116 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
117 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
118 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
119 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
120 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
121 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
122 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
123 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
124 0, LINUX_SIGUSR1, LINUX_SIGUSR2
125 };
126
127 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
128 SIGHUP, SIGINT, SIGQUIT, SIGILL,
129 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
130 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
131 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
132 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
133 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
134 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
135 SIGIO, SIGURG, SIGSYS
136 };
137
/* Value reported to Linux when a native trap code has no translation. */
#define	LINUX_T_UNKNOWN	255

/*
 * Native trap code -> Linux trap number, indexed by the native code.
 * Use the bsd_to_linux_trapcode() wrapper below rather than indexing
 * this table directly; the wrapper range-checks the index.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0 */
	6,			/*  1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2 */
	3,			/*  3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4 */
	LINUX_T_UNKNOWN,	/*  5 */
	16,			/*  6  T_ARITHTRAP */
	254,			/*  7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8 */
	13,			/*  9  T_PROTFLT */
	1,			/* 10  T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12  T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14  T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18  T_DIVIDE */
	2,			/* 19  T_NMI */
	4,			/* 20  T_OFLOW */
	5,			/* 21  T_BOUND */
	7,			/* 22  T_DNA */
	8,			/* 23  T_DOUBLEFLT */
	9,			/* 24  T_FPOPFLT */
	10,			/* 25  T_TSSFLT */
	11,			/* 26  T_SEGNPFLT */
	12,			/* 27  T_STKFLT */
	18,			/* 28  T_MCHK */
	19,			/* 29  T_XMMFLT */
	15			/* 30  T_RESERVED */
};

/* Translate a native trap code; out-of-range codes yield LINUX_T_UNKNOWN. */
#define	bsd_to_linux_trapcode(code)					\
	((code) < NELEM(_bsd_to_linux_trapcode) ?			\
	    _bsd_to_linux_trapcode[(code)] :				\
	    LINUX_T_UNKNOWN)
176
177 /*
178 * If FreeBSD & Linux have a difference of opinion about what a trap
179 * means, deal with it here.
180 */
181 static int
182 translate_traps(int signal, int trap_code)
183 {
184 if (signal != SIGBUS)
185 return signal;
186 switch (trap_code) {
187 case T_PROTFLT:
188 case T_TSSFLT:
189 case T_DOUBLEFLT:
190 case T_PAGEFLT:
191 return SIGSEGV;
192 default:
193 return signal;
194 }
195 }
196
197 static int
198 linux_fixup(register_t **stack_base, struct image_params *imgp)
199 {
200 register_t *argv, *envp;
201
202 argv = *stack_base;
203 envp = *stack_base + (imgp->args->argc + 1);
204 (*stack_base)--;
205 **stack_base = (intptr_t)(void *)envp;
206 (*stack_base)--;
207 **stack_base = (intptr_t)(void *)argv;
208 (*stack_base)--;
209 **stack_base = imgp->args->argc;
210 return 0;
211 }
212
213 static int
214 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
215 {
216 Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
217 register_t *pos;
218
219 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
220
221 if (args->execfd != -1) {
222 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
223 }
224 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
225 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
226 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
227 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
228 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
229 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
230 AUXARGS_ENTRY(pos, AT_BASE, args->base);
231 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
232 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
233 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
234 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
235 AUXARGS_ENTRY(pos, AT_NULL, 0);
236
237 kfree(imgp->auxargs, M_TEMP);
238 imgp->auxargs = NULL;
239
240 (*stack_base)--;
241 **stack_base = (long)imgp->args->argc;
242 return 0;
243 }
244
245 extern int _ucodesel, _udatasel;
246 extern unsigned long linux_sznonrtsigcode;
247
248 static void
249 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
250 {
251 struct proc *p = curproc;
252 struct lwp *lp = curthread->td_lwp;
253 struct trapframe *regs;
254 struct l_rt_sigframe *fp, frame;
255 int oonstack;
256
257 regs = lp->lwp_md.md_regs;
258 oonstack = lp->lwp_sigstk.ss_flags & SS_ONSTACK;
259
260 #ifdef DEBUG
261 if (ldebug(rt_sendsig))
262 kprintf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
263 catcher, sig, (void*)mask, code);
264 #endif
265 /*
266 * Allocate space for the signal handler context.
267 */
268 if ((lp->lwp_flags & LWP_ALTSTACK) && !oonstack &&
269 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
270 fp = (struct l_rt_sigframe *)(lp->lwp_sigstk.ss_sp +
271 lp->lwp_sigstk.ss_size - sizeof(struct l_rt_sigframe));
272 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
273 } else
274 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
275
276 /*
277 * grow() will return FALSE if the fp will not fit inside the stack
278 * and the stack can not be grown. useracc will return FALSE
279 * if access is denied.
280 */
281 if ((vm_map_growstack(p, (vm_offset_t)fp) != KERN_SUCCESS) ||
282 !useracc((caddr_t)fp, sizeof (struct l_rt_sigframe),
283 VM_PROT_WRITE)) {
284 /*
285 * Process has trashed its stack; give it an illegal
286 * instruction to halt it in its tracks.
287 */
288 SIGACTION(p, SIGILL) = SIG_DFL;
289 SIGDELSET(p->p_sigignore, SIGILL);
290 SIGDELSET(p->p_sigcatch, SIGILL);
291 SIGDELSET(lp->lwp_sigmask, SIGILL);
292 #ifdef DEBUG
293 if (ldebug(rt_sendsig))
294 kprintf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
295 fp, oonstack);
296 #endif
297 lwpsignal(p, lp, SIGILL);
298 return;
299 }
300
301 /*
302 * Build the argument list for the signal handler.
303 */
304 if (p->p_sysent->sv_sigtbl)
305 if (sig <= p->p_sysent->sv_sigsize)
306 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
307
308 frame.sf_handler = catcher;
309 frame.sf_sig = sig;
310 frame.sf_siginfo = &fp->sf_si;
311 frame.sf_ucontext = &fp->sf_sc;
312
313 /* Fill siginfo structure. */
314 frame.sf_si.lsi_signo = sig;
315 frame.sf_si.lsi_code = code;
316 frame.sf_si.lsi_addr = (void *)regs->tf_err;
317
318 /*
319 * Build the signal context to be used by sigreturn.
320 */
321 frame.sf_sc.uc_flags = 0; /* XXX ??? */
322 frame.sf_sc.uc_link = NULL; /* XXX ??? */
323
324 frame.sf_sc.uc_stack.ss_sp = lp->lwp_sigstk.ss_sp;
325 frame.sf_sc.uc_stack.ss_size = lp->lwp_sigstk.ss_size;
326 frame.sf_sc.uc_stack.ss_flags = (lp->lwp_flags & LWP_ALTSTACK)
327 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
328
329 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
330
331 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
332 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
333 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
334 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
335 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
336 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
337 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
338 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
339 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
340 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
341 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
342 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
343 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
344 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
345 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
346 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
347 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
348 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
349 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
350
351 #ifdef DEBUG
352 if (ldebug(rt_sendsig))
353 kprintf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
354 frame.sf_sc.uc_stack.ss_flags, lp->lwp_sigstk.ss_sp,
355 lp->lwp_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
356 #endif
357
358 if (copyout(&frame, fp, sizeof(frame)) != 0) {
359 /*
360 * Process has trashed its stack; give it an illegal
361 * instruction to halt it in its tracks.
362 */
363 sigexit(lp, SIGILL);
364 /* NOTREACHED */
365 }
366
367 /*
368 * Build context to run handler in.
369 */
370 regs->tf_esp = (int)fp;
371 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
372 linux_sznonrtsigcode;
373
374 /*
375 * i386 abi specifies that the direction flag must be cleared
376 * on function entry
377 */
378 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
379
380 regs->tf_cs = _ucodesel;
381 regs->tf_ds = _udatasel;
382 regs->tf_es = _udatasel;
383 /* allow %fs and %gs to be inherited by the signal handler */
384 /*
385 regs->tf_fs = _udatasel;
386 regs->tf_gs = _udatasel;
387 */
388 regs->tf_ss = _udatasel;
389 clear_quickret();
390 }
391
392
393 /*
394 * Send an interrupt to process.
395 *
396 * Stack is set up to allow sigcode stored
397 * in u. to call routine, followed by kcall
398 * to sigreturn routine below. After sigreturn
399 * resets the signal mask, the stack, and the
400 * frame pointer, it returns to the user
401 * specified pc, psl.
402 */
403
404 static void
405 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
406 {
407 struct proc *p = curproc;
408 struct lwp *lp = curthread->td_lwp;
409 struct trapframe *regs;
410 struct l_sigframe *fp, frame;
411 l_sigset_t lmask;
412 int oonstack, i;
413
414 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
415 /* Signal handler installed with SA_SIGINFO. */
416 linux_rt_sendsig(catcher, sig, mask, code);
417 return;
418 }
419
420 regs = lp->lwp_md.md_regs;
421 oonstack = lp->lwp_sigstk.ss_flags & SS_ONSTACK;
422
423 #ifdef DEBUG
424 if (ldebug(sendsig))
425 kprintf(ARGS(sendsig, "%p, %d, %p, %lu"),
426 catcher, sig, (void*)mask, code);
427 #endif
428
429 /*
430 * Allocate space for the signal handler context.
431 */
432 if ((lp->lwp_flags & LWP_ALTSTACK) && !oonstack &&
433 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
434 fp = (struct l_sigframe *)(lp->lwp_sigstk.ss_sp +
435 lp->lwp_sigstk.ss_size - sizeof(struct l_sigframe));
436 lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
437 } else
438 fp = (struct l_sigframe *)regs->tf_esp - 1;
439
440 /*
441 * grow() will return FALSE if the fp will not fit inside the stack
442 * and the stack can not be grown. useracc will return FALSE
443 * if access is denied.
444 */
445 if ((vm_map_growstack(p, (vm_offset_t)fp) != KERN_SUCCESS) ||
446 !useracc((caddr_t)fp, sizeof (struct l_sigframe),
447 VM_PROT_WRITE)) {
448 /*
449 * Process has trashed its stack; give it an illegal
450 * instruction to halt it in its tracks.
451 */
452 SIGACTION(p, SIGILL) = SIG_DFL;
453 SIGDELSET(p->p_sigignore, SIGILL);
454 SIGDELSET(p->p_sigcatch, SIGILL);
455 SIGDELSET(lp->lwp_sigmask, SIGILL);
456 lwpsignal(p, lp, SIGILL);
457 return;
458 }
459
460 /*
461 * Build the argument list for the signal handler.
462 */
463 if (p->p_sysent->sv_sigtbl)
464 if (sig <= p->p_sysent->sv_sigsize)
465 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
466
467 frame.sf_handler = catcher;
468 frame.sf_sig = sig;
469
470 bsd_to_linux_sigset(mask, &lmask);
471
472 /*
473 * Build the signal context to be used by sigreturn.
474 */
475 frame.sf_sc.sc_mask = lmask.__bits[0];
476 frame.sf_sc.sc_gs = regs->tf_gs;
477 frame.sf_sc.sc_fs = regs->tf_fs;
478 frame.sf_sc.sc_es = regs->tf_es;
479 frame.sf_sc.sc_ds = regs->tf_ds;
480 frame.sf_sc.sc_edi = regs->tf_edi;
481 frame.sf_sc.sc_esi = regs->tf_esi;
482 frame.sf_sc.sc_ebp = regs->tf_ebp;
483 frame.sf_sc.sc_ebx = regs->tf_ebx;
484 frame.sf_sc.sc_edx = regs->tf_edx;
485 frame.sf_sc.sc_ecx = regs->tf_ecx;
486 frame.sf_sc.sc_eax = regs->tf_eax;
487 frame.sf_sc.sc_eip = regs->tf_eip;
488 frame.sf_sc.sc_cs = regs->tf_cs;
489 frame.sf_sc.sc_eflags = regs->tf_eflags;
490 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
491 frame.sf_sc.sc_ss = regs->tf_ss;
492 frame.sf_sc.sc_err = regs->tf_err;
493 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
494
495 bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
496
497 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
498 frame.sf_extramask[i] = lmask.__bits[i+1];
499
500 if (copyout(&frame, fp, sizeof(frame)) != 0) {
501 /*
502 * Process has trashed its stack; give it an illegal
503 * instruction to halt it in its tracks.
504 */
505 sigexit(lp, SIGILL);
506 /* NOTREACHED */
507 }
508
509 /*
510 * Build context to run handler in.
511 */
512 regs->tf_esp = (int)fp;
513 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
514
515 /*
516 * i386 abi specifies that the direction flag must be cleared
517 * on function entry
518 */
519 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
520
521 regs->tf_cs = _ucodesel;
522 regs->tf_ds = _udatasel;
523 regs->tf_es = _udatasel;
524 /* Allow %fs and %gs to be inherited by the signal handler */
525 /*
526 regs->tf_fs = _udatasel;
527 regs->tf_gs = _udatasel;
528 */
529 regs->tf_ss = _udatasel;
530 clear_quickret();
531 }
532
533 /*
534 * System call to cleanup state after a signal
535 * has been taken. Reset signal mask and
536 * stack state from context left by sendsig (above).
537 * Return to previous pc and psl as specified by
538 * context left by sendsig. Check carefully to
539 * make sure that the user has not modified the
540 * psl to gain improper privileges or to cause
541 * a machine fault.
542 *
543 * MPSAFE
544 */
545 int
546 sys_linux_sigreturn(struct linux_sigreturn_args *args)
547 {
548 struct lwp *lp = curthread->td_lwp;
549 struct l_sigframe frame;
550 struct trapframe *regs;
551 l_sigset_t lmask;
552 int eflags, i;
553
554 regs = lp->lwp_md.md_regs;
555
556 #ifdef DEBUG
557 if (ldebug(sigreturn))
558 kprintf(ARGS(sigreturn, "%p"), (void *)args->sfp);
559 #endif
560 /*
561 * The trampoline code hands us the sigframe.
562 * It is unsafe to keep track of it ourselves, in the event that a
563 * program jumps out of a signal handler.
564 */
565 if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
566 return (EFAULT);
567
568 /*
569 * Check for security violations.
570 */
571 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
572 eflags = frame.sf_sc.sc_eflags;
573 /*
574 * XXX do allow users to change the privileged flag PSL_RF. The
575 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
576 * sometimes set it there too. tf_eflags is kept in the signal
577 * context during signal handling and there is no other place
578 * to remember it, so the PSL_RF bit may be corrupted by the
579 * signal handler without us knowing. Corruption of the PSL_RF
580 * bit at worst causes one more or one less debugger trap, so
581 * allowing it is fairly harmless.
582 */
583 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
584 return(EINVAL);
585 }
586
587 /*
588 * Don't allow users to load a valid privileged %cs. Let the
589 * hardware check for invalid selectors, excess privilege in
590 * other selectors, invalid %eip's and invalid %esp's.
591 */
592 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
593 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
594 trapsignal(lp, SIGBUS, T_PROTFLT);
595 return(EINVAL);
596 }
597
598 lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
599 lmask.__bits[0] = frame.sf_sc.sc_mask;
600 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
601 lmask.__bits[i+1] = frame.sf_extramask[i];
602 linux_to_bsd_sigset(&lmask, &lp->lwp_sigmask);
603 SIG_CANTMASK(lp->lwp_sigmask);
604
605 /*
606 * Restore signal context.
607 */
608 /* %gs was restored by the trampoline. */
609 regs->tf_fs = frame.sf_sc.sc_fs;
610 regs->tf_es = frame.sf_sc.sc_es;
611 regs->tf_ds = frame.sf_sc.sc_ds;
612 regs->tf_edi = frame.sf_sc.sc_edi;
613 regs->tf_esi = frame.sf_sc.sc_esi;
614 regs->tf_ebp = frame.sf_sc.sc_ebp;
615 regs->tf_ebx = frame.sf_sc.sc_ebx;
616 regs->tf_edx = frame.sf_sc.sc_edx;
617 regs->tf_ecx = frame.sf_sc.sc_ecx;
618 regs->tf_eax = frame.sf_sc.sc_eax;
619 regs->tf_eip = frame.sf_sc.sc_eip;
620 regs->tf_cs = frame.sf_sc.sc_cs;
621 regs->tf_eflags = eflags;
622 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
623 regs->tf_ss = frame.sf_sc.sc_ss;
624 clear_quickret();
625
626 return (EJUSTRETURN);
627 }
628
629 /*
630 * System call to cleanup state after a signal
631 * has been taken. Reset signal mask and
632 * stack state from context left by rt_sendsig (above).
633 * Return to previous pc and psl as specified by
634 * context left by sendsig. Check carefully to
635 * make sure that the user has not modified the
636 * psl to gain improper privileges or to cause
637 * a machine fault.
638 *
639 * MPSAFE
640 */
641 int
642 sys_linux_rt_sigreturn(struct linux_rt_sigreturn_args *args)
643 {
644 struct lwp *lp = curthread->td_lwp;
645 struct l_ucontext uc;
646 struct l_sigcontext *context;
647 l_stack_t *lss;
648 stack_t ss;
649 struct trapframe *regs;
650 int eflags;
651
652 regs = lp->lwp_md.md_regs;
653
654 #ifdef DEBUG
655 if (ldebug(rt_sigreturn))
656 kprintf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
657 #endif
658 /*
659 * The trampoline code hands us the ucontext.
660 * It is unsafe to keep track of it ourselves, in the event that a
661 * program jumps out of a signal handler.
662 */
663 if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
664 return (EFAULT);
665
666 context = &uc.uc_mcontext;
667
668 /*
669 * Check for security violations.
670 */
671 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
672 eflags = context->sc_eflags;
673 /*
674 * XXX do allow users to change the privileged flag PSL_RF. The
675 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
676 * sometimes set it there too. tf_eflags is kept in the signal
677 * context during signal handling and there is no other place
678 * to remember it, so the PSL_RF bit may be corrupted by the
679 * signal handler without us knowing. Corruption of the PSL_RF
680 * bit at worst causes one more or one less debugger trap, so
681 * allowing it is fairly harmless.
682 */
683 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
684 return(EINVAL);
685 }
686
687 /*
688 * Don't allow users to load a valid privileged %cs. Let the
689 * hardware check for invalid selectors, excess privilege in
690 * other selectors, invalid %eip's and invalid %esp's.
691 */
692 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
693 if (!CS_SECURE(context->sc_cs)) {
694 trapsignal(lp, SIGBUS, T_PROTFLT);
695 return(EINVAL);
696 }
697
698 lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;
699 linux_to_bsd_sigset(&uc.uc_sigmask, &lp->lwp_sigmask);
700 SIG_CANTMASK(lp->lwp_sigmask);
701
702 /*
703 * Restore signal context
704 */
705 /* %gs was restored by the trampoline. */
706 regs->tf_fs = context->sc_fs;
707 regs->tf_es = context->sc_es;
708 regs->tf_ds = context->sc_ds;
709 regs->tf_edi = context->sc_edi;
710 regs->tf_esi = context->sc_esi;
711 regs->tf_ebp = context->sc_ebp;
712 regs->tf_ebx = context->sc_ebx;
713 regs->tf_edx = context->sc_edx;
714 regs->tf_ecx = context->sc_ecx;
715 regs->tf_eax = context->sc_eax;
716 regs->tf_eip = context->sc_eip;
717 regs->tf_cs = context->sc_cs;
718 regs->tf_eflags = eflags;
719 regs->tf_esp = context->sc_esp_at_signal;
720 regs->tf_ss = context->sc_ss;
721
722 /*
723 * call sigaltstack & ignore results..
724 */
725 lss = &uc.uc_stack;
726 ss.ss_sp = lss->ss_sp;
727 ss.ss_size = lss->ss_size;
728 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
729
730 #ifdef DEBUG
731 if (ldebug(rt_sigreturn))
732 kprintf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
733 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
734 #endif
735 kern_sigaltstack(&ss, NULL);
736 clear_quickret();
737
738 return (EJUSTRETURN);
739 }
740
741 /*
742 * Prep arguments.
743 *
744 * MUST BE MPSAFE
745 */
746 static void
747 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
748 {
749 args[0] = tf->tf_ebx;
750 args[1] = tf->tf_ecx;
751 args[2] = tf->tf_edx;
752 args[3] = tf->tf_esi;
753 args[4] = tf->tf_edi;
754 args[5] = tf->tf_ebp;
755 *params = NULL; /* no copyin */
756 }
757
758 /*
759 * If a linux binary is exec'ing something, try this image activator
760 * first. We override standard shell script execution in order to
761 * be able to modify the interpreter path. We only do this if a linux
762 * binary is doing the exec, so we do not create an EXEC module for it.
763 */
764 static int exec_linux_imgact_try (struct image_params *iparams);
765
766 static int
767 exec_linux_imgact_try(struct image_params *imgp)
768 {
769 const char *head = (const char *)imgp->image_header;
770 int error = -1;
771
772 /*
773 * The interpreter for shell scripts run from a linux binary needs
774 * to be located in /compat/linux if possible in order to recursively
775 * maintain linux path emulation.
776 */
777 if (((const short *)head)[0] == SHELLMAGIC) {
778 /*
779 * Run our normal shell image activator. If it succeeds attempt
780 * to use the alternate path for the interpreter. If an alternate
781 * path is found, use our stringspace to store it.
782 */
783 if ((error = exec_shell_imgact(imgp)) == 0) {
784 linux_translate_path(imgp->interpreter_name,
785 MAXSHELLCMDLEN);
786 }
787 }
788 return(error);
789 }
790
791 struct sysentvec linux_sysvec = {
792 .sv_size = LINUX_SYS_MAXSYSCALL,
793 .sv_table = linux_sysent,
794 .sv_mask = 0xffffffff,
795 .sv_sigsize = LINUX_SIGTBLSZ,
796 .sv_sigtbl = bsd_to_linux_signal,
797 .sv_errsize = ELAST + 1,
798 .sv_errtbl = bsd_to_linux_errno,
799 .sv_transtrap = translate_traps,
800 .sv_fixup = linux_fixup,
801 .sv_sendsig = linux_sendsig,
802 .sv_sigcode = linux_sigcode,
803 .sv_szsigcode = &linux_szsigcode,
804 .sv_prepsyscall = linux_prepsyscall,
805 .sv_name = "Linux a.out",
806 .sv_coredump = NULL,
807 .sv_imgact_try = exec_linux_imgact_try,
808 .sv_minsigstksz = LINUX_MINSIGSTKSZ
809 };
810
811 struct sysentvec elf_linux_sysvec = {
812 .sv_size = LINUX_SYS_MAXSYSCALL,
813 .sv_table = linux_sysent,
814 .sv_mask = 0xffffffff,
815 .sv_sigsize = LINUX_SIGTBLSZ,
816 .sv_sigtbl = bsd_to_linux_signal,
817 .sv_errsize = ELAST + 1,
818 .sv_errtbl = bsd_to_linux_errno,
819 .sv_transtrap = translate_traps,
820 .sv_fixup = elf_linux_fixup,
821 .sv_sendsig = linux_sendsig,
822 .sv_sigcode = linux_sigcode,
823 .sv_szsigcode = &linux_szsigcode,
824 .sv_prepsyscall = linux_prepsyscall,
825 .sv_name = "Linux ELF32",
826 .sv_coredump = elf32_coredump,
827 .sv_imgact_try = exec_linux_imgact_try,
828 .sv_minsigstksz = LINUX_MINSIGSTKSZ
829 };
830
831 static const char GNU_ABI_VENDOR[] = "GNU";
832 static const char SUSE_ABI_VENDOR[] = "SuSE";
833 static int GNULINUX_ABI_DESC = 0;
834
835 static boolean_t
836 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
837 {
838 const Elf32_Word *desc;
839 uintptr_t p;
840
841 p = (uintptr_t)(note + 1);
842 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
843
844 desc = (const Elf32_Word *)p;
845 if (desc[0] != GNULINUX_ABI_DESC)
846 return (FALSE);
847 /*
848 * For Linux we encode osrel as follows:
849 * VVVMMMIII (version, major, minor)
850 */
851 *osrel = desc[1] * 1000000 +
852 desc[2] * 1000 +
853 desc[3];
854
855 return (TRUE);
856 }
857
858 static Elf_Brandnote linux32_generic_brandnote = {
859 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
860 .hdr.n_descsz = 16,
861 .hdr.n_type = 1,
862 .vendor = GNU_ABI_VENDOR,
863 .flags = BN_TRANSLATE_OSREL,
864 .trans_osrel = linux_trans_osrel,
865 };
866
867 static Elf_Brandnote linux32_suse_brandnote = {
868 .hdr.n_namesz = sizeof(SUSE_ABI_VENDOR),
869 .hdr.n_descsz = 16,
870 .hdr.n_type = 1,
871 .vendor = SUSE_ABI_VENDOR,
872 .flags = BN_TRANSLATE_OSREL,
873 .trans_osrel = linux_trans_osrel,
874 };
875
876 static Elf32_Brandinfo linux32_brand = {
877 .brand = ELFOSABI_LINUX,
878 .machine = EM_386,
879 .compat_3_brand = "Linux",
880 .emul_path = "/compat/linux",
881 .interp_path = "/lib/ld-linux.so.1",
882 .sysvec = &elf_linux_sysvec,
883 .interp_newpath = NULL,
884 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
885 .brand_note = &linux32_generic_brandnote,
886 };
887
888 static Elf32_Brandinfo linux32_glibc2_brand = {
889 .brand = ELFOSABI_LINUX,
890 .machine = EM_386,
891 .compat_3_brand = "Linux",
892 .emul_path = "/compat/linux",
893 .interp_path = "/lib/ld-linux.so.2",
894 .sysvec = &elf_linux_sysvec,
895 .interp_newpath = NULL,
896 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
897 .brand_note = &linux32_generic_brandnote,
898 };
899
900 static Elf32_Brandinfo linux32_suse_brand = {
901 .brand = ELFOSABI_LINUX,
902 .machine = EM_386,
903 .compat_3_brand = "Linux",
904 .emul_path = "/compat/linux",
905 .interp_path = "/lib/ld-linux.so.2",
906 .sysvec = &elf_linux_sysvec,
907 .interp_newpath = NULL,
908 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
909 .brand_note = &linux32_suse_brandnote,
910 };
911
912 Elf32_Brandinfo *linux_brandlist[] = {
913 &linux32_brand,
914 &linux32_glibc2_brand,
915 &linux32_suse_brand,
916 NULL
917 };
918
919 static int
920 linux_elf_modevent(module_t mod, int type, void *data)
921 {
922 Elf32_Brandinfo **brandinfo;
923 int error;
924
925 error = 0;
926
927 switch(type) {
928 case MOD_LOAD:
929 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
930 ++brandinfo)
931 if (elf32_insert_brand_entry(*brandinfo) < 0)
932 error = EINVAL;
933 if (error == 0) {
934 if (bootverbose)
935 kprintf("Linux ELF exec handler installed\n");
936 } else {
937 kprintf("cannot insert Linux ELF brand handler\n");
938 }
939 EMUL_LOCKINIT();
940 lockinit(&futex_mtx, "linftxs", 0, LK_CANRECURSE);
941 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_transition,
942 NULL, 1000);
943 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, emuldata_exit,
944 NULL, 1000);
945 break;
946 case MOD_UNLOAD:
947 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
948 ++brandinfo)
949 if (elf32_brand_inuse(*brandinfo))
950 error = EBUSY;
951 if (error == 0) {
952 for (brandinfo = &linux_brandlist[0];
953 *brandinfo != NULL; ++brandinfo)
954 if (elf32_remove_brand_entry(*brandinfo) < 0)
955 error = EINVAL;
956 }
957 if (error == 0) {
958 if (bootverbose)
959 kprintf("Linux ELF exec handler removed\n");
960 } else {
961 kprintf("Could not deinstall ELF interpreter entry\n");
962 }
963 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
964 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
965 lockuninit(&futex_mtx);
966 EMUL_LOCKUNINIT();
967 break;
968 default:
969 break;
970 }
971 return error;
972 }
973
974 static moduledata_t linux_elf_mod = {
975 "linuxelf",
976 linux_elf_modevent,
977 0
978 };
979
980 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Cache object: 89c5a34cad5606c34c0aec9677213f91
|