1 /*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/imgact.h>
36 #include <sys/imgact_aout.h>
37 #include <sys/imgact_elf.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/signalvar.h>
45 #include <sys/syscallsubr.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/vnode.h>
49 #include <sys/eventhandler.h>
50
51 #include <vm/vm.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_extern.h>
54 #include <vm/vm_map.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_param.h>
58
59 #include <machine/cpu.h>
60 #include <machine/cputypes.h>
61 #include <machine/md_var.h>
62 #include <machine/pcb.h>
63
64 #include <i386/linux/linux.h>
65 #include <i386/linux/linux_proto.h>
66 #include <compat/linux/linux_emul.h>
67 #include <compat/linux/linux_mib.h>
68 #include <compat/linux/linux_misc.h>
69 #include <compat/linux/linux_signal.h>
70 #include <compat/linux/linux_util.h>
71
/* Version tag for the "linux" module. */
MODULE_VERSION(linux, 1);

/* malloc type used for Linux-mode structures. */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * The two characters "#!" read as one 16-bit value; the constant
 * depends on host byte order.  exec_linux_imgact_try() compares the
 * first short of the image header against it.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC 0x2123 /* #! */
#else
#define SHELLMAGIC 0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves.  Map them
 * to syscall 0.  This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define LINUX_SYS_linux_rt_sendsig 0
#define LINUX_SYS_linux_sendsig 0

/* Signal trampoline code and its size, defined outside this file. */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Handler sets walked at module load/unload in linux_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations for the hooks placed in the sysentvec tables. */
static int linux_fixup(register_t **stack_base,
    struct image_params *iparams);
static int elf_linux_fixup(register_t **stack_base,
    struct image_params *iparams);
static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
    caddr_t *params);
static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void exec_linux_setregs(struct thread *td, u_long entry,
    u_long stack, u_long ps_strings);
static register_t *linux_copyout_strings(struct image_params *imgp);

/*
 * Platform name string ("i686", "i586", ...) and its size rounded up
 * to a multiple of sizeof(char *); both are set on MOD_LOAD.
 */
static int linux_szplatform;
const char *linux_platform;

/* Futex list and its lock, defined outside this file; initialised on MOD_LOAD. */
extern LIST_HEAD(futex_list, futex) futex_list;
extern struct sx futex_sx;

/* Event handler registrations made on MOD_LOAD and removed on MOD_UNLOAD. */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_schedtail_tag;
static eventhandler_tag linux_exec_tag;
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 * The table is indexed by the FreeBSD errno value (0 .. ELAST) and
 * holds the negated Linux errno to hand back to the Linux binary.
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	/*  0 -  9 */
	-0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
	/* 10 - 19 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	/* 20 - 29 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	/* 30 - 39 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	/* 40 - 49 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	/* 50 - 59 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	/* 60 - 69 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	/* 70 - 79 */
	-116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
	/* 80 - 89 */
	-6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
	/* 90 - 92 */
	-72, -67, -71
};
139
/*
 * FreeBSD -> Linux signal number translation.  Installed as sv_sigtbl
 * in both sysentvec tables below and looked up with _SIG_IDX(sig) by
 * the sendsig routines.
 * NOTE(review): the 0 entries presumably mark FreeBSD signals that have
 * no Linux counterpart -- confirm against the signal headers.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
150
/*
 * Linux -> FreeBSD signal number translation; the counterpart of
 * bsd_to_linux_signal[].  Not referenced elsewhere in this file.
 * NOTE(review): presumably indexed by Linux signal number minus one,
 * mirroring the _SIG_IDX() use on the other table -- confirm at the
 * call sites.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
161
/*
 * FreeBSD trap number -> Linux trap number translation.  The array is
 * indexed by the FreeBSD T_* value named in each entry's comment;
 * slots without a mapping hold LINUX_T_UNKNOWN.
 */
#define LINUX_T_UNKNOWN 255
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN, /* 0 */
	6, /* 1 T_PRIVINFLT */
	LINUX_T_UNKNOWN, /* 2 */
	3, /* 3 T_BPTFLT */
	LINUX_T_UNKNOWN, /* 4 */
	LINUX_T_UNKNOWN, /* 5 */
	16, /* 6 T_ARITHTRAP */
	254, /* 7 T_ASTFLT */
	LINUX_T_UNKNOWN, /* 8 */
	13, /* 9 T_PROTFLT */
	1, /* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN, /* 11 */
	14, /* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN, /* 13 */
	17, /* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN, /* 15 */
	LINUX_T_UNKNOWN, /* 16 */
	LINUX_T_UNKNOWN, /* 17 */
	0, /* 18 T_DIVIDE */
	2, /* 19 T_NMI */
	4, /* 20 T_OFLOW */
	5, /* 21 T_BOUND */
	7, /* 22 T_DNA */
	8, /* 23 T_DOUBLEFLT */
	9, /* 24 T_FPOPFLT */
	10, /* 25 T_TSSFLT */
	11, /* 26 T_SEGNPFLT */
	12, /* 27 T_STKFLT */
	18, /* 28 T_MCHK */
	19, /* 29 T_XMMFLT */
	15 /* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup: a code at or past the end of the table yields
 * LINUX_T_UNKNOWN.  The comparison is made against a sizeof expression,
 * i.e. in unsigned arithmetic.  Note that the argument is evaluated
 * twice.
 */
#define bsd_to_linux_trapcode(code) \
	((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
	_bsd_to_linux_trapcode[(code)]: \
	LINUX_T_UNKNOWN)
200
201 /*
202 * If FreeBSD & Linux have a difference of opinion about what a trap
203 * means, deal with it here.
204 *
205 * MPSAFE
206 */
207 static int
208 translate_traps(int signal, int trap_code)
209 {
210 if (signal != SIGBUS)
211 return signal;
212 switch (trap_code) {
213 case T_PROTFLT:
214 case T_TSSFLT:
215 case T_DOUBLEFLT:
216 case T_PAGEFLT:
217 return SIGSEGV;
218 default:
219 return signal;
220 }
221 }
222
223 static int
224 linux_fixup(register_t **stack_base, struct image_params *imgp)
225 {
226 register_t *argv, *envp;
227
228 argv = *stack_base;
229 envp = *stack_base + (imgp->args->argc + 1);
230 (*stack_base)--;
231 **stack_base = (intptr_t)(void *)envp;
232 (*stack_base)--;
233 **stack_base = (intptr_t)(void *)argv;
234 (*stack_base)--;
235 **stack_base = imgp->args->argc;
236 return (0);
237 }
238
239 static int
240 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
241 {
242 struct proc *p;
243 Elf32_Auxargs *args;
244 Elf32_Addr *uplatform;
245 struct ps_strings *arginfo;
246 register_t *pos;
247
248 KASSERT(curthread->td_proc == imgp->proc &&
249 (curthread->td_proc->p_flag & P_SA) == 0,
250 ("unsafe elf_linux_fixup(), should be curproc"));
251
252 p = imgp->proc;
253 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
254 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
255 linux_szplatform);
256 args = (Elf32_Auxargs *)imgp->auxargs;
257 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
258
259 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
260 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, hz);
261 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
262 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
263 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
264 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
265 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
266 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
267 AUXARGS_ENTRY(pos, AT_BASE, args->base);
268 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
269 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
270 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
271 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
272 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
273 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
274 if (args->execfd != -1)
275 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
276 AUXARGS_ENTRY(pos, AT_NULL, 0);
277
278 free(imgp->auxargs, M_TEMP);
279 imgp->auxargs = NULL;
280
281 (*stack_base)--;
282 **stack_base = (register_t)imgp->args->argc;
283 return (0);
284 }
285
286 /*
287 * Copied from kern/kern_exec.c
288 */
289 static register_t *
290 linux_copyout_strings(struct image_params *imgp)
291 {
292 int argc, envc;
293 char **vectp;
294 char *stringp, *destp;
295 register_t *stack_base;
296 struct ps_strings *arginfo;
297 struct proc *p;
298
299 /*
300 * Calculate string base and vector table pointers.
301 * Also deal with signal trampoline code for this exec type.
302 */
303 p = imgp->proc;
304 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
305 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
306 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
307 sizeof(char *));
308
309 /*
310 * install sigcode
311 */
312 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
313 linux_szsigcode), linux_szsigcode);
314
315 /*
316 * install LINUX_PLATFORM
317 */
318 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
319 linux_szplatform), linux_szplatform);
320
321 /*
322 * If we have a valid auxargs ptr, prepare some room
323 * on the stack.
324 */
325 if (imgp->auxargs) {
326 /*
327 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
328 * lower compatibility.
329 */
330 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
331 (LINUX_AT_COUNT * 2);
332 /*
333 * The '+ 2' is for the null pointers at the end of each of
334 * the arg and env vector sets,and imgp->auxarg_size is room
335 * for argument of Runtime loader.
336 */
337 vectp = (char **)(destp - (imgp->args->argc +
338 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
339 } else {
340 /*
341 * The '+ 2' is for the null pointers at the end of each of
342 * the arg and env vector sets
343 */
344 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
345 sizeof(char *));
346 }
347
348 /*
349 * vectp also becomes our initial stack base
350 */
351 stack_base = (register_t *)vectp;
352
353 stringp = imgp->args->begin_argv;
354 argc = imgp->args->argc;
355 envc = imgp->args->envc;
356
357 /*
358 * Copy out strings - arguments and environment.
359 */
360 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
361
362 /*
363 * Fill in "ps_strings" struct for ps, w, etc.
364 */
365 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
366 suword(&arginfo->ps_nargvstr, argc);
367
368 /*
369 * Fill in argument portion of vector table.
370 */
371 for (; argc > 0; --argc) {
372 suword(vectp++, (long)(intptr_t)destp);
373 while (*stringp++ != 0)
374 destp++;
375 destp++;
376 }
377
378 /* a null vector table pointer separates the argp's from the envp's */
379 suword(vectp++, 0);
380
381 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
382 suword(&arginfo->ps_nenvstr, envc);
383
384 /*
385 * Fill in environment portion of vector table.
386 */
387 for (; envc > 0; --envc) {
388 suword(vectp++, (long)(intptr_t)destp);
389 while (*stringp++ != 0)
390 destp++;
391 destp++;
392 }
393
394 /* end of vector table is a null pointer */
395 suword(vectp, 0);
396
397 return (stack_base);
398 }
399
400 extern int _ucodesel, _udatasel;
401 extern unsigned long linux_sznonrtsigcode;
402
403 static void
404 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
405 {
406 struct thread *td = curthread;
407 struct proc *p = td->td_proc;
408 struct sigacts *psp;
409 struct trapframe *regs;
410 struct l_rt_sigframe *fp, frame;
411 int sig, code;
412 int oonstack;
413
414 sig = ksi->ksi_signo;
415 code = ksi->ksi_code;
416 PROC_LOCK_ASSERT(p, MA_OWNED);
417 psp = p->p_sigacts;
418 mtx_assert(&psp->ps_mtx, MA_OWNED);
419 regs = td->td_frame;
420 oonstack = sigonstack(regs->tf_esp);
421
422 #ifdef DEBUG
423 if (ldebug(rt_sendsig))
424 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
425 catcher, sig, (void*)mask, code);
426 #endif
427 /*
428 * Allocate space for the signal handler context.
429 */
430 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
431 SIGISMEMBER(psp->ps_sigonstack, sig)) {
432 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
433 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
434 } else
435 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
436 mtx_unlock(&psp->ps_mtx);
437
438 /*
439 * Build the argument list for the signal handler.
440 */
441 if (p->p_sysent->sv_sigtbl)
442 if (sig <= p->p_sysent->sv_sigsize)
443 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
444
445 bzero(&frame, sizeof(frame));
446
447 frame.sf_handler = catcher;
448 frame.sf_sig = sig;
449 frame.sf_siginfo = &fp->sf_si;
450 frame.sf_ucontext = &fp->sf_sc;
451
452 /* Fill in POSIX parts */
453 frame.sf_si.lsi_signo = sig;
454 frame.sf_si.lsi_code = code;
455 frame.sf_si.lsi_addr = ksi->ksi_addr;
456
457 /*
458 * Build the signal context to be used by sigreturn.
459 */
460 frame.sf_sc.uc_flags = 0; /* XXX ??? */
461 frame.sf_sc.uc_link = NULL; /* XXX ??? */
462
463 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
464 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
465 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
466 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
467 PROC_UNLOCK(p);
468
469 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
470
471 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
472 frame.sf_sc.uc_mcontext.sc_gs = rgs();
473 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
474 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
475 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
476 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
477 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
478 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
479 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
480 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
481 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
482 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
483 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
484 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
485 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
486 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
487 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
488 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
489 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
490 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
491
492 #ifdef DEBUG
493 if (ldebug(rt_sendsig))
494 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
495 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
496 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
497 #endif
498
499 if (copyout(&frame, fp, sizeof(frame)) != 0) {
500 /*
501 * Process has trashed its stack; give it an illegal
502 * instruction to halt it in its tracks.
503 */
504 #ifdef DEBUG
505 if (ldebug(rt_sendsig))
506 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
507 fp, oonstack);
508 #endif
509 PROC_LOCK(p);
510 sigexit(td, SIGILL);
511 }
512
513 /*
514 * Build context to run handler in.
515 */
516 regs->tf_esp = (int)fp;
517 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
518 linux_sznonrtsigcode;
519 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
520 regs->tf_cs = _ucodesel;
521 regs->tf_ds = _udatasel;
522 regs->tf_es = _udatasel;
523 regs->tf_fs = _udatasel;
524 regs->tf_ss = _udatasel;
525 PROC_LOCK(p);
526 mtx_lock(&psp->ps_mtx);
527 }
528
529
530 /*
531 * Send an interrupt to process.
532 *
533 * Stack is set up to allow sigcode stored
534 * in u. to call routine, followed by kcall
535 * to sigreturn routine below. After sigreturn
536 * resets the signal mask, the stack, and the
537 * frame pointer, it returns to the user
538 * specified pc, psl.
539 */
540 static void
541 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
542 {
543 struct thread *td = curthread;
544 struct proc *p = td->td_proc;
545 struct sigacts *psp;
546 struct trapframe *regs;
547 struct l_sigframe *fp, frame;
548 l_sigset_t lmask;
549 int sig, code;
550 int oonstack, i;
551
552 PROC_LOCK_ASSERT(p, MA_OWNED);
553 psp = p->p_sigacts;
554 sig = ksi->ksi_signo;
555 code = ksi->ksi_code;
556 mtx_assert(&psp->ps_mtx, MA_OWNED);
557 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
558 /* Signal handler installed with SA_SIGINFO. */
559 linux_rt_sendsig(catcher, ksi, mask);
560 return;
561 }
562 regs = td->td_frame;
563 oonstack = sigonstack(regs->tf_esp);
564
565 #ifdef DEBUG
566 if (ldebug(sendsig))
567 printf(ARGS(sendsig, "%p, %d, %p, %u"),
568 catcher, sig, (void*)mask, code);
569 #endif
570
571 /*
572 * Allocate space for the signal handler context.
573 */
574 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
575 SIGISMEMBER(psp->ps_sigonstack, sig)) {
576 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
577 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
578 } else
579 fp = (struct l_sigframe *)regs->tf_esp - 1;
580 mtx_unlock(&psp->ps_mtx);
581 PROC_UNLOCK(p);
582
583 /*
584 * Build the argument list for the signal handler.
585 */
586 if (p->p_sysent->sv_sigtbl)
587 if (sig <= p->p_sysent->sv_sigsize)
588 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
589
590 bzero(&frame, sizeof(frame));
591
592 frame.sf_handler = catcher;
593 frame.sf_sig = sig;
594
595 bsd_to_linux_sigset(mask, &lmask);
596
597 /*
598 * Build the signal context to be used by sigreturn.
599 */
600 frame.sf_sc.sc_mask = lmask.__bits[0];
601 frame.sf_sc.sc_gs = rgs();
602 frame.sf_sc.sc_fs = regs->tf_fs;
603 frame.sf_sc.sc_es = regs->tf_es;
604 frame.sf_sc.sc_ds = regs->tf_ds;
605 frame.sf_sc.sc_edi = regs->tf_edi;
606 frame.sf_sc.sc_esi = regs->tf_esi;
607 frame.sf_sc.sc_ebp = regs->tf_ebp;
608 frame.sf_sc.sc_ebx = regs->tf_ebx;
609 frame.sf_sc.sc_edx = regs->tf_edx;
610 frame.sf_sc.sc_ecx = regs->tf_ecx;
611 frame.sf_sc.sc_eax = regs->tf_eax;
612 frame.sf_sc.sc_eip = regs->tf_eip;
613 frame.sf_sc.sc_cs = regs->tf_cs;
614 frame.sf_sc.sc_eflags = regs->tf_eflags;
615 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
616 frame.sf_sc.sc_ss = regs->tf_ss;
617 frame.sf_sc.sc_err = regs->tf_err;
618 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
619 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
620
621 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
622 frame.sf_extramask[i] = lmask.__bits[i+1];
623
624 if (copyout(&frame, fp, sizeof(frame)) != 0) {
625 /*
626 * Process has trashed its stack; give it an illegal
627 * instruction to halt it in its tracks.
628 */
629 PROC_LOCK(p);
630 sigexit(td, SIGILL);
631 }
632
633 /*
634 * Build context to run handler in.
635 */
636 regs->tf_esp = (int)fp;
637 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
638 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
639 regs->tf_cs = _ucodesel;
640 regs->tf_ds = _udatasel;
641 regs->tf_es = _udatasel;
642 regs->tf_fs = _udatasel;
643 regs->tf_ss = _udatasel;
644 PROC_LOCK(p);
645 mtx_lock(&psp->ps_mtx);
646 }
647
648 /*
649 * System call to cleanup state after a signal
650 * has been taken. Reset signal mask and
651 * stack state from context left by sendsig (above).
652 * Return to previous pc and psl as specified by
653 * context left by sendsig. Check carefully to
654 * make sure that the user has not modified the
655 * psl to gain improper privileges or to cause
656 * a machine fault.
657 */
658 int
659 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
660 {
661 struct proc *p = td->td_proc;
662 struct l_sigframe frame;
663 struct trapframe *regs;
664 l_sigset_t lmask;
665 int eflags, i;
666 ksiginfo_t ksi;
667
668 regs = td->td_frame;
669
670 #ifdef DEBUG
671 if (ldebug(sigreturn))
672 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
673 #endif
674 /*
675 * The trampoline code hands us the sigframe.
676 * It is unsafe to keep track of it ourselves, in the event that a
677 * program jumps out of a signal handler.
678 */
679 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
680 return (EFAULT);
681
682 /*
683 * Check for security violations.
684 */
685 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
686 eflags = frame.sf_sc.sc_eflags;
687 /*
688 * XXX do allow users to change the privileged flag PSL_RF. The
689 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
690 * sometimes set it there too. tf_eflags is kept in the signal
691 * context during signal handling and there is no other place
692 * to remember it, so the PSL_RF bit may be corrupted by the
693 * signal handler without us knowing. Corruption of the PSL_RF
694 * bit at worst causes one more or one less debugger trap, so
695 * allowing it is fairly harmless.
696 */
697 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
698 return(EINVAL);
699
700 /*
701 * Don't allow users to load a valid privileged %cs. Let the
702 * hardware check for invalid selectors, excess privilege in
703 * other selectors, invalid %eip's and invalid %esp's.
704 */
705 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
706 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
707 ksiginfo_init_trap(&ksi);
708 ksi.ksi_signo = SIGBUS;
709 ksi.ksi_code = BUS_OBJERR;
710 ksi.ksi_trapno = T_PROTFLT;
711 ksi.ksi_addr = (void *)regs->tf_eip;
712 trapsignal(td, &ksi);
713 return(EINVAL);
714 }
715
716 lmask.__bits[0] = frame.sf_sc.sc_mask;
717 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
718 lmask.__bits[i+1] = frame.sf_extramask[i];
719 PROC_LOCK(p);
720 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
721 SIG_CANTMASK(td->td_sigmask);
722 signotify(td);
723 PROC_UNLOCK(p);
724
725 /*
726 * Restore signal context.
727 */
728 /* %gs was restored by the trampoline. */
729 regs->tf_fs = frame.sf_sc.sc_fs;
730 regs->tf_es = frame.sf_sc.sc_es;
731 regs->tf_ds = frame.sf_sc.sc_ds;
732 regs->tf_edi = frame.sf_sc.sc_edi;
733 regs->tf_esi = frame.sf_sc.sc_esi;
734 regs->tf_ebp = frame.sf_sc.sc_ebp;
735 regs->tf_ebx = frame.sf_sc.sc_ebx;
736 regs->tf_edx = frame.sf_sc.sc_edx;
737 regs->tf_ecx = frame.sf_sc.sc_ecx;
738 regs->tf_eax = frame.sf_sc.sc_eax;
739 regs->tf_eip = frame.sf_sc.sc_eip;
740 regs->tf_cs = frame.sf_sc.sc_cs;
741 regs->tf_eflags = eflags;
742 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
743 regs->tf_ss = frame.sf_sc.sc_ss;
744
745 return (EJUSTRETURN);
746 }
747
748 /*
749 * System call to cleanup state after a signal
750 * has been taken. Reset signal mask and
751 * stack state from context left by rt_sendsig (above).
752 * Return to previous pc and psl as specified by
753 * context left by sendsig. Check carefully to
754 * make sure that the user has not modified the
755 * psl to gain improper privileges or to cause
756 * a machine fault.
757 */
758 int
759 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
760 {
761 struct proc *p = td->td_proc;
762 struct l_ucontext uc;
763 struct l_sigcontext *context;
764 l_stack_t *lss;
765 stack_t ss;
766 struct trapframe *regs;
767 int eflags;
768 ksiginfo_t ksi;
769
770 regs = td->td_frame;
771
772 #ifdef DEBUG
773 if (ldebug(rt_sigreturn))
774 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
775 #endif
776 /*
777 * The trampoline code hands us the ucontext.
778 * It is unsafe to keep track of it ourselves, in the event that a
779 * program jumps out of a signal handler.
780 */
781 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
782 return (EFAULT);
783
784 context = &uc.uc_mcontext;
785
786 /*
787 * Check for security violations.
788 */
789 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
790 eflags = context->sc_eflags;
791 /*
792 * XXX do allow users to change the privileged flag PSL_RF. The
793 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
794 * sometimes set it there too. tf_eflags is kept in the signal
795 * context during signal handling and there is no other place
796 * to remember it, so the PSL_RF bit may be corrupted by the
797 * signal handler without us knowing. Corruption of the PSL_RF
798 * bit at worst causes one more or one less debugger trap, so
799 * allowing it is fairly harmless.
800 */
801 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
802 return(EINVAL);
803
804 /*
805 * Don't allow users to load a valid privileged %cs. Let the
806 * hardware check for invalid selectors, excess privilege in
807 * other selectors, invalid %eip's and invalid %esp's.
808 */
809 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
810 if (!CS_SECURE(context->sc_cs)) {
811 ksiginfo_init_trap(&ksi);
812 ksi.ksi_signo = SIGBUS;
813 ksi.ksi_code = BUS_OBJERR;
814 ksi.ksi_trapno = T_PROTFLT;
815 ksi.ksi_addr = (void *)regs->tf_eip;
816 trapsignal(td, &ksi);
817 return(EINVAL);
818 }
819
820 PROC_LOCK(p);
821 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
822 SIG_CANTMASK(td->td_sigmask);
823 signotify(td);
824 PROC_UNLOCK(p);
825
826 /*
827 * Restore signal context
828 */
829 /* %gs was restored by the trampoline. */
830 regs->tf_fs = context->sc_fs;
831 regs->tf_es = context->sc_es;
832 regs->tf_ds = context->sc_ds;
833 regs->tf_edi = context->sc_edi;
834 regs->tf_esi = context->sc_esi;
835 regs->tf_ebp = context->sc_ebp;
836 regs->tf_ebx = context->sc_ebx;
837 regs->tf_edx = context->sc_edx;
838 regs->tf_ecx = context->sc_ecx;
839 regs->tf_eax = context->sc_eax;
840 regs->tf_eip = context->sc_eip;
841 regs->tf_cs = context->sc_cs;
842 regs->tf_eflags = eflags;
843 regs->tf_esp = context->sc_esp_at_signal;
844 regs->tf_ss = context->sc_ss;
845
846 /*
847 * call sigaltstack & ignore results..
848 */
849 lss = &uc.uc_stack;
850 ss.ss_sp = lss->ss_sp;
851 ss.ss_size = lss->ss_size;
852 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
853
854 #ifdef DEBUG
855 if (ldebug(rt_sigreturn))
856 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
857 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
858 #endif
859 (void)kern_sigaltstack(td, &ss, NULL);
860
861 return (EJUSTRETURN);
862 }
863
864 /*
865 * MPSAFE
866 */
867 static void
868 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
869 {
870 args[0] = tf->tf_ebx;
871 args[1] = tf->tf_ecx;
872 args[2] = tf->tf_edx;
873 args[3] = tf->tf_esi;
874 args[4] = tf->tf_edi;
875 args[5] = tf->tf_ebp; /* Unconfirmed */
876 *params = NULL; /* no copyin */
877 }
878
879 /*
880 * If a linux binary is exec'ing something, try this image activator
881 * first. We override standard shell script execution in order to
882 * be able to modify the interpreter path. We only do this if a linux
883 * binary is doing the exec, so we do not create an EXEC module for it.
884 */
885 static int exec_linux_imgact_try(struct image_params *iparams);
886
887 static int
888 exec_linux_imgact_try(struct image_params *imgp)
889 {
890 const char *head = (const char *)imgp->image_header;
891 char *rpath;
892 int error = -1, len;
893
894 /*
895 * The interpreter for shell scripts run from a linux binary needs
896 * to be located in /compat/linux if possible in order to recursively
897 * maintain linux path emulation.
898 */
899 if (((const short *)head)[0] == SHELLMAGIC) {
900 /*
901 * Run our normal shell image activator. If it succeeds attempt
902 * to use the alternate path for the interpreter. If an alternate
903 * path is found, use our stringspace to store it.
904 */
905 if ((error = exec_shell_imgact(imgp)) == 0) {
906 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
907 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
908 if (rpath != NULL) {
909 len = strlen(rpath) + 1;
910
911 if (len <= MAXSHELLCMDLEN) {
912 memcpy(imgp->interpreter_name, rpath, len);
913 }
914 free(rpath, M_TEMP);
915 }
916 }
917 }
918 return(error);
919 }
920
921 /*
922 * exec_setregs may initialize some registers differently than Linux
923 * does, thus potentially confusing Linux binaries. If necessary, we
924 * override the exec_setregs default(s) here.
925 */
926 static void
927 exec_linux_setregs(struct thread *td, u_long entry,
928 u_long stack, u_long ps_strings)
929 {
930 struct pcb *pcb = td->td_pcb;
931
932 exec_setregs(td, entry, stack, ps_strings);
933
934 /* Linux sets %gs to 0, we default to _udatasel */
935 pcb->pcb_gs = 0;
936 load_gs(0);
937
938 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
939 }
940
941 static void
942 linux_get_machine(const char **dst)
943 {
944
945 switch (cpu_class) {
946 case CPUCLASS_686:
947 *dst = "i686";
948 break;
949 case CPUCLASS_586:
950 *dst = "i586";
951 break;
952 case CPUCLASS_486:
953 *dst = "i486";
954 break;
955 default:
956 *dst = "i386";
957 }
958 }
959
/*
 * System entry vector named "Linux a.out".  It shares the syscall,
 * signal, errno and trap translation hooks with elf_linux_sysvec; the
 * differences are the stack fixup routine, the absence of a core dump
 * routine, and the use of exec_copyout_strings.
 */
struct sysentvec linux_sysvec = {
	/* System call table. */
	.sv_size = LINUX_SYS_MAXSYSCALL,
	.sv_table = linux_sysent,
	.sv_mask = 0,
	/* Signal, errno and trap translation. */
	.sv_sigsize = LINUX_SIGTBLSZ,
	.sv_sigtbl = bsd_to_linux_signal,
	.sv_errsize = ELAST + 1,
	.sv_errtbl = bsd_to_linux_errno,
	.sv_transtrap = translate_traps,
	/* Exec-time stack fixup and signal delivery. */
	.sv_fixup = linux_fixup,
	.sv_sendsig = linux_sendsig,
	.sv_sigcode = linux_sigcode,
	.sv_szsigcode = &linux_szsigcode,
	.sv_prepsyscall = linux_prepsyscall,
	.sv_name = "Linux a.out",
	.sv_coredump = NULL,
	.sv_imgact_try = exec_linux_imgact_try,
	.sv_minsigstksz = LINUX_MINSIGSTKSZ,
	/* Address space layout. */
	.sv_pagesize = PAGE_SIZE,
	.sv_minuser = VM_MIN_ADDRESS,
	.sv_maxuser = VM_MAXUSER_ADDRESS,
	.sv_usrstack = USRSTACK,
	.sv_psstrings = PS_STRINGS,
	.sv_stackprot = VM_PROT_ALL,
	.sv_copyout_strings = exec_copyout_strings,
	.sv_setregs = exec_linux_setregs,
	.sv_fixlimit = NULL,
	.sv_maxssiz = NULL
};
989
/*
 * System entry vector named "Linux ELF", referenced by the ELF brand
 * entries below.  Compared with linux_sysvec it uses elf_linux_fixup,
 * elf32_coredump and linux_copyout_strings.
 */
struct sysentvec elf_linux_sysvec = {
	/* System call table. */
	.sv_size = LINUX_SYS_MAXSYSCALL,
	.sv_table = linux_sysent,
	.sv_mask = 0,
	/* Signal, errno and trap translation. */
	.sv_sigsize = LINUX_SIGTBLSZ,
	.sv_sigtbl = bsd_to_linux_signal,
	.sv_errsize = ELAST + 1,
	.sv_errtbl = bsd_to_linux_errno,
	.sv_transtrap = translate_traps,
	/* Exec-time stack fixup and signal delivery. */
	.sv_fixup = elf_linux_fixup,
	.sv_sendsig = linux_sendsig,
	.sv_sigcode = linux_sigcode,
	.sv_szsigcode = &linux_szsigcode,
	.sv_prepsyscall = linux_prepsyscall,
	.sv_name = "Linux ELF",
	.sv_coredump = elf32_coredump,
	.sv_imgact_try = exec_linux_imgact_try,
	.sv_minsigstksz = LINUX_MINSIGSTKSZ,
	/* Address space layout. */
	.sv_pagesize = PAGE_SIZE,
	.sv_minuser = VM_MIN_ADDRESS,
	.sv_maxuser = VM_MAXUSER_ADDRESS,
	.sv_usrstack = USRSTACK,
	.sv_psstrings = PS_STRINGS,
	.sv_stackprot = VM_PROT_ALL,
	.sv_copyout_strings = linux_copyout_strings,
	.sv_setregs = exec_linux_setregs,
	.sv_fixlimit = NULL,
	.sv_maxssiz = NULL
};
1019
/* Vendor string carried in the brand note shared by both brands. */
static char GNULINUX_ABI_VENDOR[] = "GNU";

/* ELF note description used by the brand entries below. */
static Elf_Brandnote linux_brandnote = {
	.hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR),
	.hdr.n_descsz = 16,
	.hdr.n_type = 1,
	.vendor = GNULINUX_ABI_VENDOR,
	.flags = 0
};

/* Brand entry whose interpreter path is /lib/ld-linux.so.1. */
static Elf32_Brandinfo linux_brand = {
	.brand = ELFOSABI_LINUX,
	.machine = EM_386,
	.compat_3_brand = "Linux",
	.emul_path = "/compat/linux",
	.interp_path = "/lib/ld-linux.so.1",
	.sysvec = &elf_linux_sysvec,
	.interp_newpath = NULL,
	.brand_note = &linux_brandnote,
	.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};

/* Brand entry whose interpreter path is /lib/ld-linux.so.2. */
static Elf32_Brandinfo linux_glibc2brand = {
	.brand = ELFOSABI_LINUX,
	.machine = EM_386,
	.compat_3_brand = "Linux",
	.emul_path = "/compat/linux",
	.interp_path = "/lib/ld-linux.so.2",
	.sysvec = &elf_linux_sysvec,
	.interp_newpath = NULL,
	.brand_note = &linux_brandnote,
	.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};

/*
 * NULL-terminated list of the brands that linux_elf_modevent()
 * inserts on MOD_LOAD and removes on MOD_UNLOAD.
 */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	NULL
};
1059
1060 static int
1061 linux_elf_modevent(module_t mod, int type, void *data)
1062 {
1063 Elf32_Brandinfo **brandinfo;
1064 int error;
1065 struct linux_ioctl_handler **lihp;
1066 struct linux_device_handler **ldhp;
1067
1068 error = 0;
1069
1070 switch(type) {
1071 case MOD_LOAD:
1072 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1073 ++brandinfo)
1074 if (elf32_insert_brand_entry(*brandinfo) < 0)
1075 error = EINVAL;
1076 if (error == 0) {
1077 SET_FOREACH(lihp, linux_ioctl_handler_set)
1078 linux_ioctl_register_handler(*lihp);
1079 SET_FOREACH(ldhp, linux_device_handler_set)
1080 linux_device_register_handler(*ldhp);
1081 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1082 sx_init(&emul_shared_lock, "emuldata->shared lock");
1083 LIST_INIT(&futex_list);
1084 sx_init(&futex_sx, "futex protection lock");
1085 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1086 NULL, 1000);
1087 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1088 NULL, 1000);
1089 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1090 NULL, 1000);
1091 linux_get_machine(&linux_platform);
1092 linux_szplatform = roundup(strlen(linux_platform) + 1,
1093 sizeof(char *));
1094 if (bootverbose)
1095 printf("Linux ELF exec handler installed\n");
1096 } else
1097 printf("cannot insert Linux ELF brand handler\n");
1098 break;
1099 case MOD_UNLOAD:
1100 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1101 ++brandinfo)
1102 if (elf32_brand_inuse(*brandinfo))
1103 error = EBUSY;
1104 if (error == 0) {
1105 for (brandinfo = &linux_brandlist[0];
1106 *brandinfo != NULL; ++brandinfo)
1107 if (elf32_remove_brand_entry(*brandinfo) < 0)
1108 error = EINVAL;
1109 }
1110 if (error == 0) {
1111 SET_FOREACH(lihp, linux_ioctl_handler_set)
1112 linux_ioctl_unregister_handler(*lihp);
1113 SET_FOREACH(ldhp, linux_device_handler_set)
1114 linux_device_unregister_handler(*ldhp);
1115 mtx_destroy(&emul_lock);
1116 sx_destroy(&emul_shared_lock);
1117 sx_destroy(&futex_sx);
1118 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1119 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1120 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1121 if (bootverbose)
1122 printf("Linux ELF exec handler removed\n");
1123 } else
1124 printf("Could not deinstall ELF interpreter entry\n");
1125 break;
1126 default:
1127 return EOPNOTSUPP;
1128 }
1129 return error;
1130 }
1131
/*
 * Module description: the name "linuxelf", the event handler above,
 * and a zero third field.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Declare the module with subsystem SI_SUB_EXEC and order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Cache object: faf74e20e26b1aec799d993c228db084
|