1 /*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 /* XXX we use functions that might not exist. */
37 #include "opt_compat.h"
38
39 #ifndef COMPAT_43
40 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
41 #endif
42 #ifndef COMPAT_IA32
43 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
44 #endif
45
46 #define __ELF_WORD_SIZE 32
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/exec.h>
51 #include <sys/imgact.h>
52 #include <sys/imgact_elf.h>
53 #include <sys/kernel.h>
54 #include <sys/lock.h>
55 #include <sys/malloc.h>
56 #include <sys/module.h>
57 #include <sys/mutex.h>
58 #include <sys/proc.h>
59 #include <sys/resourcevar.h>
60 #include <sys/signalvar.h>
61 #include <sys/sysctl.h>
62 #include <sys/syscallsubr.h>
63 #include <sys/sysent.h>
64 #include <sys/sysproto.h>
65 #include <sys/vnode.h>
66
67 #include <vm/vm.h>
68 #include <vm/pmap.h>
69 #include <vm/vm_extern.h>
70 #include <vm/vm_map.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_param.h>
74
75 #include <machine/cpu.h>
76 #include <machine/md_var.h>
77 #include <machine/pcb.h>
78 #include <machine/specialreg.h>
79
80 #include <amd64/linux32/linux.h>
81 #include <amd64/linux32/linux32_proto.h>
82 #include <compat/linux/linux_mib.h>
83 #include <compat/linux/linux_signal.h>
84 #include <compat/linux/linux_util.h>
85
/*
 * Version of the "linux" module and the System V IPC modules
 * (message queues, semaphores, shared memory) it declares a
 * dependency on.
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* Malloc type for Linux emulation allocations. */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
92
/*
 * Store one auxiliary-vector entry as two consecutive 32-bit user words:
 * first the id, then the value.  pos is post-incremented twice, so the
 * argument must be a modifiable pointer lvalue; it is left pointing past
 * the entry just written.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32(pos++, id);	\
		suword32(pos++, val);	\
	} while (0)

/*
 * The two bytes "#!" read as a 16-bit value in host byte order.  It is
 * compared against the first short of an image header in
 * exec_linux_imgact_try().
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define	SHELLMAGIC	0x2123 /* #! */
#else
#define	SHELLMAGIC	0x2321
#endif
104
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves.  Map them
 * to syscall 0.  This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/*
 * Signal trampoline code and its size; both are defined outside this
 * file and are referenced from elf_linux_sysvec.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker set of ioctl handlers (un)registered by linux_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/*
 * Forward declarations for the static routines that are referenced from
 * the elf_linux_sysvec initializer.
 */
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
		    u_long code);
static void	exec_linux_setregs(struct thread *td, u_long entry,
				   u_long stack, u_long ps_strings);
static void	linux32_fixlimits(struct image_params *imgp);
131
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 *
 * The table has ELAST + 1 slots and is indexed by the (positive) BSD
 * errno; each slot holds the already-negated Linux value.  The trailing
 * comments give the BSD errno range covered by each row.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/*  0 -  9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 - 19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 - 29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 - 39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 - 49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 - 59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 - 69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 - 79 */
	-6, -6, -43, -42, -75, -6, -84				/* 80 - 86 */
};
146
/*
 * BSD -> Linux signal number translation, LINUX_SIGTBLSZ entries.
 * The sendsig routines index the per-ABI signal table with
 * _SIG_IDX(sig), and this array is the table listed in elf_linux_sysvec.
 * Slots holding 0 have no Linux signal listed for them.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
157
/*
 * Linux -> BSD signal number translation, LINUX_SIGTBLSZ entries.
 * The table is not indexed anywhere in this file; presumably it uses
 * the same "signal number minus one" indexing as bsd_to_linux_signal
 * -- TODO confirm against the users of the table.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
168
/* Value reported for trap codes that have no entry in the table below. */
#define LINUX_T_UNKNOWN  255

/*
 * Trap number translation table, indexed by the BSD trap code (shown in
 * the trailing comments); each slot holds the number reported to Linux
 * programs in sc_trapno.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Bounds-checked lookup in the table above: codes past the end of the
 * table yield LINUX_T_UNKNOWN.  Note that the argument is evaluated twice.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
207
/*
 * Layout of the ps_strings record with 32-bit pointer fields.
 * linux_copyout_strings() fills one in at user address LINUX32_PS_STRINGS.
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
	u_int ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
	u_int ps_nenvstr;	/* the number of environment strings */
};
214
215 /*
216 * If FreeBSD & Linux have a difference of opinion about what a trap
217 * means, deal with it here.
218 *
219 * MPSAFE
220 */
221 static int
222 translate_traps(int signal, int trap_code)
223 {
224 if (signal != SIGBUS)
225 return signal;
226 switch (trap_code) {
227 case T_PROTFLT:
228 case T_TSSFLT:
229 case T_DOUBLEFLT:
230 case T_PAGEFLT:
231 return SIGSEGV;
232 default:
233 return signal;
234 }
235 }
236
237 static int
238 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
239 {
240 Elf32_Auxargs *args;
241 Elf32_Addr *base;
242 Elf32_Addr *pos;
243
244 KASSERT(curthread->td_proc == imgp->proc &&
245 (curthread->td_proc->p_flag & P_SA) == 0,
246 ("unsafe elf_linux_fixup(), should be curproc"));
247 base = (Elf32_Addr *)*stack_base;
248 args = (Elf32_Auxargs *)imgp->auxargs;
249 pos = base + (imgp->argc + imgp->envc + 2);
250
251 if (args->trace)
252 AUXARGS_ENTRY_32(pos, AT_DEBUG, 1);
253 if (args->execfd != -1)
254 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
255 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
256 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
257 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
258 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
259 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
260 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
261 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
262 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
263 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
264 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
265 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
266 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
267
268 free(imgp->auxargs, M_TEMP);
269 imgp->auxargs = NULL;
270
271 base--;
272 suword32(base, (uint32_t)imgp->argc);
273 *stack_base = (register_t *)base;
274 return 0;
275 }
276
/*
 * Segment selectors loaded into the trapframe and data segment registers
 * by the signal and exec code below, and the offset added to the start
 * of the sigcode to form the handler entry point in linux_rt_sendsig().
 * All are defined outside this file.
 */
extern int _ucodesel, _ucode32sel, _udatasel;
extern unsigned long linux_sznonrtsigcode;
279
/*
 * Deliver a signal through a Linux "rt" signal frame (handler arguments,
 * a siginfo and a ucontext).  linux_sendsig() calls this for handlers
 * installed with SA_SIGINFO.
 *
 * The frame is built in a local copy, copied out to the user stack (or
 * to the alternate signal stack when one is enabled, not already in use
 * and requested for this signal), and the trapframe is then rewritten so
 * the thread resumes in the signal trampoline with 32-bit selectors.
 *
 * Locking: must be entered with the proc lock and ps_mtx held (both are
 * asserted).  ps_mtx is dropped once the frame address is chosen and the
 * proc lock once the alternate-stack state has been recorded; both are
 * taken again before returning.
 *
 *	catcher	handler address stored in the frame
 *	sig	signal number; translated through the sv_sigtbl of the
 *		process' sysentvec when one is set
 *	mask	signal mask saved in the frame for the later sigreturn
 *	code	stored as the siginfo code and mapped to sc_trapno
 */
static void
linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;
	int oonstack;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	/* Remember whether we are already running on the alternate stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context: at the top of
	 * the alternate stack if it applies, otherwise directly below the
	 * current user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Build the argument list for the signal handler.  The handler
	 * sees the signal number after translation by the ABI's table.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;
	/* These point into the user copy of the frame, not the local one. */
	frame.sf_siginfo = PTROUT(&fp->sf_si);
	frame.sf_ucontext = PTROUT(&fp->sf_sc);

	/* Fill in POSIX parts */
	frame.sf_si.lsi_signo = sig;
	frame.sf_si.lsi_code = code;
	frame.sf_si.lsi_addr = PTROUT(regs->tf_err);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = 0;		/* XXX ??? */

	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Save the interrupted register state in the machine context. */
	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
	frame.sf_sc.uc_mcontext.sc_fs     = rfs();
	__asm __volatile("movl %%es,%0" :
	    "=rm" (frame.sf_sc.uc_mcontext.sc_es));
	__asm __volatile("movl %%ds,%0" :
	    "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.  The new stack pointer is the
	 * frame just written; execution resumes linux_sznonrtsigcode bytes
	 * into the sigcode, which linux_copyout_strings() installs
	 * immediately below LINUX32_PS_STRINGS.  Single-stepping (PSL_T)
	 * is cleared for the handler.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
	    linux_sznonrtsigcode;
	regs->tf_rflags &= ~PSL_T;
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	load_ds(_udatasel);
	td->td_pcb->pcb_ds = _udatasel;
	load_es(_udatasel);
	td->td_pcb->pcb_es = _udatasel;
	/* Re-take the locks the caller expects to still hold. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
404
405
/*
 * Send a signal to the current process using the traditional (non-rt)
 * Linux signal frame.
 *
 * Handlers installed with SA_SIGINFO are handed to linux_rt_sendsig()
 * instead.  Otherwise a struct l_sigframe holding the handler address,
 * the translated signal number and the interrupted register state is
 * copied out to the user stack (or the alternate signal stack), and the
 * trapframe is rewritten so the thread resumes at the start of the
 * sigcode, which linux_copyout_strings() installs immediately below
 * LINUX32_PS_STRINGS.  The saved context is what linux_sigreturn()
 * restores from afterwards.
 *
 * Locking: must be entered with the proc lock and ps_mtx held (both are
 * asserted).  Both are dropped while the frame is built and copied out
 * and are taken again before returning.
 *
 *	catcher	handler address stored in the frame
 *	sig	signal number; translated through the sv_sigtbl of the
 *		process' sysentvec when one is set
 *	mask	signal mask saved in the frame
 *	code	mapped to sc_trapno
 */
static void
linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int oonstack, i;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, sig, mask, code);
		return;
	}

	regs = td->td_frame;
	/* Remember whether we are already running on the alternate stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context: at the top of
	 * the alternate stack if it applies, otherwise directly below the
	 * current user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.  The handler
	 * sees the signal number after translation by the ABI's table.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = rgs();
	frame.sf_sc.sc_fs     = rfs();
	__asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
	__asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
	frame.sf_sc.sc_edi    = regs->tf_rdi;
	frame.sf_sc.sc_esi    = regs->tf_rsi;
	frame.sf_sc.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.sc_edx    = regs->tf_rdx;
	frame.sf_sc.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.sc_eax    = regs->tf_rax;
	frame.sf_sc.sc_eip    = regs->tf_rip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_rflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	/* Mask words beyond the first go into the frame's extramask. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.  Single-stepping (PSL_T) is
	 * cleared for the handler.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_rflags &= ~PSL_T;
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	load_ds(_udatasel);
	td->td_pcb->pcb_ds = _udatasel;
	load_es(_udatasel);
	td->td_pcb->pcb_es = _udatasel;
	/* Re-take the locks the caller expects to still hold. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
521
/*
 * System call to clean up state after a signal has been taken through a
 * non-rt frame.  Resets the signal mask and the register state from the
 * struct l_sigframe left by linux_sendsig(), and returns to the pc and
 * flags recorded there.  The frame comes from user memory, so it is
 * checked to make sure the user has not modified the flags or %cs to
 * gain improper privileges or to cause a machine fault.
 *
 * Returns:
 *	EFAULT		the frame at args->sfp could not be copied in
 *	EINVAL		the saved eflags differ from the current ones in
 *			bits outside PSL_USERCHANGE (PSL_RF excepted), or
 *			the saved %cs is not a user-privilege selector (in
 *			which case SIGBUS/T_PROTFLT is also posted)
 *	EJUSTRETURN	the context was restored
 */
int
linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_sigframe frame;
	struct trapframe *regs;
	l_sigset_t lmask;
	int eflags, i;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(sigreturn))
		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
#endif
	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/*
	 * Check for security violations.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		trapsignal(td, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/* Reassemble the saved mask from sc_mask plus the extramask words. */
	lmask.__bits[0] = frame.sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		lmask.__bits[i+1] = frame.sf_extramask[i];
	PROC_LOCK(p);
	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context.
	 */
	/* Selectors were restored by the trampoline. */
	regs->tf_rdi    = frame.sf_sc.sc_edi;
	regs->tf_rsi    = frame.sf_sc.sc_esi;
	regs->tf_rbp    = frame.sf_sc.sc_ebp;
	regs->tf_rbx    = frame.sf_sc.sc_ebx;
	regs->tf_rdx    = frame.sf_sc.sc_edx;
	regs->tf_rcx    = frame.sf_sc.sc_ecx;
	regs->tf_rax    = frame.sf_sc.sc_eax;
	regs->tf_rip    = frame.sf_sc.sc_eip;
	regs->tf_cs     = frame.sf_sc.sc_cs;
	regs->tf_rflags = eflags;
	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss     = frame.sf_sc.sc_ss;

	return (EJUSTRETURN);
}
612
/*
 * System call to clean up state after a signal has been taken through an
 * rt frame.  Resets the signal mask, the register state and the
 * alternate-stack settings from the struct l_ucontext left by
 * linux_rt_sendsig(), and returns to the pc and flags recorded there.
 * The context comes from user memory, so it is checked to make sure the
 * user has not modified the flags or %cs to gain improper privileges or
 * to cause a machine fault.
 *
 * Returns:
 *	EFAULT		the context at args->ucp could not be copied in
 *	EINVAL		the saved eflags differ from the current ones in
 *			bits outside PSL_USERCHANGE (PSL_RF excepted), or
 *			the saved %cs is not a user-privilege selector (in
 *			which case SIGBUS/T_PROTFLT is also posted)
 *	EJUSTRETURN	the context was restored
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_ucontext uc;
	struct l_sigcontext *context;
	l_stack_t *lss;
	stack_t ss;
	struct trapframe *regs;
	int eflags;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	context = &uc.uc_mcontext;

	/*
	 * Check for security violations.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		trapsignal(td, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/* Reinstate the signal mask saved in the ucontext. */
	PROC_LOCK(p);
	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context
	 */
	/* Selectors were restored by the trampoline. */
	regs->tf_rdi    = context->sc_edi;
	regs->tf_rsi    = context->sc_esi;
	regs->tf_rbp    = context->sc_ebp;
	regs->tf_rbx    = context->sc_ebx;
	regs->tf_rdx    = context->sc_edx;
	regs->tf_rcx    = context->sc_ecx;
	regs->tf_rax    = context->sc_eax;
	regs->tf_rip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_rflags = eflags;
	regs->tf_rsp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;

	/*
	 * call sigaltstack & ignore results..
	 * The Linux stack description from the ucontext is converted to
	 * the native stack_t first.
	 */
	lss = &uc.uc_stack;
	ss.ss_sp = PTRIN(lss->ss_sp);
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
#endif
	(void)kern_sigaltstack(td, &ss, NULL);

	return (EJUSTRETURN);
}
719
720 /*
721 * MPSAFE
722 */
723 static void
724 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
725 {
726 args[0] = tf->tf_rbx;
727 args[1] = tf->tf_rcx;
728 args[2] = tf->tf_rdx;
729 args[3] = tf->tf_rsi;
730 args[4] = tf->tf_rdi;
731 args[5] = tf->tf_rbp; /* Unconfirmed */
732 *params = NULL; /* no copyin */
733 }
734
735 /*
736 * If a linux binary is exec'ing something, try this image activator
737 * first. We override standard shell script execution in order to
738 * be able to modify the interpreter path. We only do this if a linux
739 * binary is doing the exec, so we do not create an EXEC module for it.
740 */
741 static int exec_linux_imgact_try(struct image_params *iparams);
742
743 static int
744 exec_linux_imgact_try(struct image_params *imgp)
745 {
746 const char *head = (const char *)imgp->image_header;
747 char *rpath;
748 int error = -1, len;
749
750 /*
751 * The interpreter for shell scripts run from a linux binary needs
752 * to be located in /compat/linux if possible in order to recursively
753 * maintain linux path emulation.
754 */
755 if (((const short *)head)[0] == SHELLMAGIC) {
756 /*
757 * Run our normal shell image activator. If it succeeds attempt
758 * to use the alternate path for the interpreter. If an alternate
759 * path is found, use our stringspace to store it.
760 */
761 if ((error = exec_shell_imgact(imgp)) == 0) {
762 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
763 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
764 if (rpath != NULL) {
765 len = strlen(rpath) + 1;
766
767 if (len <= MAXSHELLCMDLEN) {
768 memcpy(imgp->interpreter_name, rpath, len);
769 }
770 free(rpath, M_TEMP);
771 }
772 }
773 }
774 return(error);
775 }
776
/*
 * Clear registers on exec and set up the initial user context for a
 * 32-bit Linux image.
 * XXX copied from ia32_signal.c.
 *
 *	td		thread whose trapframe and pcb are reset
 *	entry		initial instruction pointer
 *	stack		initial stack pointer
 *	ps_strings	value handed to the new image in %rbx
 */
static void
exec_linux_setregs(td, entry, stack, ps_strings)
	struct thread *td;
	u_long entry;
	u_long stack;
	u_long ps_strings;
{
	struct trapframe *regs = td->td_frame;
	struct pcb *pcb = td->td_pcb;

	/* Zero the FS/GS base addresses, in the MSRs and in the pcb copy. */
	wrmsr(MSR_FSBASE, 0);
	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
	pcb->pcb_fsbase = 0;
	pcb->pcb_gsbase = 0;
	/* Load the user data selector everywhere except %gs, which is 0. */
	load_ds(_udatasel);
	load_es(_udatasel);
	load_fs(_udatasel);
	load_gs(0);
	pcb->pcb_ds = _udatasel;
	pcb->pcb_es = _udatasel;
	pcb->pcb_fs = _udatasel;
	pcb->pcb_gs = 0;

	bzero((char *)regs, sizeof(struct trapframe));
	regs->tf_rip = entry;
	regs->tf_rsp = stack;
	/*
	 * NOTE(review): tf_rflags was zeroed by the bzero() above, so the
	 * PSL_T term below is always 0 and the result is plain PSL_USER.
	 */
	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
	regs->tf_ss = _udatasel;
	regs->tf_cs = _ucode32sel;
	regs->tf_rbx = ps_strings;
	load_cr0(rcr0() | CR0_MP | CR0_TS);

	/* Return via doreti so that we can change to a different %cs */
	pcb->pcb_flags |= PCB_FULLCTX;
	td->td_retval[1] = 0;
}
817
818 /*
819 * XXX copied from ia32_sysvec.c.
820 */
821 static register_t *
822 linux_copyout_strings(struct image_params *imgp)
823 {
824 int argc, envc;
825 u_int32_t *vectp;
826 char *stringp, *destp;
827 u_int32_t *stack_base;
828 struct linux32_ps_strings *arginfo;
829 int sigcodesz;
830
831 /*
832 * Calculate string base and vector table pointers.
833 * Also deal with signal trampoline code for this exec type.
834 */
835 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
836 sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode);
837 destp = (caddr_t)arginfo - sigcodesz - SPARE_USRSPACE -
838 roundup((ARG_MAX - imgp->stringspace), sizeof(char *));
839
840 /*
841 * install sigcode
842 */
843 if (sigcodesz)
844 copyout(imgp->proc->p_sysent->sv_sigcode,
845 ((caddr_t)arginfo - sigcodesz), szsigcode);
846
847 /*
848 * If we have a valid auxargs ptr, prepare some room
849 * on the stack.
850 */
851 if (imgp->auxargs) {
852 /*
853 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
854 * lower compatibility.
855 */
856 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
857 : (AT_COUNT * 2);
858 /*
859 * The '+ 2' is for the null pointers at the end of each of
860 * the arg and env vector sets,and imgp->auxarg_size is room
861 * for argument of Runtime loader.
862 */
863 vectp = (u_int32_t *) (destp - (imgp->argc + imgp->envc + 2 +
864 imgp->auxarg_size) * sizeof(u_int32_t));
865
866 } else
867 /*
868 * The '+ 2' is for the null pointers at the end of each of
869 * the arg and env vector sets
870 */
871 vectp = (u_int32_t *)
872 (destp - (imgp->argc + imgp->envc + 2) * sizeof(u_int32_t));
873
874 /*
875 * vectp also becomes our initial stack base
876 */
877 stack_base = vectp;
878
879 stringp = imgp->stringbase;
880 argc = imgp->argc;
881 envc = imgp->envc;
882 /*
883 * Copy out strings - arguments and environment.
884 */
885 copyout(stringp, destp, ARG_MAX - imgp->stringspace);
886
887 /*
888 * Fill in "ps_strings" struct for ps, w, etc.
889 */
890 suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
891 suword32(&arginfo->ps_nargvstr, argc);
892
893 /*
894 * Fill in argument portion of vector table.
895 */
896 for (; argc > 0; --argc) {
897 suword32(vectp++, (u_int32_t)(intptr_t)destp);
898 while (*stringp++ != 0)
899 destp++;
900 destp++;
901 }
902
903 /* a null vector table pointer separates the argp's from the envp's */
904 suword32(vectp++, 0);
905
906 suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
907 suword32(&arginfo->ps_nenvstr, envc);
908
909 /*
910 * Fill in environment portion of vector table.
911 */
912 for (; envc > 0; --envc) {
913 suword32(vectp++, (u_int32_t)(intptr_t)destp);
914 while (*stringp++ != 0)
915 destp++;
916 destp++;
917 }
918
919 /* end of vector table is a null pointer */
920 suword32(vectp, 0);
921
922 return ((register_t *)stack_base);
923 }
924
/* compat.linux32 sysctl subtree. */
SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

/*
 * Read/write ceilings that linux32_fixlimits() applies to the data size,
 * stack size and virtual memory resource limits of a process.  A value
 * of 0 disables the corresponding clamp.
 */
static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");
937
938 /*
939 * XXX copied from ia32_sysvec.c.
940 */
941 static void
942 linux32_fixlimits(struct image_params *imgp)
943 {
944 struct proc *p = imgp->proc;
945 struct plimit *oldlim, *newlim;
946
947 if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 &&
948 linux32_maxvmem == 0)
949 return;
950 newlim = lim_alloc();
951 PROC_LOCK(p);
952 oldlim = p->p_limit;
953 lim_copy(newlim, oldlim);
954 if (linux32_maxdsiz != 0) {
955 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz)
956 newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz;
957 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz)
958 newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz;
959 }
960 if (linux32_maxssiz != 0) {
961 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz)
962 newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz;
963 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz)
964 newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz;
965 }
966 if (linux32_maxvmem != 0) {
967 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem)
968 newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem;
969 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem)
970 newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem;
971 }
972 p->p_limit = newlim;
973 PROC_UNLOCK(p);
974 lim_free(oldlim);
975 }
976
/*
 * System entry vector describing the 32-bit Linux ABI; both brand
 * descriptors in this file point at it.  The initializer is positional,
 * so the values must stay in the field order of struct sysentvec, which
 * is declared outside this file.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* dimension of linux_sysent[] */
	linux_sysent,		/* system call table */
	0xff,
	LINUX_SIGTBLSZ,		/* dimension of bsd_to_linux_signal[] */
	bsd_to_linux_signal,	/* signal number translation table */
	ELAST + 1,		/* dimension of bsd_to_linux_errno[] */
	bsd_to_linux_errno,	/* errno translation table */
	translate_traps,
	elf_linux_fixup,
	linux_sendsig,
	linux_sigcode,		/* signal trampoline code */
	&linux_szsigcode,	/* its size */
	linux_prepsyscall,
	"Linux ELF32",
	elf32_coredump,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	LINUX32_USRSTACK,
	LINUX32_USRSTACK,
	LINUX32_PS_STRINGS,
	VM_PROT_ALL,
	linux_copyout_strings,
	exec_linux_setregs,
	linux32_fixlimits
};
1005
/*
 * ELF brand descriptors for 32-bit (EM_386) Linux binaries.  The two
 * entries are identical except for the interpreter path they name:
 * /lib/ld-linux.so.1 here and /lib/ld-linux.so.2 in the glibc2 brand.
 */
static Elf32_Brandinfo linux_brand = {
	ELFOSABI_LINUX,
	EM_386,
	"Linux",
	"/compat/linux",
	"/lib/ld-linux.so.1",
	&elf_linux_sysvec,
	NULL,
};

static Elf32_Brandinfo linux_glibc2brand = {
	ELFOSABI_LINUX,
	EM_386,
	"Linux",
	"/compat/linux",
	"/lib/ld-linux.so.2",
	&elf_linux_sysvec,
	NULL,
};

/*
 * NULL-terminated list of the brands above; linux_elf_modevent() walks
 * it to insert and remove the brand entries.
 */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	NULL
};
1031
1032 static int
1033 linux_elf_modevent(module_t mod, int type, void *data)
1034 {
1035 Elf32_Brandinfo **brandinfo;
1036 int error;
1037 struct linux_ioctl_handler **lihp;
1038
1039 error = 0;
1040
1041 switch(type) {
1042 case MOD_LOAD:
1043 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1044 ++brandinfo)
1045 if (elf32_insert_brand_entry(*brandinfo) < 0)
1046 error = EINVAL;
1047 if (error == 0) {
1048 SET_FOREACH(lihp, linux_ioctl_handler_set)
1049 linux_ioctl_register_handler(*lihp);
1050 if (bootverbose)
1051 printf("Linux ELF exec handler installed\n");
1052 } else
1053 printf("cannot insert Linux ELF brand handler\n");
1054 break;
1055 case MOD_UNLOAD:
1056 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1057 ++brandinfo)
1058 if (elf32_brand_inuse(*brandinfo))
1059 error = EBUSY;
1060 if (error == 0) {
1061 for (brandinfo = &linux_brandlist[0];
1062 *brandinfo != NULL; ++brandinfo)
1063 if (elf32_remove_brand_entry(*brandinfo) < 0)
1064 error = EINVAL;
1065 }
1066 if (error == 0) {
1067 SET_FOREACH(lihp, linux_ioctl_handler_set)
1068 linux_ioctl_unregister_handler(*lihp);
1069 if (bootverbose)
1070 printf("Linux ELF exec handler removed\n");
1071 linux_mib_destroy();
1072 } else
1073 printf("Could not deinstall ELF interpreter entry\n");
1074 break;
1075 default:
1076 break;
1077 }
1078 return error;
1079 }
1080
/*
 * Module descriptor: the module name, its event handler and a third
 * member left as 0.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Declare the module with subsystem SI_SUB_EXEC and order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Cache object: 9bf39c085f86f75bbe7767d2629b4727
|