1 /*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD: src/sys/amd64/linux32/linux32_sysvec.c,v 1.39 2008/12/17 06:11:42 imp Exp $");
35 #include "opt_compat.h"
36
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40
41 #define __ELF_WORD_SIZE 32
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_mib.h>
81 #include <compat/linux/linux_signal.h>
82 #include <compat/linux/linux_util.h>
83
84 MODULE_VERSION(linux, 1);
85
86 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
87
/*
 * Emit one auxiliary-vector entry: hand `id' and then `val' to suword32(),
 * post-incrementing `pos' after each of the two stores.  `pos' must be a
 * modifiable lvalue; it ends up two elements further on.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)					\
	do {								\
		suword32((pos)++, (id));				\
		suword32((pos)++, (val));				\
	} while (0)
93
94 #if BYTE_ORDER == LITTLE_ENDIAN
95 #define SHELLMAGIC 0x2123 /* #! */
96 #else
97 #define SHELLMAGIC 0x2321
98 #endif
99
100 /*
101 * Allow the sendsig functions to use the ldebug() facility
102 * even though they are not syscalls themselves. Map them
103 * to syscall 0. This is slightly less bogus than using
104 * ldebug(sigreturn).
105 */
106 #define LINUX_SYS_linux_rt_sendsig 0
107 #define LINUX_SYS_linux_sendsig 0
108
109 extern char linux_sigcode[];
110 extern int linux_szsigcode;
111
112 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
113
114 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
115 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
116
117 static int elf_linux_fixup(register_t **stack_base,
118 struct image_params *iparams);
119 static register_t *linux_copyout_strings(struct image_params *imgp);
120 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
121 caddr_t *params);
122 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
123 static void exec_linux_setregs(struct thread *td, u_long entry,
124 u_long stack, u_long ps_strings);
125 static void linux32_fixlimit(struct rlimit *rl, int which);
126
127 extern LIST_HEAD(futex_list, futex) futex_list;
128 extern struct sx futex_sx;
129
130 static eventhandler_tag linux_exit_tag;
131 static eventhandler_tag linux_schedtail_tag;
132 static eventhandler_tag linux_exec_tag;
133
134 /*
135 * Linux syscalls return negative errno's, we do positive and map them
136 * Reference:
137 * FreeBSD: src/sys/sys/errno.h
138 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
139 * linux-2.6.17.8/include/asm-generic/errno.h
140 */
141 static int bsd_to_linux_errno[ELAST + 1] = {
142 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
143 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
144 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
145 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
146 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
147 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
148 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
149 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
150 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
151 -72, -67, -71
152 };
153
154 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
155 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
156 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
157 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
158 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
159 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
160 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
161 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
162 0, LINUX_SIGUSR1, LINUX_SIGUSR2
163 };
164
165 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
166 SIGHUP, SIGINT, SIGQUIT, SIGILL,
167 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
168 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
169 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
170 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
171 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
172 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
173 SIGIO, SIGURG, SIGSYS
174 };
175
/* Value reported for any FreeBSD trap code that has no entry below. */
#define	LINUX_T_UNKNOWN	255

/*
 * Linux trap number for each FreeBSD trap code, indexed by the FreeBSD
 * code.  Codes without a translation hold LINUX_T_UNKNOWN.
 */
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15			/* T_RESERVED */
};

/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode[].  The comparison is
 * made against a size_t, so a negative `code' is converted to a large
 * unsigned value and falls through to LINUX_T_UNKNOWN as well.
 * `code' is evaluated more than once.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] :				\
	    LINUX_T_UNKNOWN)
214
215 struct linux32_ps_strings {
216 u_int32_t ps_argvstr; /* first of 0 or more argument strings */
217 u_int ps_nargvstr; /* the number of argument strings */
218 u_int32_t ps_envstr; /* first of 0 or more environment strings */
219 u_int ps_nenvstr; /* the number of environment strings */
220 };
221
222 /*
223 * If FreeBSD & Linux have a difference of opinion about what a trap
224 * means, deal with it here.
225 *
226 * MPSAFE
227 */
228 static int
229 translate_traps(int signal, int trap_code)
230 {
231 if (signal != SIGBUS)
232 return signal;
233 switch (trap_code) {
234 case T_PROTFLT:
235 case T_TSSFLT:
236 case T_DOUBLEFLT:
237 case T_PAGEFLT:
238 return SIGSEGV;
239 default:
240 return signal;
241 }
242 }
243
244 static int
245 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
246 {
247 Elf32_Auxargs *args;
248 Elf32_Addr *base;
249 Elf32_Addr *pos;
250
251 KASSERT(curthread->td_proc == imgp->proc,
252 ("unsafe elf_linux_fixup(), should be curproc"));
253 base = (Elf32_Addr *)*stack_base;
254 args = (Elf32_Auxargs *)imgp->auxargs;
255 pos = base + (imgp->args->argc + imgp->args->envc + 2);
256
257 if (args->execfd != -1)
258 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
259 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
260 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
261 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
262 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
263 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
264 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
265 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
266 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
267 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
268 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
269 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
270 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
271
272 free(imgp->auxargs, M_TEMP);
273 imgp->auxargs = NULL;
274
275 base--;
276 suword32(base, (uint32_t)imgp->args->argc);
277 *stack_base = (register_t *)base;
278 return 0;
279 }
280
281 extern int _ucodesel, _ucode32sel, _udatasel;
282 extern unsigned long linux_sznonrtsigcode;
283
284 static void
285 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
286 {
287 struct thread *td = curthread;
288 struct proc *p = td->td_proc;
289 struct sigacts *psp;
290 struct trapframe *regs;
291 struct l_rt_sigframe *fp, frame;
292 int oonstack;
293 int sig;
294 int code;
295
296 sig = ksi->ksi_signo;
297 code = ksi->ksi_code;
298 PROC_LOCK_ASSERT(p, MA_OWNED);
299 psp = p->p_sigacts;
300 mtx_assert(&psp->ps_mtx, MA_OWNED);
301 regs = td->td_frame;
302 oonstack = sigonstack(regs->tf_rsp);
303
304 #ifdef DEBUG
305 if (ldebug(rt_sendsig))
306 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
307 catcher, sig, (void*)mask, code);
308 #endif
309 /*
310 * Allocate space for the signal handler context.
311 */
312 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
313 SIGISMEMBER(psp->ps_sigonstack, sig)) {
314 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
315 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
316 } else
317 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
318 mtx_unlock(&psp->ps_mtx);
319
320 /*
321 * Build the argument list for the signal handler.
322 */
323 if (p->p_sysent->sv_sigtbl)
324 if (sig <= p->p_sysent->sv_sigsize)
325 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
326
327 bzero(&frame, sizeof(frame));
328
329 frame.sf_handler = PTROUT(catcher);
330 frame.sf_sig = sig;
331 frame.sf_siginfo = PTROUT(&fp->sf_si);
332 frame.sf_ucontext = PTROUT(&fp->sf_sc);
333
334 /* Fill in POSIX parts */
335 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
336
337 /*
338 * Build the signal context to be used by sigreturn.
339 */
340 frame.sf_sc.uc_flags = 0; /* XXX ??? */
341 frame.sf_sc.uc_link = 0; /* XXX ??? */
342
343 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
344 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
345 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
346 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
347 PROC_UNLOCK(p);
348
349 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
350
351 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
352 frame.sf_sc.uc_mcontext.sc_gs = rgs();
353 frame.sf_sc.uc_mcontext.sc_fs = rfs();
354 __asm __volatile("movl %%es,%0" :
355 "=rm" (frame.sf_sc.uc_mcontext.sc_es));
356 __asm __volatile("movl %%ds,%0" :
357 "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
358 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
359 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
360 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
361 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
362 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
363 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
364 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
365 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
366 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
367 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
368 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
369 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
370 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
371 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
372 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
373
374 #ifdef DEBUG
375 if (ldebug(rt_sendsig))
376 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
377 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
378 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
379 #endif
380
381 if (copyout(&frame, fp, sizeof(frame)) != 0) {
382 /*
383 * Process has trashed its stack; give it an illegal
384 * instruction to halt it in its tracks.
385 */
386 #ifdef DEBUG
387 if (ldebug(rt_sendsig))
388 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
389 fp, oonstack);
390 #endif
391 PROC_LOCK(p);
392 sigexit(td, SIGILL);
393 }
394
395 /*
396 * Build context to run handler in.
397 */
398 regs->tf_rsp = PTROUT(fp);
399 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
400 linux_sznonrtsigcode;
401 regs->tf_rflags &= ~(PSL_T | PSL_D);
402 regs->tf_cs = _ucode32sel;
403 regs->tf_ss = _udatasel;
404 load_ds(_udatasel);
405 td->td_pcb->pcb_ds = _udatasel;
406 load_es(_udatasel);
407 td->td_pcb->pcb_es = _udatasel;
408 /* leave user %fs and %gs untouched */
409 PROC_LOCK(p);
410 mtx_lock(&psp->ps_mtx);
411 }
412
413
414 /*
415 * Send an interrupt to process.
416 *
417 * Stack is set up to allow sigcode stored
418 * in u. to call routine, followed by kcall
419 * to sigreturn routine below. After sigreturn
420 * resets the signal mask, the stack, and the
421 * frame pointer, it returns to the user
422 * specified pc, psl.
423 */
424 static void
425 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
426 {
427 struct thread *td = curthread;
428 struct proc *p = td->td_proc;
429 struct sigacts *psp;
430 struct trapframe *regs;
431 struct l_sigframe *fp, frame;
432 l_sigset_t lmask;
433 int oonstack, i;
434 int sig, code;
435
436 sig = ksi->ksi_signo;
437 code = ksi->ksi_code;
438 PROC_LOCK_ASSERT(p, MA_OWNED);
439 psp = p->p_sigacts;
440 mtx_assert(&psp->ps_mtx, MA_OWNED);
441 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
442 /* Signal handler installed with SA_SIGINFO. */
443 linux_rt_sendsig(catcher, ksi, mask);
444 return;
445 }
446
447 regs = td->td_frame;
448 oonstack = sigonstack(regs->tf_rsp);
449
450 #ifdef DEBUG
451 if (ldebug(sendsig))
452 printf(ARGS(sendsig, "%p, %d, %p, %u"),
453 catcher, sig, (void*)mask, code);
454 #endif
455
456 /*
457 * Allocate space for the signal handler context.
458 */
459 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
460 SIGISMEMBER(psp->ps_sigonstack, sig)) {
461 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
462 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
463 } else
464 fp = (struct l_sigframe *)regs->tf_rsp - 1;
465 mtx_unlock(&psp->ps_mtx);
466 PROC_UNLOCK(p);
467
468 /*
469 * Build the argument list for the signal handler.
470 */
471 if (p->p_sysent->sv_sigtbl)
472 if (sig <= p->p_sysent->sv_sigsize)
473 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
474
475 bzero(&frame, sizeof(frame));
476
477 frame.sf_handler = PTROUT(catcher);
478 frame.sf_sig = sig;
479
480 bsd_to_linux_sigset(mask, &lmask);
481
482 /*
483 * Build the signal context to be used by sigreturn.
484 */
485 frame.sf_sc.sc_mask = lmask.__bits[0];
486 frame.sf_sc.sc_gs = rgs();
487 frame.sf_sc.sc_fs = rfs();
488 __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
489 __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
490 frame.sf_sc.sc_edi = regs->tf_rdi;
491 frame.sf_sc.sc_esi = regs->tf_rsi;
492 frame.sf_sc.sc_ebp = regs->tf_rbp;
493 frame.sf_sc.sc_ebx = regs->tf_rbx;
494 frame.sf_sc.sc_edx = regs->tf_rdx;
495 frame.sf_sc.sc_ecx = regs->tf_rcx;
496 frame.sf_sc.sc_eax = regs->tf_rax;
497 frame.sf_sc.sc_eip = regs->tf_rip;
498 frame.sf_sc.sc_cs = regs->tf_cs;
499 frame.sf_sc.sc_eflags = regs->tf_rflags;
500 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
501 frame.sf_sc.sc_ss = regs->tf_ss;
502 frame.sf_sc.sc_err = regs->tf_err;
503 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
504 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
505
506 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
507 frame.sf_extramask[i] = lmask.__bits[i+1];
508
509 if (copyout(&frame, fp, sizeof(frame)) != 0) {
510 /*
511 * Process has trashed its stack; give it an illegal
512 * instruction to halt it in its tracks.
513 */
514 PROC_LOCK(p);
515 sigexit(td, SIGILL);
516 }
517
518 /*
519 * Build context to run handler in.
520 */
521 regs->tf_rsp = PTROUT(fp);
522 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
523 regs->tf_rflags &= ~(PSL_T | PSL_D);
524 regs->tf_cs = _ucode32sel;
525 regs->tf_ss = _udatasel;
526 load_ds(_udatasel);
527 td->td_pcb->pcb_ds = _udatasel;
528 load_es(_udatasel);
529 td->td_pcb->pcb_es = _udatasel;
530 /* leave user %fs and %gs untouched */
531 PROC_LOCK(p);
532 mtx_lock(&psp->ps_mtx);
533 }
534
535 /*
536 * System call to cleanup state after a signal
537 * has been taken. Reset signal mask and
538 * stack state from context left by sendsig (above).
539 * Return to previous pc and psl as specified by
540 * context left by sendsig. Check carefully to
541 * make sure that the user has not modified the
542 * psl to gain improper privileges or to cause
543 * a machine fault.
544 */
545 int
546 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
547 {
548 struct proc *p = td->td_proc;
549 struct l_sigframe frame;
550 struct trapframe *regs;
551 l_sigset_t lmask;
552 int eflags, i;
553 ksiginfo_t ksi;
554
555 regs = td->td_frame;
556
557 #ifdef DEBUG
558 if (ldebug(sigreturn))
559 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
560 #endif
561 /*
562 * The trampoline code hands us the sigframe.
563 * It is unsafe to keep track of it ourselves, in the event that a
564 * program jumps out of a signal handler.
565 */
566 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
567 return (EFAULT);
568
569 /*
570 * Check for security violations.
571 */
572 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
573 eflags = frame.sf_sc.sc_eflags;
574 /*
575 * XXX do allow users to change the privileged flag PSL_RF. The
576 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
577 * sometimes set it there too. tf_eflags is kept in the signal
578 * context during signal handling and there is no other place
579 * to remember it, so the PSL_RF bit may be corrupted by the
580 * signal handler without us knowing. Corruption of the PSL_RF
581 * bit at worst causes one more or one less debugger trap, so
582 * allowing it is fairly harmless.
583 */
584 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
585 return(EINVAL);
586
587 /*
588 * Don't allow users to load a valid privileged %cs. Let the
589 * hardware check for invalid selectors, excess privilege in
590 * other selectors, invalid %eip's and invalid %esp's.
591 */
592 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
593 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
594 ksiginfo_init_trap(&ksi);
595 ksi.ksi_signo = SIGBUS;
596 ksi.ksi_code = BUS_OBJERR;
597 ksi.ksi_trapno = T_PROTFLT;
598 ksi.ksi_addr = (void *)regs->tf_rip;
599 trapsignal(td, &ksi);
600 return(EINVAL);
601 }
602
603 lmask.__bits[0] = frame.sf_sc.sc_mask;
604 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
605 lmask.__bits[i+1] = frame.sf_extramask[i];
606 PROC_LOCK(p);
607 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
608 SIG_CANTMASK(td->td_sigmask);
609 signotify(td);
610 PROC_UNLOCK(p);
611
612 /*
613 * Restore signal context.
614 */
615 /* Selectors were restored by the trampoline. */
616 regs->tf_rdi = frame.sf_sc.sc_edi;
617 regs->tf_rsi = frame.sf_sc.sc_esi;
618 regs->tf_rbp = frame.sf_sc.sc_ebp;
619 regs->tf_rbx = frame.sf_sc.sc_ebx;
620 regs->tf_rdx = frame.sf_sc.sc_edx;
621 regs->tf_rcx = frame.sf_sc.sc_ecx;
622 regs->tf_rax = frame.sf_sc.sc_eax;
623 regs->tf_rip = frame.sf_sc.sc_eip;
624 regs->tf_cs = frame.sf_sc.sc_cs;
625 regs->tf_rflags = eflags;
626 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
627 regs->tf_ss = frame.sf_sc.sc_ss;
628
629 return (EJUSTRETURN);
630 }
631
632 /*
633 * System call to cleanup state after a signal
634 * has been taken. Reset signal mask and
635 * stack state from context left by rt_sendsig (above).
636 * Return to previous pc and psl as specified by
637 * context left by sendsig. Check carefully to
638 * make sure that the user has not modified the
639 * psl to gain improper privileges or to cause
640 * a machine fault.
641 */
642 int
643 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
644 {
645 struct proc *p = td->td_proc;
646 struct l_ucontext uc;
647 struct l_sigcontext *context;
648 l_stack_t *lss;
649 stack_t ss;
650 struct trapframe *regs;
651 int eflags;
652 ksiginfo_t ksi;
653
654 regs = td->td_frame;
655
656 #ifdef DEBUG
657 if (ldebug(rt_sigreturn))
658 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
659 #endif
660 /*
661 * The trampoline code hands us the ucontext.
662 * It is unsafe to keep track of it ourselves, in the event that a
663 * program jumps out of a signal handler.
664 */
665 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
666 return (EFAULT);
667
668 context = &uc.uc_mcontext;
669
670 /*
671 * Check for security violations.
672 */
673 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
674 eflags = context->sc_eflags;
675 /*
676 * XXX do allow users to change the privileged flag PSL_RF. The
677 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
678 * sometimes set it there too. tf_eflags is kept in the signal
679 * context during signal handling and there is no other place
680 * to remember it, so the PSL_RF bit may be corrupted by the
681 * signal handler without us knowing. Corruption of the PSL_RF
682 * bit at worst causes one more or one less debugger trap, so
683 * allowing it is fairly harmless.
684 */
685 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
686 return(EINVAL);
687
688 /*
689 * Don't allow users to load a valid privileged %cs. Let the
690 * hardware check for invalid selectors, excess privilege in
691 * other selectors, invalid %eip's and invalid %esp's.
692 */
693 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
694 if (!CS_SECURE(context->sc_cs)) {
695 ksiginfo_init_trap(&ksi);
696 ksi.ksi_signo = SIGBUS;
697 ksi.ksi_code = BUS_OBJERR;
698 ksi.ksi_trapno = T_PROTFLT;
699 ksi.ksi_addr = (void *)regs->tf_rip;
700 trapsignal(td, &ksi);
701 return(EINVAL);
702 }
703
704 PROC_LOCK(p);
705 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
706 SIG_CANTMASK(td->td_sigmask);
707 signotify(td);
708 PROC_UNLOCK(p);
709
710 /*
711 * Restore signal context
712 */
713 /* Selectors were restored by the trampoline. */
714 regs->tf_rdi = context->sc_edi;
715 regs->tf_rsi = context->sc_esi;
716 regs->tf_rbp = context->sc_ebp;
717 regs->tf_rbx = context->sc_ebx;
718 regs->tf_rdx = context->sc_edx;
719 regs->tf_rcx = context->sc_ecx;
720 regs->tf_rax = context->sc_eax;
721 regs->tf_rip = context->sc_eip;
722 regs->tf_cs = context->sc_cs;
723 regs->tf_rflags = eflags;
724 regs->tf_rsp = context->sc_esp_at_signal;
725 regs->tf_ss = context->sc_ss;
726
727 /*
728 * call sigaltstack & ignore results..
729 */
730 lss = &uc.uc_stack;
731 ss.ss_sp = PTRIN(lss->ss_sp);
732 ss.ss_size = lss->ss_size;
733 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
734
735 #ifdef DEBUG
736 if (ldebug(rt_sigreturn))
737 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
738 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
739 #endif
740 (void)kern_sigaltstack(td, &ss, NULL);
741
742 return (EJUSTRETURN);
743 }
744
745 /*
746 * MPSAFE
747 */
748 static void
749 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
750 {
751 args[0] = tf->tf_rbx;
752 args[1] = tf->tf_rcx;
753 args[2] = tf->tf_rdx;
754 args[3] = tf->tf_rsi;
755 args[4] = tf->tf_rdi;
756 args[5] = tf->tf_rbp; /* Unconfirmed */
757 *params = NULL; /* no copyin */
758 }
759
760 /*
761 * If a linux binary is exec'ing something, try this image activator
762 * first. We override standard shell script execution in order to
763 * be able to modify the interpreter path. We only do this if a linux
764 * binary is doing the exec, so we do not create an EXEC module for it.
765 */
766 static int exec_linux_imgact_try(struct image_params *iparams);
767
768 static int
769 exec_linux_imgact_try(struct image_params *imgp)
770 {
771 const char *head = (const char *)imgp->image_header;
772 char *rpath;
773 int error = -1, len;
774
775 /*
776 * The interpreter for shell scripts run from a linux binary needs
777 * to be located in /compat/linux if possible in order to recursively
778 * maintain linux path emulation.
779 */
780 if (((const short *)head)[0] == SHELLMAGIC) {
781 /*
782 * Run our normal shell image activator. If it succeeds attempt
783 * to use the alternate path for the interpreter. If an alternate
784 * path is found, use our stringspace to store it.
785 */
786 if ((error = exec_shell_imgact(imgp)) == 0) {
787 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
788 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
789 if (rpath != NULL) {
790 len = strlen(rpath) + 1;
791
792 if (len <= MAXSHELLCMDLEN) {
793 memcpy(imgp->interpreter_name, rpath, len);
794 }
795 free(rpath, M_TEMP);
796 }
797 }
798 }
799 return(error);
800 }
801
802 /*
803 * Clear registers on exec
804 * XXX copied from ia32_signal.c.
805 */
806 static void
807 exec_linux_setregs(td, entry, stack, ps_strings)
808 struct thread *td;
809 u_long entry;
810 u_long stack;
811 u_long ps_strings;
812 {
813 struct trapframe *regs = td->td_frame;
814 struct pcb *pcb = td->td_pcb;
815
816 critical_enter();
817 wrmsr(MSR_FSBASE, 0);
818 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
819 pcb->pcb_fsbase = 0;
820 pcb->pcb_gsbase = 0;
821 critical_exit();
822 load_ds(_udatasel);
823 load_es(_udatasel);
824 load_fs(_udatasel);
825 load_gs(_udatasel);
826 pcb->pcb_ds = _udatasel;
827 pcb->pcb_es = _udatasel;
828 pcb->pcb_fs = _udatasel;
829 pcb->pcb_gs = _udatasel;
830
831 bzero((char *)regs, sizeof(struct trapframe));
832 regs->tf_rip = entry;
833 regs->tf_rsp = stack;
834 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
835 regs->tf_ss = _udatasel;
836 regs->tf_cs = _ucode32sel;
837 regs->tf_rbx = ps_strings;
838 load_cr0(rcr0() | CR0_MP | CR0_TS);
839 fpstate_drop(td);
840
841 /* Return via doreti so that we can change to a different %cs */
842 pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
843 pcb->pcb_flags &= ~PCB_GS32BIT;
844 td->td_retval[1] = 0;
845 }
846
847 /*
848 * XXX copied from ia32_sysvec.c.
849 */
850 static register_t *
851 |