1 /*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD: releng/6.1/sys/amd64/linux32/linux32_sysvec.c 153664 2005-12-22 21:25:20Z jhb $");
35
36 /* XXX we use functions that might not exist. */
37 #include "opt_compat.h"
38
39 #ifndef COMPAT_43
40 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
41 #endif
42 #ifndef COMPAT_IA32
43 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
44 #endif
45
46 #define __ELF_WORD_SIZE 32
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/exec.h>
51 #include <sys/imgact.h>
52 #include <sys/imgact_elf.h>
53 #include <sys/kernel.h>
54 #include <sys/lock.h>
55 #include <sys/malloc.h>
56 #include <sys/module.h>
57 #include <sys/mutex.h>
58 #include <sys/proc.h>
59 #include <sys/resourcevar.h>
60 #include <sys/signalvar.h>
61 #include <sys/sysctl.h>
62 #include <sys/syscallsubr.h>
63 #include <sys/sysent.h>
64 #include <sys/sysproto.h>
65 #include <sys/vnode.h>
66
67 #include <vm/vm.h>
68 #include <vm/pmap.h>
69 #include <vm/vm_extern.h>
70 #include <vm/vm_map.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_param.h>
74
75 #include <machine/cpu.h>
76 #include <machine/md_var.h>
77 #include <machine/pcb.h>
78 #include <machine/specialreg.h>
79
80 #include <amd64/linux32/linux.h>
81 #include <amd64/linux32/linux32_proto.h>
82 #include <compat/linux/linux_mib.h>
83 #include <compat/linux/linux_signal.h>
84 #include <compat/linux/linux_util.h>
85
86 MODULE_VERSION(linux, 1);
87 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
88 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
89 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
90
91 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
92
/*
 * Store one auxiliary-vector entry as two consecutive 32-bit words
 * (id first, then val) through suword32(), advancing pos past both.
 * pos is expanded twice and incremented each time, so it must be a
 * plain lvalue without side effects of its own.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32(pos++, id);	\
		suword32(pos++, val);	\
	} while (0)
98
99 #if BYTE_ORDER == LITTLE_ENDIAN
100 #define SHELLMAGIC 0x2123 /* #! */
101 #else
102 #define SHELLMAGIC 0x2321
103 #endif
104
105 /*
106 * Allow the sendsig functions to use the ldebug() facility
107 * even though they are not syscalls themselves. Map them
108 * to syscall 0. This is slightly less bogus than using
109 * ldebug(sigreturn).
110 */
111 #define LINUX_SYS_linux_rt_sendsig 0
112 #define LINUX_SYS_linux_sendsig 0
113
114 extern char linux_sigcode[];
115 extern int linux_szsigcode;
116
117 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
118
119 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
120
121 static int elf_linux_fixup(register_t **stack_base,
122 struct image_params *iparams);
123 static register_t *linux_copyout_strings(struct image_params *imgp);
124 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
125 caddr_t *params);
126 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
127 u_long code);
128 static void exec_linux_setregs(struct thread *td, u_long entry,
129 u_long stack, u_long ps_strings);
130 static void linux32_fixlimits(struct image_params *imgp);
131
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 *
 * The table is indexed by the FreeBSD errno value and each entry holds the
 * value handed back to the Linux binary.  It is installed as the errno
 * table of elf_linux_sysvec together with its size, ELAST + 1.
 *
 * NOTE(review): the initializer below lists 87 values (indices 0..86).
 * If ELAST + 1 is larger than that, the remaining entries are implicitly
 * zero, i.e. those FreeBSD errnos would be reported to the Linux binary
 * as 0 -- confirm against the ELAST definition in <sys/errno.h>.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/*  0 -  9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 - 19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 - 29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 - 39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 - 49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 - 59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 - 69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 - 79 */
	-6,  -6, -43, -42, -75,  -6, -84			/* 80 - 86 */
};
146
/*
 * FreeBSD -> Linux signal number translation.  Installed as the signal
 * table of elf_linux_sysvec and looked up by the sendsig routines with
 * _SIG_IDX(sig) as the index.
 * Two slots hold 0; presumably these are FreeBSD signals without a Linux
 * counterpart -- TODO confirm against <sys/signal.h>.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
157
/*
 * Linux -> FreeBSD signal number translation, the counterpart of
 * bsd_to_linux_signal.  Nothing in this file reads it; it has external
 * linkage, so the consumers are elsewhere.
 * NOTE(review): presumably indexed by the Linux signal number minus one,
 * mirroring the other table -- confirm against the users.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
168
/*
 * FreeBSD trap code -> Linux trap number, used to fill sc_trapno in the
 * signal frames built below.  The array index is the FreeBSD code (named
 * in each trailing comment); codes with no entry map to LINUX_T_UNKNOWN.
 */
#define	LINUX_T_UNKNOWN	255
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1 T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3 T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6 T_ARITHTRAP */
	254,			/* 7 T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9 T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup: any code at or beyond the end of the table
 * yields LINUX_T_UNKNOWN.  The argument is evaluated twice.
 */
#define	bsd_to_linux_trapcode(code) \
	((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
	 _bsd_to_linux_trapcode[(code)]: \
	 LINUX_T_UNKNOWN)
207
/*
 * 32-bit image of the ps_strings block.  linux_copyout_strings() treats
 * LINUX32_PS_STRINGS as the user address of one of these and fills every
 * member with suword32(), so the pointer members are 32-bit user addresses.
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
	u_int ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
	u_int ps_nenvstr;	/* the number of environment strings */
};
214
215 /*
216 * If FreeBSD & Linux have a difference of opinion about what a trap
217 * means, deal with it here.
218 *
219 * MPSAFE
220 */
221 static int
222 translate_traps(int signal, int trap_code)
223 {
224 if (signal != SIGBUS)
225 return signal;
226 switch (trap_code) {
227 case T_PROTFLT:
228 case T_TSSFLT:
229 case T_DOUBLEFLT:
230 case T_PAGEFLT:
231 return SIGSEGV;
232 default:
233 return signal;
234 }
235 }
236
237 static int
238 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
239 {
240 Elf32_Auxargs *args;
241 Elf32_Addr *base;
242 Elf32_Addr *pos;
243
244 KASSERT(curthread->td_proc == imgp->proc &&
245 (curthread->td_proc->p_flag & P_SA) == 0,
246 ("unsafe elf_linux_fixup(), should be curproc"));
247 base = (Elf32_Addr *)*stack_base;
248 args = (Elf32_Auxargs *)imgp->auxargs;
249 pos = base + (imgp->args->argc + imgp->args->envc + 2);
250
251 if (args->trace)
252 AUXARGS_ENTRY_32(pos, AT_DEBUG, 1);
253 if (args->execfd != -1)
254 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
255 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
256 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
257 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
258 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
259 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
260 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
261 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
262 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
263 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
264 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
265 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
266 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
267
268 free(imgp->auxargs, M_TEMP);
269 imgp->auxargs = NULL;
270
271 base--;
272 suword32(base, (uint32_t)imgp->args->argc);
273 *stack_base = (register_t *)base;
274 return 0;
275 }
276
277 extern int _ucodesel, _ucode32sel, _udatasel;
278 extern unsigned long linux_sznonrtsigcode;
279
280 static void
281 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
282 {
283 struct thread *td = curthread;
284 struct proc *p = td->td_proc;
285 struct sigacts *psp;
286 struct trapframe *regs;
287 struct l_rt_sigframe *fp, frame;
288 int oonstack;
289
290 PROC_LOCK_ASSERT(p, MA_OWNED);
291 psp = p->p_sigacts;
292 mtx_assert(&psp->ps_mtx, MA_OWNED);
293 regs = td->td_frame;
294 oonstack = sigonstack(regs->tf_rsp);
295
296 #ifdef DEBUG
297 if (ldebug(rt_sendsig))
298 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
299 catcher, sig, (void*)mask, code);
300 #endif
301 /*
302 * Allocate space for the signal handler context.
303 */
304 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
305 SIGISMEMBER(psp->ps_sigonstack, sig)) {
306 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
307 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
308 } else
309 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
310 mtx_unlock(&psp->ps_mtx);
311
312 /*
313 * Build the argument list for the signal handler.
314 */
315 if (p->p_sysent->sv_sigtbl)
316 if (sig <= p->p_sysent->sv_sigsize)
317 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
318
319 bzero(&frame, sizeof(frame));
320
321 frame.sf_handler = PTROUT(catcher);
322 frame.sf_sig = sig;
323 frame.sf_siginfo = PTROUT(&fp->sf_si);
324 frame.sf_ucontext = PTROUT(&fp->sf_sc);
325
326 /* Fill in POSIX parts */
327 frame.sf_si.lsi_signo = sig;
328 frame.sf_si.lsi_code = code;
329 frame.sf_si.lsi_addr = PTROUT(regs->tf_err);
330
331 /*
332 * Build the signal context to be used by sigreturn.
333 */
334 frame.sf_sc.uc_flags = 0; /* XXX ??? */
335 frame.sf_sc.uc_link = 0; /* XXX ??? */
336
337 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
338 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
339 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
340 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
341 PROC_UNLOCK(p);
342
343 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
344
345 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
346 frame.sf_sc.uc_mcontext.sc_gs = rgs();
347 frame.sf_sc.uc_mcontext.sc_fs = rfs();
348 __asm __volatile("movl %%es,%0" :
349 "=rm" (frame.sf_sc.uc_mcontext.sc_es));
350 __asm __volatile("movl %%ds,%0" :
351 "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
352 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
353 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
354 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
355 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
356 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
357 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
358 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
359 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
360 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
361 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
362 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
363 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
364 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
365 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
366
367 #ifdef DEBUG
368 if (ldebug(rt_sendsig))
369 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
370 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
371 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
372 #endif
373
374 if (copyout(&frame, fp, sizeof(frame)) != 0) {
375 /*
376 * Process has trashed its stack; give it an illegal
377 * instruction to halt it in its tracks.
378 */
379 #ifdef DEBUG
380 if (ldebug(rt_sendsig))
381 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
382 fp, oonstack);
383 #endif
384 PROC_LOCK(p);
385 sigexit(td, SIGILL);
386 }
387
388 /*
389 * Build context to run handler in.
390 */
391 regs->tf_rsp = PTROUT(fp);
392 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
393 linux_sznonrtsigcode;
394 regs->tf_rflags &= ~PSL_T;
395 regs->tf_cs = _ucode32sel;
396 regs->tf_ss = _udatasel;
397 load_ds(_udatasel);
398 td->td_pcb->pcb_ds = _udatasel;
399 load_es(_udatasel);
400 td->td_pcb->pcb_es = _udatasel;
401 PROC_LOCK(p);
402 mtx_lock(&psp->ps_mtx);
403 }
404
405
406 /*
407 * Send an interrupt to process.
408 *
409 * Stack is set up to allow sigcode stored
410 * in u. to call routine, followed by kcall
411 * to sigreturn routine below. After sigreturn
412 * resets the signal mask, the stack, and the
413 * frame pointer, it returns to the user
414 * specified pc, psl.
415 */
416 static void
417 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
418 {
419 struct thread *td = curthread;
420 struct proc *p = td->td_proc;
421 struct sigacts *psp;
422 struct trapframe *regs;
423 struct l_sigframe *fp, frame;
424 l_sigset_t lmask;
425 int oonstack, i;
426
427 PROC_LOCK_ASSERT(p, MA_OWNED);
428 psp = p->p_sigacts;
429 mtx_assert(&psp->ps_mtx, MA_OWNED);
430 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
431 /* Signal handler installed with SA_SIGINFO. */
432 linux_rt_sendsig(catcher, sig, mask, code);
433 return;
434 }
435
436 regs = td->td_frame;
437 oonstack = sigonstack(regs->tf_rsp);
438
439 #ifdef DEBUG
440 if (ldebug(sendsig))
441 printf(ARGS(sendsig, "%p, %d, %p, %lu"),
442 catcher, sig, (void*)mask, code);
443 #endif
444
445 /*
446 * Allocate space for the signal handler context.
447 */
448 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
449 SIGISMEMBER(psp->ps_sigonstack, sig)) {
450 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
451 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
452 } else
453 fp = (struct l_sigframe *)regs->tf_rsp - 1;
454 mtx_unlock(&psp->ps_mtx);
455 PROC_UNLOCK(p);
456
457 /*
458 * Build the argument list for the signal handler.
459 */
460 if (p->p_sysent->sv_sigtbl)
461 if (sig <= p->p_sysent->sv_sigsize)
462 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
463
464 bzero(&frame, sizeof(frame));
465
466 frame.sf_handler = PTROUT(catcher);
467 frame.sf_sig = sig;
468
469 bsd_to_linux_sigset(mask, &lmask);
470
471 /*
472 * Build the signal context to be used by sigreturn.
473 */
474 frame.sf_sc.sc_mask = lmask.__bits[0];
475 frame.sf_sc.sc_gs = rgs();
476 frame.sf_sc.sc_fs = rfs();
477 __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
478 __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
479 frame.sf_sc.sc_edi = regs->tf_rdi;
480 frame.sf_sc.sc_esi = regs->tf_rsi;
481 frame.sf_sc.sc_ebp = regs->tf_rbp;
482 frame.sf_sc.sc_ebx = regs->tf_rbx;
483 frame.sf_sc.sc_edx = regs->tf_rdx;
484 frame.sf_sc.sc_ecx = regs->tf_rcx;
485 frame.sf_sc.sc_eax = regs->tf_rax;
486 frame.sf_sc.sc_eip = regs->tf_rip;
487 frame.sf_sc.sc_cs = regs->tf_cs;
488 frame.sf_sc.sc_eflags = regs->tf_rflags;
489 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
490 frame.sf_sc.sc_ss = regs->tf_ss;
491 frame.sf_sc.sc_err = regs->tf_err;
492 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
493
494 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
495 frame.sf_extramask[i] = lmask.__bits[i+1];
496
497 if (copyout(&frame, fp, sizeof(frame)) != 0) {
498 /*
499 * Process has trashed its stack; give it an illegal
500 * instruction to halt it in its tracks.
501 */
502 PROC_LOCK(p);
503 sigexit(td, SIGILL);
504 }
505
506 /*
507 * Build context to run handler in.
508 */
509 regs->tf_rsp = PTROUT(fp);
510 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
511 regs->tf_rflags &= ~PSL_T;
512 regs->tf_cs = _ucode32sel;
513 regs->tf_ss = _udatasel;
514 load_ds(_udatasel);
515 td->td_pcb->pcb_ds = _udatasel;
516 load_es(_udatasel);
517 td->td_pcb->pcb_es = _udatasel;
518 PROC_LOCK(p);
519 mtx_lock(&psp->ps_mtx);
520 }
521
522 /*
523 * System call to cleanup state after a signal
524 * has been taken. Reset signal mask and
525 * stack state from context left by sendsig (above).
526 * Return to previous pc and psl as specified by
527 * context left by sendsig. Check carefully to
528 * make sure that the user has not modified the
529 * psl to gain improper privileges or to cause
530 * a machine fault.
531 */
532 int
533 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
534 {
535 struct proc *p = td->td_proc;
536 struct l_sigframe frame;
537 struct trapframe *regs;
538 l_sigset_t lmask;
539 int eflags, i;
540
541 regs = td->td_frame;
542
543 #ifdef DEBUG
544 if (ldebug(sigreturn))
545 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
546 #endif
547 /*
548 * The trampoline code hands us the sigframe.
549 * It is unsafe to keep track of it ourselves, in the event that a
550 * program jumps out of a signal handler.
551 */
552 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
553 return (EFAULT);
554
555 /*
556 * Check for security violations.
557 */
558 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
559 eflags = frame.sf_sc.sc_eflags;
560 /*
561 * XXX do allow users to change the privileged flag PSL_RF. The
562 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
563 * sometimes set it there too. tf_eflags is kept in the signal
564 * context during signal handling and there is no other place
565 * to remember it, so the PSL_RF bit may be corrupted by the
566 * signal handler without us knowing. Corruption of the PSL_RF
567 * bit at worst causes one more or one less debugger trap, so
568 * allowing it is fairly harmless.
569 */
570 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
571 return(EINVAL);
572
573 /*
574 * Don't allow users to load a valid privileged %cs. Let the
575 * hardware check for invalid selectors, excess privilege in
576 * other selectors, invalid %eip's and invalid %esp's.
577 */
578 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
579 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
580 trapsignal(td, SIGBUS, T_PROTFLT);
581 return(EINVAL);
582 }
583
584 lmask.__bits[0] = frame.sf_sc.sc_mask;
585 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
586 lmask.__bits[i+1] = frame.sf_extramask[i];
587 PROC_LOCK(p);
588 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
589 SIG_CANTMASK(td->td_sigmask);
590 signotify(td);
591 PROC_UNLOCK(p);
592
593 /*
594 * Restore signal context.
595 */
596 /* Selectors were restored by the trampoline. */
597 regs->tf_rdi = frame.sf_sc.sc_edi;
598 regs->tf_rsi = frame.sf_sc.sc_esi;
599 regs->tf_rbp = frame.sf_sc.sc_ebp;
600 regs->tf_rbx = frame.sf_sc.sc_ebx;
601 regs->tf_rdx = frame.sf_sc.sc_edx;
602 regs->tf_rcx = frame.sf_sc.sc_ecx;
603 regs->tf_rax = frame.sf_sc.sc_eax;
604 regs->tf_rip = frame.sf_sc.sc_eip;
605 regs->tf_cs = frame.sf_sc.sc_cs;
606 regs->tf_rflags = eflags;
607 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
608 regs->tf_ss = frame.sf_sc.sc_ss;
609
610 return (EJUSTRETURN);
611 }
612
613 /*
614 * System call to cleanup state after a signal
615 * has been taken. Reset signal mask and
616 * stack state from context left by rt_sendsig (above).
617 * Return to previous pc and psl as specified by
618 * context left by sendsig. Check carefully to
619 * make sure that the user has not modified the
620 * psl to gain improper privileges or to cause
621 * a machine fault.
622 */
623 int
624 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
625 {
626 struct proc *p = td->td_proc;
627 struct l_ucontext uc;
628 struct l_sigcontext *context;
629 l_stack_t *lss;
630 stack_t ss;
631 struct trapframe *regs;
632 int eflags;
633
634 regs = td->td_frame;
635
636 #ifdef DEBUG
637 if (ldebug(rt_sigreturn))
638 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
639 #endif
640 /*
641 * The trampoline code hands us the ucontext.
642 * It is unsafe to keep track of it ourselves, in the event that a
643 * program jumps out of a signal handler.
644 */
645 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
646 return (EFAULT);
647
648 context = &uc.uc_mcontext;
649
650 /*
651 * Check for security violations.
652 */
653 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
654 eflags = context->sc_eflags;
655 /*
656 * XXX do allow users to change the privileged flag PSL_RF. The
657 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
658 * sometimes set it there too. tf_eflags is kept in the signal
659 * context during signal handling and there is no other place
660 * to remember it, so the PSL_RF bit may be corrupted by the
661 * signal handler without us knowing. Corruption of the PSL_RF
662 * bit at worst causes one more or one less debugger trap, so
663 * allowing it is fairly harmless.
664 */
665 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
666 return(EINVAL);
667
668 /*
669 * Don't allow users to load a valid privileged %cs. Let the
670 * hardware check for invalid selectors, excess privilege in
671 * other selectors, invalid %eip's and invalid %esp's.
672 */
673 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
674 if (!CS_SECURE(context->sc_cs)) {
675 trapsignal(td, SIGBUS, T_PROTFLT);
676 return(EINVAL);
677 }
678
679 PROC_LOCK(p);
680 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
681 SIG_CANTMASK(td->td_sigmask);
682 signotify(td);
683 PROC_UNLOCK(p);
684
685 /*
686 * Restore signal context
687 */
688 /* Selectors were restored by the trampoline. */
689 regs->tf_rdi = context->sc_edi;
690 regs->tf_rsi = context->sc_esi;
691 regs->tf_rbp = context->sc_ebp;
692 regs->tf_rbx = context->sc_ebx;
693 regs->tf_rdx = context->sc_edx;
694 regs->tf_rcx = context->sc_ecx;
695 regs->tf_rax = context->sc_eax;
696 regs->tf_rip = context->sc_eip;
697 regs->tf_cs = context->sc_cs;
698 regs->tf_rflags = eflags;
699 regs->tf_rsp = context->sc_esp_at_signal;
700 regs->tf_ss = context->sc_ss;
701
702 /*
703 * call sigaltstack & ignore results..
704 */
705 lss = &uc.uc_stack;
706 ss.ss_sp = PTRIN(lss->ss_sp);
707 ss.ss_size = lss->ss_size;
708 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
709
710 #ifdef DEBUG
711 if (ldebug(rt_sigreturn))
712 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
713 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
714 #endif
715 (void)kern_sigaltstack(td, &ss, NULL);
716
717 return (EJUSTRETURN);
718 }
719
720 /*
721 * MPSAFE
722 */
723 static void
724 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
725 {
726 args[0] = tf->tf_rbx;
727 args[1] = tf->tf_rcx;
728 args[2] = tf->tf_rdx;
729 args[3] = tf->tf_rsi;
730 args[4] = tf->tf_rdi;
731 args[5] = tf->tf_rbp; /* Unconfirmed */
732 *params = NULL; /* no copyin */
733 }
734
735 /*
736 * If a linux binary is exec'ing something, try this image activator
737 * first. We override standard shell script execution in order to
738 * be able to modify the interpreter path. We only do this if a linux
739 * binary is doing the exec, so we do not create an EXEC module for it.
740 */
741 static int exec_linux_imgact_try(struct image_params *iparams);
742
743 static int
744 exec_linux_imgact_try(struct image_params *imgp)
745 {
746 const char *head = (const char *)imgp->image_header;
747 char *rpath;
748 int error = -1, len;
749
750 /*
751 * The interpreter for shell scripts run from a linux binary needs
752 * to be located in /compat/linux if possible in order to recursively
753 * maintain linux path emulation.
754 */
755 if (((const short *)head)[0] == SHELLMAGIC) {
756 /*
757 * Run our normal shell image activator. If it succeeds attempt
758 * to use the alternate path for the interpreter. If an alternate
759 * path is found, use our stringspace to store it.
760 */
761 if ((error = exec_shell_imgact(imgp)) == 0) {
762 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
763 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
764 if (rpath != NULL) {
765 len = strlen(rpath) + 1;
766
767 if (len <= MAXSHELLCMDLEN) {
768 memcpy(imgp->interpreter_name, rpath, len);
769 }
770 free(rpath, M_TEMP);
771 }
772 }
773 }
774 return(error);
775 }
776
777 /*
778 * Clear registers on exec
779 * XXX copied from ia32_signal.c.
780 */
781 static void
782 exec_linux_setregs(td, entry, stack, ps_strings)
783 struct thread *td;
784 u_long entry;
785 u_long stack;
786 u_long ps_strings;
787 {
788 struct trapframe *regs = td->td_frame;
789 struct pcb *pcb = td->td_pcb;
790
791 wrmsr(MSR_FSBASE, 0);
792 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
793 pcb->pcb_fsbase = 0;
794 pcb->pcb_gsbase = 0;
795 load_ds(_udatasel);
796 load_es(_udatasel);
797 load_fs(_udatasel);
798 load_gs(0);
799 pcb->pcb_ds = _udatasel;
800 pcb->pcb_es = _udatasel;
801 pcb->pcb_fs = _udatasel;
802 pcb->pcb_gs = 0;
803
804 bzero((char *)regs, sizeof(struct trapframe));
805 regs->tf_rip = entry;
806 regs->tf_rsp = stack;
807 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
808 regs->tf_ss = _udatasel;
809 regs->tf_cs = _ucode32sel;
810 regs->tf_rbx = ps_strings;
811 load_cr0(rcr0() | CR0_MP | CR0_TS);
812 fpstate_drop(td);
813
814 /* Return via doreti so that we can change to a different %cs */
815 pcb->pcb_flags |= PCB_FULLCTX;
816 td->td_retval[1] = 0;
817 }
818
819 /*
820 * XXX copied from ia32_sysvec.c.
821 */
822 static register_t *
823 linux_copyout_strings(struct image_params *imgp)
824 {
825 int argc, envc;
826 u_int32_t *vectp;
827 char *stringp, *destp;
828 u_int32_t *stack_base;
829 struct linux32_ps_strings *arginfo;
830 int sigcodesz;
831
832 /*
833 * Calculate string base and vector table pointers.
834 * Also deal with signal trampoline code for this exec type.
835 */
836 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
837 sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode);
838 destp = (caddr_t)arginfo - sigcodesz - SPARE_USRSPACE -
839 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
840
841 /*
842 * install sigcode
843 */
844 if (sigcodesz)
845 copyout(imgp->proc->p_sysent->sv_sigcode,
846 ((caddr_t)arginfo - sigcodesz), szsigcode);
847
848 /*
849 * If we have a valid auxargs ptr, prepare some room
850 * on the stack.
851 */
852 if (imgp->auxargs) {
853 /*
854 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
855 * lower compatibility.
856 */
857 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
858 : (AT_COUNT * 2);
859 /*
860 * The '+ 2' is for the null pointers at the end of each of
861 * the arg and env vector sets,and imgp->auxarg_size is room
862 * for argument of Runtime loader.
863 */
864 vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 +
865 imgp->auxarg_size) * sizeof(u_int32_t));
866
867 } else
868 /*
869 * The '+ 2' is for the null pointers at the end of each of
870 * the arg and env vector sets
871 */
872 vectp = (u_int32_t *)
873 (destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t));
874
875 /*
876 * vectp also becomes our initial stack base
877 */
878 stack_base = vectp;
879
880 stringp = imgp->args->begin_argv;
881 argc = imgp->args->argc;
882 envc = imgp->args->envc;
883 /*
884 * Copy out strings - arguments and environment.
885 */
886 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
887
888 /*
889 * Fill in "ps_strings" struct for ps, w, etc.
890 */
891 suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
892 suword32(&arginfo->ps_nargvstr, argc);
893
894 /*
895 * Fill in argument portion of vector table.
896 */
897 for (; argc > 0; --argc) {
898 suword32(vectp++, (u_int32_t)(intptr_t)destp);
899 while (*stringp++ != 0)
900 destp++;
901 destp++;
902 }
903
904 /* a null vector table pointer separates the argp's from the envp's */
905 suword32(vectp++, 0);
906
907 suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
908 suword32(&arginfo->ps_nenvstr, envc);
909
910 /*
911 * Fill in environment portion of vector table.
912 */
913 for (; envc > 0; --envc) {
914 suword32(vectp++, (u_int32_t)(intptr_t)destp);
915 while (*stringp++ != 0)
916 destp++;
917 destp++;
918 }
919
920 /* end of vector table is a null pointer */
921 suword32(vectp, 0);
922
923 return ((register_t *)stack_base);
924 }
925
/*
 * Read-write tunables under compat.linux32.  Each one is an upper bound
 * that linux32_fixlimits() applies to the matching resource limit
 * (data size, stack size, virtual memory) of a process running a Linux
 * image; a value of 0 turns that particular clamp off.
 */
SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");
938
939 /*
940 * XXX copied from ia32_sysvec.c.
941 */
942 static void
943 linux32_fixlimits(struct image_params *imgp)
944 {
945 struct proc *p = imgp->proc;
946 struct plimit *oldlim, *newlim;
947
948 if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 &&
949 linux32_maxvmem == 0)
950 return;
951 newlim = lim_alloc();
952 PROC_LOCK(p);
953 oldlim = p->p_limit;
954 lim_copy(newlim, oldlim);
955 if (linux32_maxdsiz != 0) {
956 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz)
957 newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz;
958 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz)
959 newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz;
960 }
961 if (linux32_maxssiz != 0) {
962 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz)
963 newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz;
964 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz)
965 newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz;
966 }
967 if (linux32_maxvmem != 0) {
968 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem)
969 newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem;
970 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem)
971 newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem;
972 }
973 p->p_limit = newlim;
974 PROC_UNLOCK(p);
975 lim_free(oldlim);
976 }
977
/*
 * System entry vector for 32-bit Linux ELF images; both brand entries
 * below point at it.  The initializer is positional.
 * NOTE(review): the trailing labels describe what each value is in this
 * file; the actual member names and order come from struct sysentvec in
 * <sys/sysent.h> -- confirm against that header before reordering.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* number of syscall table entries */
	linux_sysent,		/* syscall table */
	0xff,			/* presumably the syscall number mask */
	LINUX_SIGTBLSZ,		/* size of the signal table */
	bsd_to_linux_signal,	/* signal translation table */
	ELAST + 1,		/* size of the errno table */
	bsd_to_linux_errno,	/* errno translation table */
	translate_traps,	/* trap-to-signal fixup */
	elf_linux_fixup,	/* stack fixup (auxv, argc) */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal trampoline code */
	&linux_szsigcode,	/* size of the trampoline */
	linux_prepsyscall,	/* syscall argument fetch */
	"Linux ELF32",		/* name */
	elf32_coredump,		/* core dump routine */
	exec_linux_imgact_try,	/* extra image activator ("#!" scripts) */
	LINUX_MINSIGSTKSZ,	/* minimum signal stack size */
	PAGE_SIZE,		/* page size */
	VM_MIN_ADDRESS,		/* lowest user address */
	LINUX32_USRSTACK,	/* highest user address */
	LINUX32_USRSTACK,	/* user stack top */
	LINUX32_PS_STRINGS,	/* address of ps_strings */
	VM_PROT_ALL,		/* stack protection */
	linux_copyout_strings,	/* argument/environment copyout */
	exec_linux_setregs,	/* register setup at exec */
	linux32_fixlimits	/* resource limit fixup */
};
1006
/*
 * ELF brand entry for Linux/i386 (EM_386) binaries that name
 * /lib/ld-linux.so.1 as their interpreter; uses elf_linux_sysvec and
 * the /compat/linux emulation prefix.
 */
static Elf32_Brandinfo linux_brand = {
	ELFOSABI_LINUX,
	EM_386,
	"Linux",
	"/compat/linux",
	"/lib/ld-linux.so.1",
	&elf_linux_sysvec,
	NULL,
};
1016
/*
 * Same as linux_brand except for the interpreter path, which is
 * /lib/ld-linux.so.2 here.
 */
static Elf32_Brandinfo linux_glibc2brand = {
	ELFOSABI_LINUX,
	EM_386,
	"Linux",
	"/compat/linux",
	"/lib/ld-linux.so.2",
	&elf_linux_sysvec,
	NULL,
};
1026
/*
 * NULL-terminated list of the brand entries that linux_elf_modevent()
 * inserts on module load and removes on unload.
 */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	NULL
};
1032
1033 static int
1034 linux_elf_modevent(module_t mod, int type, void *data)
1035 {
1036 Elf32_Brandinfo **brandinfo;
1037 int error;
1038 struct linux_ioctl_handler **lihp;
1039
1040 error = 0;
1041
1042 switch(type) {
1043 case MOD_LOAD:
1044 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1045 ++brandinfo)
1046 if (elf32_insert_brand_entry(*brandinfo) < 0)
1047 error = EINVAL;
1048 if (error == 0) {
1049 SET_FOREACH(lihp, linux_ioctl_handler_set)
1050 linux_ioctl_register_handler(*lihp);
1051 if (bootverbose)
1052 printf("Linux ELF exec handler installed\n");
1053 } else
1054 printf("cannot insert Linux ELF brand handler\n");
1055 break;
1056 case MOD_UNLOAD:
1057 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1058 ++brandinfo)
1059 if (elf32_brand_inuse(*brandinfo))
1060 error = EBUSY;
1061 if (error == 0) {
1062 for (brandinfo = &linux_brandlist[0];
1063 *brandinfo != NULL; ++brandinfo)
1064 if (elf32_remove_brand_entry(*brandinfo) < 0)
1065 error = EINVAL;
1066 }
1067 if (error == 0) {
1068 SET_FOREACH(lihp, linux_ioctl_handler_set)
1069 linux_ioctl_unregister_handler(*lihp);
1070 if (bootverbose)
1071 printf("Linux ELF exec handler removed\n");
1072 } else
1073 printf("Could not deinstall ELF interpreter entry\n");
1074 break;
1075 default:
1076 break;
1077 }
1078 return error;
1079 }
1080
/*
 * Module glue: registers the "linuxelf" module with linux_elf_modevent()
 * as its event handler and no private data, initialised in the
 * SI_SUB_EXEC subsystem at SI_ORDER_ANY.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Cache object: 3309580de242789ccb40b916f4e73ed4
|