1 /*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD: releng/5.3/sys/amd64/linux32/linux32_sysvec.c 133846 2004-08-16 11:15:46Z obrien $");
35
36 /* XXX we use functions that might not exist. */
37 #include "opt_compat.h"
38
39 #ifndef COMPAT_43
40 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
41 #endif
42 #ifndef COMPAT_IA32
43 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
44 #endif
45
46 #define __ELF_WORD_SIZE 32
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/exec.h>
51 #include <sys/imgact.h>
52 #include <sys/imgact_elf.h>
53 #include <sys/kernel.h>
54 #include <sys/lock.h>
55 #include <sys/malloc.h>
56 #include <sys/module.h>
57 #include <sys/mutex.h>
58 #include <sys/proc.h>
59 #include <sys/signalvar.h>
60 #include <sys/sysctl.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysent.h>
63 #include <sys/sysproto.h>
64 #include <sys/user.h>
65 #include <sys/vnode.h>
66
67 #include <vm/vm.h>
68 #include <vm/pmap.h>
69 #include <vm/vm_extern.h>
70 #include <vm/vm_map.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_param.h>
74
75 #include <machine/cpu.h>
76 #include <machine/md_var.h>
77 #include <machine/specialreg.h>
78
79 #include <amd64/linux32/linux.h>
80 #include <amd64/linux32/linux32_proto.h>
81 #include <compat/linux/linux_mib.h>
82 #include <compat/linux/linux_signal.h>
83 #include <compat/linux/linux_util.h>
84
/*
 * Module "linux", version 1.  It declares dependencies on the sysvmsg,
 * sysvsem and sysvshm modules.
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* malloc type M_LINUX, short name "linux". */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
91
/*
 * Emit one (id, val) pair with two suword32() stores to consecutive
 * 32-bit slots starting at pos.  pos is advanced past both slots, so it
 * must be a modifiable lvalue.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)					\
	do {								\
		suword32(pos++, id);					\
		suword32(pos++, val);					\
	} while (0)

/*
 * The two characters "#!" as they appear when the start of an image is
 * read as a single 16-bit integer (see exec_linux_imgact_try()).
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define	SHELLMAGIC	0x2321
#else
#define	SHELLMAGIC	0x2123		/* #! */
#endif
103
104 /*
105 * Allow the sendsig functions to use the ldebug() facility
106 * even though they are not syscalls themselves. Map them
107 * to syscall 0. This is slightly less bogus than using
108 * ldebug(sigreturn).
109 */
110 #define LINUX_SYS_linux_rt_sendsig 0
111 #define LINUX_SYS_linux_sendsig 0
112
113 extern char linux_sigcode[];
114 extern int linux_szsigcode;
115
116 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
117
118 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
119
120 static int elf_linux_fixup(register_t **stack_base,
121 struct image_params *iparams);
122 static register_t *linux_copyout_strings(struct image_params *imgp);
123 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
124 caddr_t *params);
125 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
126 u_long code);
127 static void exec_linux_setregs(struct thread *td, u_long entry,
128 u_long stack, u_long ps_strings);
129 static void linux32_fixlimits(struct image_params *imgp);
130
131 /*
132 * Linux syscalls return negative errno's, we do positive and map them
133 */
134 static int bsd_to_linux_errno[ELAST + 1] = {
135 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
136 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
137 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
138 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
139 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
140 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
141 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
142 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
143 -6, -6, -43, -42, -75, -6, -84
144 };
145
146 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
147 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
148 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
149 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
150 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
151 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
152 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
153 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
154 0, LINUX_SIGUSR1, LINUX_SIGUSR2
155 };
156
157 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
158 SIGHUP, SIGINT, SIGQUIT, SIGILL,
159 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
160 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
161 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
162 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
163 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
164 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
165 SIGIO, SIGURG, SIGSYS
166 };
167
/* Value reported when a FreeBSD trap code has no entry below. */
#define	LINUX_T_UNKNOWN	255

/*
 * Linux trap number for each FreeBSD trap code, indexed by the FreeBSD
 * code.  The leading comment on each line gives the index and, where
 * one applies, the FreeBSD trap name.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0               */ LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT   */ 6,
	/*  2               */ LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT      */ 3,
	/*  4               */ LINUX_T_UNKNOWN,
	/*  5               */ LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP   */ 16,
	/*  7 T_ASTFLT      */ 254,
	/*  8               */ LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT     */ 13,
	/* 10 T_TRCTRAP     */ 1,
	/* 11               */ LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT     */ 14,
	/* 13               */ LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT    */ 17,
	/* 15               */ LINUX_T_UNKNOWN,
	/* 16               */ LINUX_T_UNKNOWN,
	/* 17               */ LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE      */ 0,
	/* 19 T_NMI         */ 2,
	/* 20 T_OFLOW       */ 4,
	/* 21 T_BOUND       */ 5,
	/* 22 T_DNA         */ 7,
	/* 23 T_DOUBLEFLT   */ 8,
	/* 24 T_FPOPFLT     */ 9,
	/* 25 T_TSSFLT      */ 10,
	/* 26 T_SEGNPFLT    */ 11,
	/* 27 T_STKFLT      */ 12,
	/* 28 T_MCHK        */ 18,
	/* 29 T_XMMFLT      */ 19,
	/* 30 T_RESERVED    */ 15
};

/* Number of entries in _bsd_to_linux_trapcode[]. */
#define	LINUX_TRAPCODE_TBLSZ						\
	(sizeof(_bsd_to_linux_trapcode) / sizeof(*_bsd_to_linux_trapcode))

/*
 * Bounds-checked lookup: codes at or beyond the end of the table yield
 * LINUX_T_UNKNOWN.  The argument is evaluated more than once.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < LINUX_TRAPCODE_TBLSZ ?				\
	    _bsd_to_linux_trapcode[(code)] :				\
	    LINUX_T_UNKNOWN)
206
/*
 * Record filled in by linux_copyout_strings() at LINUX32_PS_STRINGS.
 * The string pointers are stored as 32-bit values.
 */
struct linux32_ps_strings {
	/* first of 0 or more argument strings */
	u_int32_t	ps_argvstr;
	/* the number of argument strings */
	int		ps_nargvstr;
	/* first of 0 or more environment strings */
	u_int32_t	ps_envstr;
	/* the number of environment strings */
	int		ps_nenvstr;
};
213
214 /*
215 * If FreeBSD & Linux have a difference of opinion about what a trap
216 * means, deal with it here.
217 *
218 * MPSAFE
219 */
220 static int
221 translate_traps(int signal, int trap_code)
222 {
223 if (signal != SIGBUS)
224 return signal;
225 switch (trap_code) {
226 case T_PROTFLT:
227 case T_TSSFLT:
228 case T_DOUBLEFLT:
229 case T_PAGEFLT:
230 return SIGSEGV;
231 default:
232 return signal;
233 }
234 }
235
236 static int
237 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
238 {
239 Elf32_Auxargs *args;
240 Elf32_Addr *base;
241 Elf32_Addr *pos;
242
243 KASSERT(curthread->td_proc == imgp->proc &&
244 (curthread->td_proc->p_flag & P_SA) == 0,
245 ("unsafe elf_linux_fixup(), should be curproc"));
246 base = (Elf32_Addr *)*stack_base;
247 args = (Elf32_Auxargs *)imgp->auxargs;
248 pos = base + (imgp->argc + imgp->envc + 2);
249
250 if (args->trace)
251 AUXARGS_ENTRY_32(pos, AT_DEBUG, 1);
252 if (args->execfd != -1)
253 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
254 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
255 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
256 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
257 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
258 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
259 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
260 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
261 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
262 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
263 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
264 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
265 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
266
267 free(imgp->auxargs, M_TEMP);
268 imgp->auxargs = NULL;
269
270 base--;
271 suword32(base, (uint32_t)imgp->argc);
272 *stack_base = (register_t *)base;
273 return 0;
274 }
275
/* User segment selectors; defined outside this file. */
extern int	_ucodesel;
extern int	_ucode32sel;
extern int	_udatasel;

/*
 * Added to the start address of the signal trampoline when entering an
 * rt handler (see linux_rt_sendsig()); defined outside this file.
 */
extern unsigned long	linux_sznonrtsigcode;
278
279 static void
280 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
281 {
282 struct thread *td = curthread;
283 struct proc *p = td->td_proc;
284 struct sigacts *psp;
285 struct trapframe *regs;
286 struct l_rt_sigframe *fp, frame;
287 int oonstack;
288
289 PROC_LOCK_ASSERT(p, MA_OWNED);
290 psp = p->p_sigacts;
291 mtx_assert(&psp->ps_mtx, MA_OWNED);
292 regs = td->td_frame;
293 oonstack = sigonstack(regs->tf_rsp);
294
295 #ifdef DEBUG
296 if (ldebug(rt_sendsig))
297 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
298 catcher, sig, (void*)mask, code);
299 #endif
300 /*
301 * Allocate space for the signal handler context.
302 */
303 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
304 SIGISMEMBER(psp->ps_sigonstack, sig)) {
305 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
306 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
307 } else
308 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
309 mtx_unlock(&psp->ps_mtx);
310
311 /*
312 * Build the argument list for the signal handler.
313 */
314 if (p->p_sysent->sv_sigtbl)
315 if (sig <= p->p_sysent->sv_sigsize)
316 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
317
318 bzero(&frame, sizeof(frame));
319
320 frame.sf_handler = PTROUT(catcher);
321 frame.sf_sig = sig;
322 frame.sf_siginfo = PTROUT(&fp->sf_si);
323 frame.sf_ucontext = PTROUT(&fp->sf_sc);
324
325 /* Fill in POSIX parts */
326 frame.sf_si.lsi_signo = sig;
327 frame.sf_si.lsi_code = code;
328 frame.sf_si.lsi_addr = PTROUT(regs->tf_err);
329
330 /*
331 * Build the signal context to be used by sigreturn.
332 */
333 frame.sf_sc.uc_flags = 0; /* XXX ??? */
334 frame.sf_sc.uc_link = 0; /* XXX ??? */
335
336 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
337 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
338 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
339 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
340 PROC_UNLOCK(p);
341
342 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
343
344 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
345 frame.sf_sc.uc_mcontext.sc_gs = rgs();
346 frame.sf_sc.uc_mcontext.sc_fs = rfs();
347 __asm __volatile("movl %%es,%0" :
348 "=rm" (frame.sf_sc.uc_mcontext.sc_es));
349 __asm __volatile("movl %%ds,%0" :
350 "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
351 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
352 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
353 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
354 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
355 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
356 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
357 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
358 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
359 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
360 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
361 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
362 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
363 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
364 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
365
366 #ifdef DEBUG
367 if (ldebug(rt_sendsig))
368 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
369 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
370 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
371 #endif
372
373 if (copyout(&frame, fp, sizeof(frame)) != 0) {
374 /*
375 * Process has trashed its stack; give it an illegal
376 * instruction to halt it in its tracks.
377 */
378 #ifdef DEBUG
379 if (ldebug(rt_sendsig))
380 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
381 fp, oonstack);
382 #endif
383 PROC_LOCK(p);
384 sigexit(td, SIGILL);
385 }
386
387 /*
388 * Build context to run handler in.
389 */
390 regs->tf_rsp = PTROUT(fp);
391 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
392 linux_sznonrtsigcode;
393 regs->tf_rflags &= ~PSL_T;
394 regs->tf_cs = _ucode32sel;
395 regs->tf_ss = _udatasel;
396 load_ds(_udatasel);
397 td->td_pcb->pcb_ds = _udatasel;
398 load_es(_udatasel);
399 td->td_pcb->pcb_es = _udatasel;
400 PROC_LOCK(p);
401 mtx_lock(&psp->ps_mtx);
402 }
403
404
405 /*
406 * Send an interrupt to process.
407 *
408 * Stack is set up to allow sigcode stored
409 * in u. to call routine, followed by kcall
410 * to sigreturn routine below. After sigreturn
411 * resets the signal mask, the stack, and the
412 * frame pointer, it returns to the user
413 * specified pc, psl.
414 */
415 static void
416 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
417 {
418 struct thread *td = curthread;
419 struct proc *p = td->td_proc;
420 struct sigacts *psp;
421 struct trapframe *regs;
422 struct l_sigframe *fp, frame;
423 l_sigset_t lmask;
424 int oonstack, i;
425
426 PROC_LOCK_ASSERT(p, MA_OWNED);
427 psp = p->p_sigacts;
428 mtx_assert(&psp->ps_mtx, MA_OWNED);
429 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
430 /* Signal handler installed with SA_SIGINFO. */
431 linux_rt_sendsig(catcher, sig, mask, code);
432 return;
433 }
434
435 regs = td->td_frame;
436 oonstack = sigonstack(regs->tf_rsp);
437
438 #ifdef DEBUG
439 if (ldebug(sendsig))
440 printf(ARGS(sendsig, "%p, %d, %p, %lu"),
441 catcher, sig, (void*)mask, code);
442 #endif
443
444 /*
445 * Allocate space for the signal handler context.
446 */
447 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
448 SIGISMEMBER(psp->ps_sigonstack, sig)) {
449 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
450 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
451 } else
452 fp = (struct l_sigframe *)regs->tf_rsp - 1;
453 mtx_unlock(&psp->ps_mtx);
454 PROC_UNLOCK(p);
455
456 /*
457 * Build the argument list for the signal handler.
458 */
459 if (p->p_sysent->sv_sigtbl)
460 if (sig <= p->p_sysent->sv_sigsize)
461 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
462
463 bzero(&frame, sizeof(frame));
464
465 frame.sf_handler = PTROUT(catcher);
466 frame.sf_sig = sig;
467
468 bsd_to_linux_sigset(mask, &lmask);
469
470 /*
471 * Build the signal context to be used by sigreturn.
472 */
473 frame.sf_sc.sc_mask = lmask.__bits[0];
474 frame.sf_sc.sc_gs = rgs();
475 frame.sf_sc.sc_fs = rfs();
476 __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
477 __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
478 frame.sf_sc.sc_edi = regs->tf_rdi;
479 frame.sf_sc.sc_esi = regs->tf_rsi;
480 frame.sf_sc.sc_ebp = regs->tf_rbp;
481 frame.sf_sc.sc_ebx = regs->tf_rbx;
482 frame.sf_sc.sc_edx = regs->tf_rdx;
483 frame.sf_sc.sc_ecx = regs->tf_rcx;
484 frame.sf_sc.sc_eax = regs->tf_rax;
485 frame.sf_sc.sc_eip = regs->tf_rip;
486 frame.sf_sc.sc_cs = regs->tf_cs;
487 frame.sf_sc.sc_eflags = regs->tf_rflags;
488 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
489 frame.sf_sc.sc_ss = regs->tf_ss;
490 frame.sf_sc.sc_err = regs->tf_err;
491 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
492
493 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
494 frame.sf_extramask[i] = lmask.__bits[i+1];
495
496 if (copyout(&frame, fp, sizeof(frame)) != 0) {
497 /*
498 * Process has trashed its stack; give it an illegal
499 * instruction to halt it in its tracks.
500 */
501 PROC_LOCK(p);
502 sigexit(td, SIGILL);
503 }
504
505 /*
506 * Build context to run handler in.
507 */
508 regs->tf_rsp = PTROUT(fp);
509 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
510 regs->tf_rflags &= ~PSL_T;
511 regs->tf_cs = _ucode32sel;
512 regs->tf_ss = _udatasel;
513 load_ds(_udatasel);
514 td->td_pcb->pcb_ds = _udatasel;
515 load_es(_udatasel);
516 td->td_pcb->pcb_es = _udatasel;
517 PROC_LOCK(p);
518 mtx_lock(&psp->ps_mtx);
519 }
520
521 /*
522 * System call to cleanup state after a signal
523 * has been taken. Reset signal mask and
524 * stack state from context left by sendsig (above).
525 * Return to previous pc and psl as specified by
526 * context left by sendsig. Check carefully to
527 * make sure that the user has not modified the
528 * psl to gain improper privileges or to cause
529 * a machine fault.
530 */
531 int
532 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
533 {
534 struct proc *p = td->td_proc;
535 struct l_sigframe frame;
536 struct trapframe *regs;
537 l_sigset_t lmask;
538 int eflags, i;
539
540 regs = td->td_frame;
541
542 #ifdef DEBUG
543 if (ldebug(sigreturn))
544 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
545 #endif
546 /*
547 * The trampoline code hands us the sigframe.
548 * It is unsafe to keep track of it ourselves, in the event that a
549 * program jumps out of a signal handler.
550 */
551 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
552 return (EFAULT);
553
554 /*
555 * Check for security violations.
556 */
557 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
558 eflags = frame.sf_sc.sc_eflags;
559 /*
560 * XXX do allow users to change the privileged flag PSL_RF. The
561 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
562 * sometimes set it there too. tf_eflags is kept in the signal
563 * context during signal handling and there is no other place
564 * to remember it, so the PSL_RF bit may be corrupted by the
565 * signal handler without us knowing. Corruption of the PSL_RF
566 * bit at worst causes one more or one less debugger trap, so
567 * allowing it is fairly harmless.
568 */
569 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
570 return(EINVAL);
571
572 /*
573 * Don't allow users to load a valid privileged %cs. Let the
574 * hardware check for invalid selectors, excess privilege in
575 * other selectors, invalid %eip's and invalid %esp's.
576 */
577 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
578 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
579 trapsignal(td, SIGBUS, T_PROTFLT);
580 return(EINVAL);
581 }
582
583 lmask.__bits[0] = frame.sf_sc.sc_mask;
584 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
585 lmask.__bits[i+1] = frame.sf_extramask[i];
586 PROC_LOCK(p);
587 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
588 SIG_CANTMASK(td->td_sigmask);
589 signotify(td);
590 PROC_UNLOCK(p);
591
592 /*
593 * Restore signal context.
594 */
595 /* Selectors were restored by the trampoline. */
596 regs->tf_rdi = frame.sf_sc.sc_edi;
597 regs->tf_rsi = frame.sf_sc.sc_esi;
598 regs->tf_rbp = frame.sf_sc.sc_ebp;
599 regs->tf_rbx = frame.sf_sc.sc_ebx;
600 regs->tf_rdx = frame.sf_sc.sc_edx;
601 regs->tf_rcx = frame.sf_sc.sc_ecx;
602 regs->tf_rax = frame.sf_sc.sc_eax;
603 regs->tf_rip = frame.sf_sc.sc_eip;
604 regs->tf_cs = frame.sf_sc.sc_cs;
605 regs->tf_rflags = eflags;
606 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
607 regs->tf_ss = frame.sf_sc.sc_ss;
608
609 return (EJUSTRETURN);
610 }
611
612 /*
613 * System call to cleanup state after a signal
614 * has been taken. Reset signal mask and
615 * stack state from context left by rt_sendsig (above).
616 * Return to previous pc and psl as specified by
617 * context left by sendsig. Check carefully to
618 * make sure that the user has not modified the
619 * psl to gain improper privileges or to cause
620 * a machine fault.
621 */
622 int
623 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
624 {
625 struct proc *p = td->td_proc;
626 struct l_ucontext uc;
627 struct l_sigcontext *context;
628 l_stack_t *lss;
629 stack_t ss;
630 struct trapframe *regs;
631 int eflags;
632
633 regs = td->td_frame;
634
635 #ifdef DEBUG
636 if (ldebug(rt_sigreturn))
637 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
638 #endif
639 /*
640 * The trampoline code hands us the ucontext.
641 * It is unsafe to keep track of it ourselves, in the event that a
642 * program jumps out of a signal handler.
643 */
644 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
645 return (EFAULT);
646
647 context = &uc.uc_mcontext;
648
649 /*
650 * Check for security violations.
651 */
652 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
653 eflags = context->sc_eflags;
654 /*
655 * XXX do allow users to change the privileged flag PSL_RF. The
656 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
657 * sometimes set it there too. tf_eflags is kept in the signal
658 * context during signal handling and there is no other place
659 * to remember it, so the PSL_RF bit may be corrupted by the
660 * signal handler without us knowing. Corruption of the PSL_RF
661 * bit at worst causes one more or one less debugger trap, so
662 * allowing it is fairly harmless.
663 */
664 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
665 return(EINVAL);
666
667 /*
668 * Don't allow users to load a valid privileged %cs. Let the
669 * hardware check for invalid selectors, excess privilege in
670 * other selectors, invalid %eip's and invalid %esp's.
671 */
672 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
673 if (!CS_SECURE(context->sc_cs)) {
674 trapsignal(td, SIGBUS, T_PROTFLT);
675 return(EINVAL);
676 }
677
678 PROC_LOCK(p);
679 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
680 SIG_CANTMASK(td->td_sigmask);
681 signotify(td);
682 PROC_UNLOCK(p);
683
684 /*
685 * Restore signal context
686 */
687 /* Selectors were restored by the trampoline. */
688 regs->tf_rdi = context->sc_edi;
689 regs->tf_rsi = context->sc_esi;
690 regs->tf_rbp = context->sc_ebp;
691 regs->tf_rbx = context->sc_ebx;
692 regs->tf_rdx = context->sc_edx;
693 regs->tf_rcx = context->sc_ecx;
694 regs->tf_rax = context->sc_eax;
695 regs->tf_rip = context->sc_eip;
696 regs->tf_cs = context->sc_cs;
697 regs->tf_rflags = eflags;
698 regs->tf_rsp = context->sc_esp_at_signal;
699 regs->tf_ss = context->sc_ss;
700
701 /*
702 * call sigaltstack & ignore results..
703 */
704 lss = &uc.uc_stack;
705 ss.ss_sp = PTRIN(lss->ss_sp);
706 ss.ss_size = lss->ss_size;
707 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
708
709 #ifdef DEBUG
710 if (ldebug(rt_sigreturn))
711 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
712 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
713 #endif
714 (void)kern_sigaltstack(td, &ss, NULL);
715
716 return (EJUSTRETURN);
717 }
718
719 /*
720 * MPSAFE
721 */
722 static void
723 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
724 {
725 args[0] = tf->tf_rbx;
726 args[1] = tf->tf_rcx;
727 args[2] = tf->tf_rdx;
728 args[3] = tf->tf_rsi;
729 args[4] = tf->tf_rdi;
730 args[5] = tf->tf_rbp; /* Unconfirmed */
731 *params = NULL; /* no copyin */
732 }
733
734 /*
735 * If a linux binary is exec'ing something, try this image activator
736 * first. We override standard shell script execution in order to
737 * be able to modify the interpreter path. We only do this if a linux
738 * binary is doing the exec, so we do not create an EXEC module for it.
739 */
740 static int exec_linux_imgact_try(struct image_params *iparams);
741
742 static int
743 exec_linux_imgact_try(struct image_params *imgp)
744 {
745 const char *head = (const char *)imgp->image_header;
746 int error = -1;
747
748 /*
749 * The interpreter for shell scripts run from a linux binary needs
750 * to be located in /compat/linux if possible in order to recursively
751 * maintain linux path emulation.
752 */
753 if (((const short *)head)[0] == SHELLMAGIC) {
754 /*
755 * Run our normal shell image activator. If it succeeds attempt
756 * to use the alternate path for the interpreter. If an alternate
757 * path is found, use our stringspace to store it.
758 */
759 if ((error = exec_shell_imgact(imgp)) == 0) {
760 char *rpath = NULL;
761
762 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
763 imgp->interpreter_name, &rpath, 0);
764 if (rpath != imgp->interpreter_name) {
765 int len = strlen(rpath) + 1;
766
767 if (len <= MAXSHELLCMDLEN) {
768 memcpy(imgp->interpreter_name, rpath, len);
769 }
770 free(rpath, M_TEMP);
771 }
772 }
773 }
774 return(error);
775 }
776
777 /*
778 * Clear registers on exec
779 * XXX copied from ia32_signal.c.
780 */
781 static void
782 exec_linux_setregs(td, entry, stack, ps_strings)
783 struct thread *td;
784 u_long entry;
785 u_long stack;
786 u_long ps_strings;
787 {
788 struct trapframe *regs = td->td_frame;
789 struct pcb *pcb = td->td_pcb;
790
791 wrmsr(MSR_FSBASE, 0);
792 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
793 pcb->pcb_fsbase = 0;
794 pcb->pcb_gsbase = 0;
795 load_ds(_udatasel);
796 load_es(_udatasel);
797 load_fs(_udatasel);
798 load_gs(0);
799 pcb->pcb_ds = _udatasel;
800 pcb->pcb_es = _udatasel;
801 pcb->pcb_fs = _udatasel;
802 pcb->pcb_gs = 0;
803
804 bzero((char *)regs, sizeof(struct trapframe));
805 regs->tf_rip = entry;
806 regs->tf_rsp = stack;
807 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
808 regs->tf_ss = _udatasel;
809 regs->tf_cs = _ucode32sel;
810 regs->tf_rbx = ps_strings;
811 load_cr0(rcr0() | CR0_MP | CR0_TS);
812
813 /* Return via doreti so that we can change to a different %cs */
814 pcb->pcb_flags |= PCB_FULLCTX;
815 td->td_retval[1] = 0;
816 }
817
818 /*
819 * XXX copied from ia32_sysvec.c.
820 */
821 static register_t *
822 linux_copyout_strings(struct image_params *imgp)
823 {
824 int argc, envc;
825 u_int32_t *vectp;
826 char *stringp, *destp;
827 u_int32_t *stack_base;
828 struct linux32_ps_strings *arginfo;
829 int sigcodesz;
830
831 /*
832 * Calculate string base and vector table pointers.
833 * Also deal with signal trampoline code for this exec type.
834 */
835 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
836 sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode);
837 destp = (caddr_t)arginfo - sigcodesz - SPARE_USRSPACE -
838 roundup((ARG_MAX - imgp->stringspace), sizeof(char *));
839
840 /*
841 * install sigcode
842 */
843 if (sigcodesz)
844 copyout(imgp->proc->p_sysent->sv_sigcode,
845 ((caddr_t)arginfo - sigcodesz), szsigcode);
846
847 /*
848 * If we have a valid auxargs ptr, prepare some room
849 * on the stack.
850 */
851 if (imgp->auxargs) {
852 /*
853 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
854 * lower compatibility.
855 */
856 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
857 : (AT_COUNT * 2);
858 /*
859 * The '+ 2' is for the null pointers at the end of each of
860 * the arg and env vector sets,and imgp->auxarg_size is room
861 * for argument of Runtime loader.
862 */
863 vectp = (u_int32_t *) (destp - (imgp->argc + imgp->envc + 2 +
864 imgp->auxarg_size) * sizeof(u_int32_t));
865
866 } else
867 /*
868 * The '+ 2' is for the null pointers at the end of each of
869 * the arg and env vector sets
870 */
871 vectp = (u_int32_t *)
872 (destp - (imgp->argc + imgp->envc + 2) * sizeof(u_int32_t));
873
874 /*
875 * vectp also becomes our initial stack base
876 */
877 stack_base = vectp;
878
879 stringp = imgp->stringbase;
880 argc = imgp->argc;
881 envc = imgp->envc;
882 /*
883 * Copy out strings - arguments and environment.
884 */
885 copyout(stringp, destp, ARG_MAX - imgp->stringspace);
886
887 /*
888 * Fill in "ps_strings" struct for ps, w, etc.
889 */
890 suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
891 suword32(&arginfo->ps_nargvstr, argc);
892
893 /*
894 * Fill in argument portion of vector table.
895 */
896 for (; argc > 0; --argc) {
897 suword32(vectp++, (u_int32_t)(intptr_t)destp);
898 while (*stringp++ != 0)
899 destp++;
900 destp++;
901 }
902
903 /* a null vector table pointer separates the argp's from the envp's */
904 suword32(vectp++, 0);
905
906 suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
907 suword32(&arginfo->ps_nenvstr, envc);
908
909 /*
910 * Fill in environment portion of vector table.
911 */
912 for (; envc > 0; --envc) {
913 suword32(vectp++, (u_int32_t)(intptr_t)destp);
914 while (*stringp++ != 0)
915 destp++;
916 destp++;
917 }
918
919 /* end of vector table is a null pointer */
920 suword32(vectp, 0);
921
922 return ((register_t *)stack_base);
923 }
924
/* Sysctl node "linux32" under the _compat parent. */
SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

/*
 * Read-write ceilings applied by linux32_fixlimits() to the data size,
 * stack size and virtual memory resource limits.  A value of 0 disables
 * the corresponding ceiling.
 */
static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");
937
938 /*
939 * XXX copied from ia32_sysvec.c.
940 */
941 static void
942 linux32_fixlimits(struct image_params *imgp)
943 {
944 struct proc *p = imgp->proc;
945 struct plimit *oldlim, *newlim;
946
947 if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 &&
948 linux32_maxvmem == 0)
949 return;
950 newlim = lim_alloc();
951 PROC_LOCK(p);
952 oldlim = p->p_limit;
953 lim_copy(newlim, oldlim);
954 if (linux32_maxdsiz != 0) {
955 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz)
956 newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz;
957 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz)
958 newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz;
959 }
960 if (linux32_maxssiz != 0) {
961 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz)
962 newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz;
963 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz)
964 newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz;
965 }
966 if (linux32_maxvmem != 0) {
967 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem)
968 newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem;
969 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem)
970 newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem;
971 }
972 p->p_limit = newlim;
973 PROC_UNLOCK(p);
974 lim_free(oldlim);
975 }
976
/*
 * System entry vector for 32-bit Linux ELF images.
 *
 * The initializers are positional, so their order must match the field
 * order of struct sysentvec.  The comments describe the values; field
 * names are given only where this file reads the field by name.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* dimension of linux_sysent[] */
	linux_sysent,		/* system call table */
	0xff,			/* NOTE(review): presumably a syscall number mask -- confirm */
	LINUX_SIGTBLSZ,		/* signal table size (read as sv_sigsize) */
	bsd_to_linux_signal,	/* signal table (read as sv_sigtbl) */
	ELAST + 1,		/* dimension of bsd_to_linux_errno[] */
	bsd_to_linux_errno,	/* errno translation table */
	translate_traps,	/* trap-to-signal adjustment */
	elf_linux_fixup,	/* stack fixup at exec time */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal trampoline (read as sv_sigcode) */
	&linux_szsigcode,	/* trampoline size (read as sv_szsigcode) */
	linux_prepsyscall,	/* syscall argument gathering */
	"Linux ELF32",		/* name */
	elf32_coredump,		/* core dump routine */
	exec_linux_imgact_try,	/* image activator tried first */
	LINUX_MINSIGSTKSZ,	/* minimum signal stack size */
	PAGE_SIZE,		/* page size */
	VM_MIN_ADDRESS,		/* NOTE(review): presumably lowest user address -- confirm */
	LINUX32_USRSTACK,	/* NOTE(review): presumably highest user address -- confirm */
	LINUX32_USRSTACK,	/* NOTE(review): presumably user stack top -- confirm */
	LINUX32_PS_STRINGS,	/* address of the ps_strings record */
	VM_PROT_ALL,		/* NOTE(review): presumably stack protection -- confirm */
	linux_copyout_strings,	/* argv/envp/string copy-out */
	exec_linux_setregs,	/* register setup at exec time */
	linux32_fixlimits	/* resource limit adjustment */
};
1005
/*
 * ELF brand descriptors for Linux/i386 images.  The two entries differ
 * only in the interpreter path ("/lib/ld-linux.so.1" versus
 * "/lib/ld-linux.so.2"); both use "/compat/linux" and elf_linux_sysvec.
 * The initializers are positional.
 */
static Elf32_Brandinfo linux_brand = {
	ELFOSABI_LINUX,
	EM_386,
	"Linux",
	"/compat/linux",
	"/lib/ld-linux.so.1",
	&elf_linux_sysvec,
	NULL,
};

static Elf32_Brandinfo linux_glibc2brand = {
	ELFOSABI_LINUX,
	EM_386,
	"Linux",
	"/compat/linux",
	"/lib/ld-linux.so.2",
	&elf_linux_sysvec,
	NULL,
};

/* NULL-terminated list of the brands handled by linux_elf_modevent(). */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	NULL
};
1031
1032 static int
1033 linux_elf_modevent(module_t mod, int type, void *data)
1034 {
1035 Elf32_Brandinfo **brandinfo;
1036 int error;
1037 struct linux_ioctl_handler **lihp;
1038
1039 error = 0;
1040
1041 switch(type) {
1042 case MOD_LOAD:
1043 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1044 ++brandinfo)
1045 if (elf32_insert_brand_entry(*brandinfo) < 0)
1046 error = EINVAL;
1047 if (error == 0) {
1048 SET_FOREACH(lihp, linux_ioctl_handler_set)
1049 linux_ioctl_register_handler(*lihp);
1050 if (bootverbose)
1051 printf("Linux ELF exec handler installed\n");
1052 } else
1053 printf("cannot insert Linux ELF brand handler\n");
1054 break;
1055 case MOD_UNLOAD:
1056 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1057 ++brandinfo)
1058 if (elf32_brand_inuse(*brandinfo))
1059 error = EBUSY;
1060 if (error == 0) {
1061 for (brandinfo = &linux_brandlist[0];
1062 *brandinfo != NULL; ++brandinfo)
1063 if (elf32_remove_brand_entry(*brandinfo) < 0)
1064 error = EINVAL;
1065 }
1066 if (error == 0) {
1067 SET_FOREACH(lihp, linux_ioctl_handler_set)
1068 linux_ioctl_unregister_handler(*lihp);
1069 if (bootverbose)
1070 printf("Linux ELF exec handler removed\n");
1071 linux_mib_destroy();
1072 } else
1073 printf("Could not deinstall ELF interpreter entry\n");
1074 break;
1075 default:
1076 break;
1077 }
1078 return error;
1079 }
1080
/* Module description: name, event handler, and a third field left 0. */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Register the module with subsystem SI_SUB_EXEC, order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Cache object: b5c768e579bc6da6b367bc42f2e64aef
|