1 /*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD: releng/6.2/sys/amd64/linux32/linux32_sysvec.c 163947 2006-11-03 09:18:31Z kib $");
35
36 /* XXX we use functions that might not exist. */
37 #include "opt_compat.h"
38
39 #ifndef COMPAT_43
40 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
41 #endif
42 #ifndef COMPAT_IA32
43 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
44 #endif
45
46 #define __ELF_WORD_SIZE 32
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/exec.h>
51 #include <sys/imgact.h>
52 #include <sys/imgact_elf.h>
53 #include <sys/kernel.h>
54 #include <sys/lock.h>
55 #include <sys/malloc.h>
56 #include <sys/module.h>
57 #include <sys/mutex.h>
58 #include <sys/proc.h>
59 #include <sys/resourcevar.h>
60 #include <sys/signalvar.h>
61 #include <sys/sysctl.h>
62 #include <sys/syscallsubr.h>
63 #include <sys/sysent.h>
64 #include <sys/sysproto.h>
65 #include <sys/vnode.h>
66
67 #include <vm/vm.h>
68 #include <vm/pmap.h>
69 #include <vm/vm_extern.h>
70 #include <vm/vm_map.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_param.h>
74
75 #include <machine/cpu.h>
76 #include <machine/md_var.h>
77 #include <machine/pcb.h>
78 #include <machine/specialreg.h>
79
80 #include <amd64/linux32/linux.h>
81 #include <amd64/linux32/linux32_proto.h>
82 #include <compat/linux/linux_mib.h>
83 #include <compat/linux/linux_signal.h>
84 #include <compat/linux/linux_util.h>
85
/*
 * Module identity and dependencies: this file provides version 1 of the
 * "linux" module and declares dependencies on the three SysV IPC modules.
 * NOTE(review): the three trailing 1s of each MODULE_DEPEND() are presumably
 * the minimum, preferred and maximum acceptable versions -- confirm against
 * MODULE_DEPEND(9).
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* malloc(9) type M_LINUX, labelled "linux" / "Linux mode structures". */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
92
/*
 * Emit one ELF auxiliary-vector entry as two consecutive 32-bit words
 * (id, then value) through suword32().
 *
 * 'pos' is expanded twice and post-incremented each time, so it must be a
 * plain modifiable pointer lvalue; after the macro it points just past the
 * entry that was written.  The suword32() results are not checked.
 */
#define AUXARGS_ENTRY_32(pos, id, val) \
	do { \
		suword32(pos++, id); \
		suword32(pos++, val); \
	} while (0)

/*
 * The two characters "#!" as they appear when the first two bytes of an
 * image header are read as a single 16-bit value (see
 * exec_linux_imgact_try()); the value depends on host byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC 0x2123 /* #! */
#else
#define SHELLMAGIC 0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define LINUX_SYS_linux_rt_sendsig 0
#define LINUX_SYS_linux_sendsig 0
113
/*
 * Signal trampoline code and its size in bytes; both are defined outside
 * this file and are installed in elf_linux_sysvec below.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux syscall table, LINUX_SYS_MAXSYSCALL entries, defined elsewhere. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/*
 * Linker sets of ioctl and device handlers; linux_elf_modevent() walks them
 * to register the handlers on load and unregister them on unload.
 */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations of the sysentvec callbacks implemented below. */
static int elf_linux_fixup(register_t **stack_base,
    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
    caddr_t *params);
static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
    u_long code);
static void exec_linux_setregs(struct thread *td, u_long entry,
    u_long stack, u_long ps_strings);
static void linux32_fixlimits(struct image_params *imgp);
132
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 *
 * The table is indexed by the FreeBSD errno value (0 .. ELAST) and holds the
 * negated Linux errno to hand back to the Linux binary.  Every index up to
 * ELAST must have an explicit entry: an omitted trailing entry would be
 * zero-initialized, and 0 means "success" to the caller.  The initializer
 * below covers indices 0 .. 92 (ELAST is 92 on this branch); if ELAST grows,
 * new entries must be appended here.
 *
 * FreeBSD errnos with no close Linux counterpart are mapped to -6 (ENXIO).
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	/*  0 */   -0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
	/* 10 */  -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	/* 20 */  -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	/* 30 */  -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	/* 40 */  -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	/* 50 */ -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	/* 60 */ -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	/* 70 */ -116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
	/* 80 */   -6,		/* EAUTH */
	/* 81 */   -6,		/* ENEEDAUTH */
	/* 82 */  -43,		/* EIDRM */
	/* 83 */  -42,		/* ENOMSG */
	/* 84 */  -75,		/* EOVERFLOW */
	/* 85 */ -125,		/* ECANCELED -> Linux ECANCELED */
	/* 86 */  -84,		/* EILSEQ */
	/* 87 */  -61,		/* ENOATTR -> Linux ENODATA */
	/* 88 */  -16,		/* EDOOFUS -> Linux EBUSY */
	/* 89 */  -74,		/* EBADMSG */
	/* 90 */  -72,		/* EMULTIHOP */
	/* 91 */  -67,		/* ENOLINK */
	/* 92 */  -71		/* EPROTO */
};
147
148 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
149 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
150 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
151 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
152 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
153 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
154 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
155 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
156 0, LINUX_SIGUSR1, LINUX_SIGUSR2
157 };
158
/*
 * Linux -> FreeBSD signal number translation.  Slot i holds the FreeBSD
 * signal delivered for Linux signal i + 1.  Several Linux signals share a
 * FreeBSD signal (SIGBUS and SIGURG each appear twice).
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	/*  0 */ SIGHUP,
	/*  1 */ SIGINT,
	/*  2 */ SIGQUIT,
	/*  3 */ SIGILL,
	/*  4 */ SIGTRAP,
	/*  5 */ SIGABRT,
	/*  6 */ SIGBUS,
	/*  7 */ SIGFPE,
	/*  8 */ SIGKILL,
	/*  9 */ SIGUSR1,
	/* 10 */ SIGSEGV,
	/* 11 */ SIGUSR2,
	/* 12 */ SIGPIPE,
	/* 13 */ SIGALRM,
	/* 14 */ SIGTERM,
	/* 15 */ SIGBUS,
	/* 16 */ SIGCHLD,
	/* 17 */ SIGCONT,
	/* 18 */ SIGSTOP,
	/* 19 */ SIGTSTP,
	/* 20 */ SIGTTIN,
	/* 21 */ SIGTTOU,
	/* 22 */ SIGURG,
	/* 23 */ SIGXCPU,
	/* 24 */ SIGXFSZ,
	/* 25 */ SIGVTALRM,
	/* 26 */ SIGPROF,
	/* 27 */ SIGWINCH,
	/* 28 */ SIGIO,
	/* 29 */ SIGURG,
	/* 30 */ SIGSYS
};
169
/* Linux trap number reported when a FreeBSD trap code has no mapping. */
#define LINUX_T_UNKNOWN 255

/*
 * FreeBSD trap code -> Linux trap number, indexed by the FreeBSD code.
 * Every slot is listed explicitly so that unmapped codes read back as
 * LINUX_T_UNKNOWN rather than 0 (which is the mapping for T_DIVIDE).
 */
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15			/* T_RESERVED */
};

/*
 * Bounds-checked lookup: codes at or beyond the end of the table yield
 * LINUX_T_UNKNOWN.  'code' is evaluated more than once.
 */
#define bsd_to_linux_trapcode(code) \
	((code) < sizeof(_bsd_to_linux_trapcode) / \
	    sizeof(_bsd_to_linux_trapcode[0]) ? \
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
208
/*
 * 32-bit layout of the "ps_strings" record that linux_copyout_strings()
 * fills in at LINUX32_PS_STRINGS.  The pointer members are stored as 32-bit
 * integers because the record lives in a 32-bit process image.
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr; /* first of 0 or more argument strings */
	u_int ps_nargvstr; /* the number of argument strings */
	u_int32_t ps_envstr; /* first of 0 or more environment strings */
	u_int ps_nenvstr; /* the number of environment strings */
};
215
/*
 * Reconcile FreeBSD's and Linux's view of which signal a trap raises.
 *
 * Only SIGBUS is ever rewritten: when it was caused by a protection, TSS,
 * double or page fault it becomes SIGSEGV.  Every other signal/trap
 * combination is returned unchanged.
 *
 * MPSAFE
 */
static int
translate_traps(int signal, int trap_code)
{
	int segv_class;

	segv_class = (trap_code == T_PROTFLT || trap_code == T_TSSFLT ||
	    trap_code == T_DOUBLEFLT || trap_code == T_PAGEFLT);

	if (signal == SIGBUS && segv_class)
		return (SIGSEGV);
	return (signal);
}
237
238 static int
239 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
240 {
241 Elf32_Auxargs *args;
242 Elf32_Addr *base;
243 Elf32_Addr *pos;
244
245 KASSERT(curthread->td_proc == imgp->proc &&
246 (curthread->td_proc->p_flag & P_SA) == 0,
247 ("unsafe elf_linux_fixup(), should be curproc"));
248 base = (Elf32_Addr *)*stack_base;
249 args = (Elf32_Auxargs *)imgp->auxargs;
250 pos = base + (imgp->args->argc + imgp->args->envc + 2);
251
252 if (args->trace)
253 AUXARGS_ENTRY_32(pos, AT_DEBUG, 1);
254 if (args->execfd != -1)
255 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
256 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
257 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
258 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
259 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
260 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
261 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
262 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
263 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
264 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
265 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
266 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
267 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
268
269 free(imgp->auxargs, M_TEMP);
270 imgp->auxargs = NULL;
271
272 base--;
273 suword32(base, (uint32_t)imgp->args->argc);
274 *stack_base = (register_t *)base;
275 return 0;
276 }
277
/* User segment selectors and the size of the non-rt trampoline; defined elsewhere. */
extern int _ucodesel, _ucode32sel, _udatasel;
extern unsigned long linux_sznonrtsigcode;

/*
 * Deliver a signal to the current thread using the Linux "rt" signal frame
 * (struct l_rt_sigframe).  linux_sendsig() calls this when the handler was
 * installed with SA_SIGINFO.
 *
 *	catcher	user handler address, stored in the frame
 *	sig	signal number; translated through the sysentvec's sv_sigtbl
 *		when the process has one
 *	mask	signal mask recorded in the frame (converted with
 *		bsd_to_linux_sigset())
 *	code	stored as lsi_code and translated into sc_trapno
 *
 * Locking: the proc lock and ps_mtx must be held on entry (both are
 * asserted).  Both are released while the frame is assembled and copied
 * out, and both are re-acquired before returning.  If the frame cannot be
 * copied out, sigexit(td, SIGILL) is called with the proc lock held.
 */
static void
linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;	/* user address, kernel copy */
	int oonstack;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	/* Remember whether we were already on the alternate stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context: at the top of the
	 * alternate stack when one is configured, not yet in use and
	 * requested for this signal; otherwise one frame below the current
	 * user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	/* psp is not consulted again until ps_mtx is re-taken at the end. */
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Build the argument list for the signal handler.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Start from a zeroed frame so nothing unset reaches user space. */
	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;
	/* These two point into the user copy of the frame, not the local one. */
	frame.sf_siginfo = PTROUT(&fp->sf_si);
	frame.sf_ucontext = PTROUT(&fp->sf_sc);

	/* Fill in POSIX parts */
	frame.sf_si.lsi_signo = sig;
	frame.sf_si.lsi_code = code;
	frame.sf_si.lsi_addr = PTROUT(regs->tf_err);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0; /* XXX ??? */
	frame.sf_sc.uc_link = 0; /* XXX ??? */

	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	/* The proc lock stays dropped until just before returning. */
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Snapshot segment and general registers from the trap frame. */
	frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
	frame.sf_sc.uc_mcontext.sc_gs = rgs();
	frame.sf_sc.uc_mcontext.sc_fs = rfs();
	__asm __volatile("movl %%es,%0" :
	    "=rm" (frame.sf_sc.uc_mcontext.sc_es));
	__asm __volatile("movl %%ds,%0" :
	    "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
	frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
	frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
	frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
	frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
	frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
	frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
	frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
	frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
	frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.  The trampoline occupies the
	 * sv_szsigcode bytes just below LINUX32_PS_STRINGS (see
	 * linux_copyout_strings()); execution resumes linux_sznonrtsigcode
	 * bytes into it.
	 * NOTE(review): that offset is presumably where the rt trampoline
	 * starts, after the non-rt one -- confirm against the sigcode source.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
	    linux_sznonrtsigcode;
	regs->tf_rflags &= ~PSL_T;
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	load_ds(_udatasel);
	td->td_pcb->pcb_ds = _udatasel;
	load_es(_udatasel);
	td->td_pcb->pcb_es = _udatasel;
	/* Re-take the locks in the order the caller held them. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
405
406
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * in u. to call routine, followed by kcall
 * to sigreturn routine below. After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 *
 * This is the sv_sendsig entry of elf_linux_sysvec.  Handlers installed
 * with SA_SIGINFO are handed to linux_rt_sendsig(); all others get the
 * classic struct l_sigframe built here.
 *
 *	catcher	user handler address, stored in the frame
 *	sig	signal number; translated through the sysentvec's sv_sigtbl
 *		when the process has one
 *	mask	signal mask saved in the frame (sc_mask plus sf_extramask)
 *	code	translated into sc_trapno
 *
 * Locking: the proc lock and ps_mtx must be held on entry (both are
 * asserted); both are dropped while the frame is built and copied out and
 * are re-acquired before returning.
 */
static void
linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;	/* user address, kernel copy */
	l_sigset_t lmask;
	int oonstack, i;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, sig, mask, code);
		return;
	}

	regs = td->td_frame;
	/* Remember whether we were already on the alternate stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context: at the top of the
	 * alternate stack when one is configured, not yet in use and
	 * requested for this signal; otherwise one frame below the current
	 * user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	/* Both locks stay dropped until just before returning. */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Start from a zeroed frame so nothing unset reaches user space. */
	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask = lmask.__bits[0];
	frame.sf_sc.sc_gs = rgs();
	frame.sf_sc.sc_fs = rfs();
	__asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
	__asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
	frame.sf_sc.sc_edi = regs->tf_rdi;
	frame.sf_sc.sc_esi = regs->tf_rsi;
	frame.sf_sc.sc_ebp = regs->tf_rbp;
	frame.sf_sc.sc_ebx = regs->tf_rbx;
	frame.sf_sc.sc_edx = regs->tf_rdx;
	frame.sf_sc.sc_ecx = regs->tf_rcx;
	frame.sf_sc.sc_eax = regs->tf_rax;
	frame.sf_sc.sc_eip = regs->tf_rip;
	frame.sf_sc.sc_cs = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_rflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.sc_ss = regs->tf_ss;
	frame.sf_sc.sc_err = regs->tf_err;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	/* Mask words beyond the first go into sf_extramask[]. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.  Execution resumes at the start
	 * of the trampoline, which occupies the sv_szsigcode bytes just
	 * below LINUX32_PS_STRINGS (see linux_copyout_strings()).
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_rflags &= ~PSL_T;
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	load_ds(_udatasel);
	td->td_pcb->pcb_ds = _udatasel;
	load_es(_udatasel);
	td->td_pcb->pcb_es = _udatasel;
	/* Re-take the locks in the order the caller held them. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
522
/*
 * System call to cleanup state after a signal
 * has been taken. Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * Returns EFAULT if the frame at args->sfp cannot be copied in, EINVAL if
 * the saved flags or %cs fail the security checks (a bad %cs also raises
 * SIGBUS/T_PROTFLT), and EJUSTRETURN once the trap frame has been restored.
 */
int
linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_sigframe frame;
	struct trapframe *regs;
	l_sigset_t lmask;
	int eflags, i;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(sigreturn))
		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
#endif
	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/*
	 * Check for security violations.  EFLAGS_SECURE() is true when the
	 * two flag words differ only in bits covered by PSL_USERCHANGE.
	 */
#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF. The
	 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
	 * sometimes set it there too. tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing. Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs. Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		trapsignal(td, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/* Reassemble the Linux mask from sc_mask plus the extra words. */
	lmask.__bits[0] = frame.sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		lmask.__bits[i+1] = frame.sf_extramask[i];
	/* Install it as the thread's signal mask under the proc lock. */
	PROC_LOCK(p);
	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context.
	 */
	/* Selectors were restored by the trampoline. */
	regs->tf_rdi = frame.sf_sc.sc_edi;
	regs->tf_rsi = frame.sf_sc.sc_esi;
	regs->tf_rbp = frame.sf_sc.sc_ebp;
	regs->tf_rbx = frame.sf_sc.sc_ebx;
	regs->tf_rdx = frame.sf_sc.sc_edx;
	regs->tf_rcx = frame.sf_sc.sc_ecx;
	regs->tf_rax = frame.sf_sc.sc_eax;
	regs->tf_rip = frame.sf_sc.sc_eip;
	regs->tf_cs = frame.sf_sc.sc_cs;
	regs->tf_rflags = eflags;
	regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss = frame.sf_sc.sc_ss;

	return (EJUSTRETURN);
}
613
/*
 * System call to cleanup state after a signal
 * has been taken. Reset signal mask and
 * stack state from context left by rt_sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * Returns EFAULT if the ucontext at args->ucp cannot be copied in, EINVAL
 * if the saved flags or %cs fail the security checks (a bad %cs also raises
 * SIGBUS/T_PROTFLT), and EJUSTRETURN once the trap frame has been restored.
 * The alternate-stack settings recorded in the ucontext are re-applied with
 * kern_sigaltstack(); its result is deliberately ignored.
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_ucontext uc;
	struct l_sigcontext *context;
	l_stack_t *lss;
	stack_t ss;
	struct trapframe *regs;
	int eflags;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	/* All checks below operate on the kernel copy, never on user memory. */
	context = &uc.uc_mcontext;

	/*
	 * Check for security violations.  EFLAGS_SECURE() is true when the
	 * two flag words differ only in bits covered by PSL_USERCHANGE.
	 */
#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF. The
	 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
	 * sometimes set it there too. tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing. Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs. Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		trapsignal(td, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/* Install the saved mask as the thread's signal mask. */
	PROC_LOCK(p);
	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context
	 */
	/* Selectors were restored by the trampoline. */
	regs->tf_rdi = context->sc_edi;
	regs->tf_rsi = context->sc_esi;
	regs->tf_rbp = context->sc_ebp;
	regs->tf_rbx = context->sc_ebx;
	regs->tf_rdx = context->sc_edx;
	regs->tf_rcx = context->sc_ecx;
	regs->tf_rax = context->sc_eax;
	regs->tf_rip = context->sc_eip;
	regs->tf_cs = context->sc_cs;
	regs->tf_rflags = eflags;
	regs->tf_rsp = context->sc_esp_at_signal;
	regs->tf_ss = context->sc_ss;

	/*
	 * call sigaltstack & ignore results..
	 */
	lss = &uc.uc_stack;
	ss.ss_sp = PTRIN(lss->ss_sp);
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
#endif
	(void)kern_sigaltstack(td, &ss, NULL);

	return (EJUSTRETURN);
}
720
721 /*
722 * MPSAFE
723 */
724 static void
725 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
726 {
727 args[0] = tf->tf_rbx;
728 args[1] = tf->tf_rcx;
729 args[2] = tf->tf_rdx;
730 args[3] = tf->tf_rsi;
731 args[4] = tf->tf_rdi;
732 args[5] = tf->tf_rbp; /* Unconfirmed */
733 *params = NULL; /* no copyin */
734 }
735
736 /*
737 * If a linux binary is exec'ing something, try this image activator
738 * first. We override standard shell script execution in order to
739 * be able to modify the interpreter path. We only do this if a linux
740 * binary is doing the exec, so we do not create an EXEC module for it.
741 */
742 static int exec_linux_imgact_try(struct image_params *iparams);
743
744 static int
745 exec_linux_imgact_try(struct image_params *imgp)
746 {
747 const char *head = (const char *)imgp->image_header;
748 char *rpath;
749 int error = -1, len;
750
751 /*
752 * The interpreter for shell scripts run from a linux binary needs
753 * to be located in /compat/linux if possible in order to recursively
754 * maintain linux path emulation.
755 */
756 if (((const short *)head)[0] == SHELLMAGIC) {
757 /*
758 * Run our normal shell image activator. If it succeeds attempt
759 * to use the alternate path for the interpreter. If an alternate
760 * path is found, use our stringspace to store it.
761 */
762 if ((error = exec_shell_imgact(imgp)) == 0) {
763 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
764 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
765 if (rpath != NULL) {
766 len = strlen(rpath) + 1;
767
768 if (len <= MAXSHELLCMDLEN) {
769 memcpy(imgp->interpreter_name, rpath, len);
770 }
771 free(rpath, M_TEMP);
772 }
773 }
774 }
775 return(error);
776 }
777
778 /*
779 * Clear registers on exec
780 * XXX copied from ia32_signal.c.
781 */
782 static void
783 exec_linux_setregs(td, entry, stack, ps_strings)
784 struct thread *td;
785 u_long entry;
786 u_long stack;
787 u_long ps_strings;
788 {
789 struct trapframe *regs = td->td_frame;
790 struct pcb *pcb = td->td_pcb;
791
792 wrmsr(MSR_FSBASE, 0);
793 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
794 pcb->pcb_fsbase = 0;
795 pcb->pcb_gsbase = 0;
796 load_ds(_udatasel);
797 load_es(_udatasel);
798 load_fs(_udatasel);
799 load_gs(0);
800 pcb->pcb_ds = _udatasel;
801 pcb->pcb_es = _udatasel;
802 pcb->pcb_fs = _udatasel;
803 pcb->pcb_gs = 0;
804
805 bzero((char *)regs, sizeof(struct trapframe));
806 regs->tf_rip = entry;
807 regs->tf_rsp = stack;
808 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
809 regs->tf_ss = _udatasel;
810 regs->tf_cs = _ucode32sel;
811 regs->tf_rbx = ps_strings;
812 load_cr0(rcr0() | CR0_MP | CR0_TS);
813 fpstate_drop(td);
814
815 /* Return via doreti so that we can change to a different %cs */
816 pcb->pcb_flags |= PCB_FULLCTX;
817 td->td_retval[1] = 0;
818 }
819
820 /*
821 * XXX copied from ia32_sysvec.c.
822 */
823 static register_t *
824 linux_copyout_strings(struct image_params *imgp)
825 {
826 int argc, envc;
827 u_int32_t *vectp;
828 char *stringp, *destp;
829 u_int32_t *stack_base;
830 struct linux32_ps_strings *arginfo;
831 int sigcodesz;
832
833 /*
834 * Calculate string base and vector table pointers.
835 * Also deal with signal trampoline code for this exec type.
836 */
837 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
838 sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode);
839 destp = (caddr_t)arginfo - sigcodesz - SPARE_USRSPACE -
840 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
841
842 /*
843 * install sigcode
844 */
845 if (sigcodesz)
846 copyout(imgp->proc->p_sysent->sv_sigcode,
847 ((caddr_t)arginfo - sigcodesz), sigcodesz);
848
849 /*
850 * If we have a valid auxargs ptr, prepare some room
851 * on the stack.
852 */
853 if (imgp->auxargs) {
854 /*
855 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
856 * lower compatibility.
857 */
858 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
859 : (AT_COUNT * 2);
860 /*
861 * The '+ 2' is for the null pointers at the end of each of
862 * the arg and env vector sets,and imgp->auxarg_size is room
863 * for argument of Runtime loader.
864 */
865 vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 +
866 imgp->auxarg_size) * sizeof(u_int32_t));
867
868 } else
869 /*
870 * The '+ 2' is for the null pointers at the end of each of
871 * the arg and env vector sets
872 */
873 vectp = (u_int32_t *)
874 (destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t));
875
876 /*
877 * vectp also becomes our initial stack base
878 */
879 stack_base = vectp;
880
881 stringp = imgp->args->begin_argv;
882 argc = imgp->args->argc;
883 envc = imgp->args->envc;
884 /*
885 * Copy out strings - arguments and environment.
886 */
887 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
888
889 /*
890 * Fill in "ps_strings" struct for ps, w, etc.
891 */
892 suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
893 suword32(&arginfo->ps_nargvstr, argc);
894
895 /*
896 * Fill in argument portion of vector table.
897 */
898 for (; argc > 0; --argc) {
899 suword32(vectp++, (u_int32_t)(intptr_t)destp);
900 while (*stringp++ != 0)
901 destp++;
902 destp++;
903 }
904
905 /* a null vector table pointer separates the argp's from the envp's */
906 suword32(vectp++, 0);
907
908 suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
909 suword32(&arginfo->ps_nenvstr, envc);
910
911 /*
912 * Fill in environment portion of vector table.
913 */
914 for (; envc > 0; --envc) {
915 suword32(vectp++, (u_int32_t)(intptr_t)destp);
916 while (*stringp++ != 0)
917 destp++;
918 destp++;
919 }
920
921 /* end of vector table is a null pointer */
922 suword32(vectp, 0);
923
924 return ((register_t *)stack_base);
925 }
926
/* Sysctl subtree "compat.linux32" for the tunables below. */
SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

/*
 * Read/write upper bounds that linux32_fixlimits() applies to the data
 * size, stack size and virtual memory resource limits of a Linux process.
 * A value of 0 leaves the corresponding limit untouched.
 */
static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");
939
940 /*
941 * XXX copied from ia32_sysvec.c.
942 */
943 static void
944 linux32_fixlimits(struct image_params *imgp)
945 {
946 struct proc *p = imgp->proc;
947 struct plimit *oldlim, *newlim;
948
949 if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 &&
950 linux32_maxvmem == 0)
951 return;
952 newlim = lim_alloc();
953 PROC_LOCK(p);
954 oldlim = p->p_limit;
955 lim_copy(newlim, oldlim);
956 if (linux32_maxdsiz != 0) {
957 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz)
958 newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz;
959 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz)
960 newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz;
961 }
962 if (linux32_maxssiz != 0) {
963 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz)
964 newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz;
965 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz)
966 newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz;
967 }
968 if (linux32_maxvmem != 0) {
969 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem)
970 newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem;
971 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem)
972 newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem;
973 }
974 p->p_limit = newlim;
975 PROC_UNLOCK(p);
976 lim_free(oldlim);
977 }
978
/*
 * System-call vector describing the 32-bit Linux ABI; both ELF brand
 * entries below point at it.  The initializer is positional, so the order
 * of the values must match the member order of struct sysentvec.
 * NOTE(review): only sv_sigsize, sv_sigtbl, sv_sigcode and sv_szsigcode are
 * referenced by name in this file; the roles noted for the other values are
 * inferred from the values themselves -- confirm against <sys/sysent.h>.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* number of entries in linux_sysent[] */
	linux_sysent,		/* syscall table */
	0xff,			/* presumably the syscall number mask */
	LINUX_SIGTBLSZ,		/* sv_sigsize: entries in the signal table */
	bsd_to_linux_signal,	/* sv_sigtbl: FreeBSD -> Linux signals */
	ELAST + 1,		/* entries in the errno table */
	bsd_to_linux_errno,	/* FreeBSD -> Linux errno table */
	translate_traps,	/* trap -> signal adjustment */
	elf_linux_fixup,	/* stack fixup (aux vector, argc) */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* sv_sigcode: trampoline code */
	&linux_szsigcode,	/* sv_szsigcode: trampoline size */
	linux_prepsyscall,	/* syscall argument fetch */
	"Linux ELF32",		/* ABI name */
	elf32_coredump,		/* core dump routine */
	exec_linux_imgact_try,	/* image activator tried first */
	LINUX_MINSIGSTKSZ,	/* presumably the minimum signal stack size */
	PAGE_SIZE,		/* page size */
	VM_MIN_ADDRESS,		/* presumably the lowest user address */
	LINUX32_USRSTACK,	/* presumably the highest user address */
	LINUX32_USRSTACK,	/* presumably the user stack top */
	LINUX32_PS_STRINGS,	/* address of the ps_strings record */
	VM_PROT_ALL,		/* presumably the stack protection */
	linux_copyout_strings,	/* argv/envp/string copyout */
	exec_linux_setregs,	/* register setup on exec */
	linux32_fixlimits	/* resource limit clamp */
};
1007
/*
 * ELF brand entry for Linux/i386 binaries using the "/lib/ld-linux.so.1"
 * interpreter, with "/compat/linux" as the emulation path.
 * NOTE(review): the initializer is positional; the trailing NULL fills a
 * member whose meaning is not visible in this file -- confirm against the
 * Elf32_Brandinfo declaration.
 */
static Elf32_Brandinfo linux_brand = {
	ELFOSABI_LINUX,
	EM_386,
	"Linux",
	"/compat/linux",
	"/lib/ld-linux.so.1",
	&elf_linux_sysvec,
	NULL,
};

/* Same as linux_brand, but for the "/lib/ld-linux.so.2" interpreter. */
static Elf32_Brandinfo linux_glibc2brand = {
	ELFOSABI_LINUX,
	EM_386,
	"Linux",
	"/compat/linux",
	"/lib/ld-linux.so.2",
	&elf_linux_sysvec,
	NULL,
};

/*
 * NULL-terminated list of the brands that linux_elf_modevent() inserts on
 * load and removes on unload.
 */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	NULL
};
1033
1034 static int
1035 linux_elf_modevent(module_t mod, int type, void *data)
1036 {
1037 Elf32_Brandinfo **brandinfo;
1038 int error;
1039 struct linux_ioctl_handler **lihp;
1040 struct linux_device_handler **ldhp;
1041
1042 error = 0;
1043
1044 switch(type) {
1045 case MOD_LOAD:
1046 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1047 ++brandinfo)
1048 if (elf32_insert_brand_entry(*brandinfo) < 0)
1049 error = EINVAL;
1050 if (error == 0) {
1051 SET_FOREACH(lihp, linux_ioctl_handler_set)
1052 linux_ioctl_register_handler(*lihp);
1053 SET_FOREACH(ldhp, linux_device_handler_set)
1054 linux_device_register_handler(*ldhp);
1055 if (bootverbose)
1056 printf("Linux ELF exec handler installed\n");
1057 } else
1058 printf("cannot insert Linux ELF brand handler\n");
1059 break;
1060 case MOD_UNLOAD:
1061 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1062 ++brandinfo)
1063 if (elf32_brand_inuse(*brandinfo))
1064 error = EBUSY;
1065 if (error == 0) {
1066 for (brandinfo = &linux_brandlist[0];
1067 *brandinfo != NULL; ++brandinfo)
1068 if (elf32_remove_brand_entry(*brandinfo) < 0)
1069 error = EINVAL;
1070 }
1071 if (error == 0) {
1072 SET_FOREACH(lihp, linux_ioctl_handler_set)
1073 linux_ioctl_unregister_handler(*lihp);
1074 SET_FOREACH(ldhp, linux_device_handler_set)
1075 linux_device_unregister_handler(*ldhp);
1076 if (bootverbose)
1077 printf("Linux ELF exec handler removed\n");
1078 } else
1079 printf("Could not deinstall ELF interpreter entry\n");
1080 break;
1081 default:
1082 break;
1083 }
1084 return error;
1085 }
1086
/*
 * Module description: the name "linuxelf", the event handler above, and a
 * final 0.
 * NOTE(review): the trailing 0 is presumably the handler's private-data
 * argument (received as 'data', which the handler ignores) -- confirm
 * against the moduledata_t declaration.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Register the module at the SI_SUB_EXEC stage, with no ordering preference. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
/* Cache object: 2b1bef2522672b78771e95ff0ee67230 */