1 /*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD: releng/5.3/sys/i386/linux/linux_sysvec.c 132199 2004-07-15 08:26:07Z phk $");
31
32 /* XXX we use functions that might not exist. */
33 #include "opt_compat.h"
34
35 #ifndef COMPAT_43
36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/exec.h>
42 #include <sys/imgact.h>
43 #include <sys/imgact_aout.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
50 #include <sys/proc.h>
51 #include <sys/signalvar.h>
52 #include <sys/syscallsubr.h>
53 #include <sys/sysent.h>
54 #include <sys/sysproto.h>
55 #include <sys/user.h>
56 #include <sys/vnode.h>
57
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_extern.h>
61 #include <vm/vm_map.h>
62 #include <vm/vm_object.h>
63 #include <vm/vm_page.h>
64 #include <vm/vm_param.h>
65
66 #include <machine/cpu.h>
67 #include <machine/md_var.h>
68
69 #include <i386/linux/linux.h>
70 #include <i386/linux/linux_proto.h>
71 #include <compat/linux/linux_mib.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
74
75 MODULE_VERSION(linux, 1);
76 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
77 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
78 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
79
80 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
81
/*
 * The two bytes "#!" read as a 16-bit value in host byte order; compared
 * against the first short of an image header in exec_linux_imgact_try().
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define	SHELLMAGIC	0x2321
#else
#define	SHELLMAGIC	0x2123		/* #! */
#endif
87
/*
 * The sendsig routines are not system calls, but they still want to use
 * ldebug().  Give them pseudo syscall numbers, both mapped to syscall 0,
 * which is slightly less bogus than borrowing ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_sendsig		0
#define	LINUX_SYS_linux_rt_sendsig	0
96
97 extern char linux_sigcode[];
98 extern int linux_szsigcode;
99
100 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
101
102 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
103
104 static int linux_fixup(register_t **stack_base,
105 struct image_params *iparams);
106 static int elf_linux_fixup(register_t **stack_base,
107 struct image_params *iparams);
108 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
109 caddr_t *params);
110 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
111 u_long code);
112 static void exec_linux_setregs(struct thread *td, u_long entry,
113 u_long stack, u_long ps_strings);
114
115 /*
116 * Linux syscalls return negative errno's, we do positive and map them
117 */
118 static int bsd_to_linux_errno[ELAST + 1] = {
119 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
120 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
121 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
122 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
123 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
124 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
125 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
126 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
127 -6, -6, -43, -42, -75, -6, -84
128 };
129
130 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
131 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
132 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
133 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
134 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
135 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
136 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
137 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
138 0, LINUX_SIGUSR1, LINUX_SIGUSR2
139 };
140
141 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
142 SIGHUP, SIGINT, SIGQUIT, SIGILL,
143 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
144 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
145 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
146 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
147 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
148 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
149 SIGIO, SIGURG, SIGSYS
150 };
151
/*
 * Trap number translation.  The table is indexed by the native trap code;
 * each entry is the trap number stored in sc_trapno of the Linux signal
 * context.  Codes without a Linux counterpart, and any code past the end
 * of the table, translate to LINUX_T_UNKNOWN.
 */
#define	LINUX_T_UNKNOWN	255
static int _bsd_to_linux_trapcode[] = {
	/* [ 0]               */ LINUX_T_UNKNOWN,
	/* [ 1] T_PRIVINFLT   */ 6,
	/* [ 2]               */ LINUX_T_UNKNOWN,
	/* [ 3] T_BPTFLT      */ 3,
	/* [ 4]               */ LINUX_T_UNKNOWN,
	/* [ 5]               */ LINUX_T_UNKNOWN,
	/* [ 6] T_ARITHTRAP   */ 16,
	/* [ 7] T_ASTFLT      */ 254,
	/* [ 8]               */ LINUX_T_UNKNOWN,
	/* [ 9] T_PROTFLT     */ 13,
	/* [10] T_TRCTRAP     */ 1,
	/* [11]               */ LINUX_T_UNKNOWN,
	/* [12] T_PAGEFLT     */ 14,
	/* [13]               */ LINUX_T_UNKNOWN,
	/* [14] T_ALIGNFLT    */ 17,
	/* [15]               */ LINUX_T_UNKNOWN,
	/* [16]               */ LINUX_T_UNKNOWN,
	/* [17]               */ LINUX_T_UNKNOWN,
	/* [18] T_DIVIDE      */ 0,
	/* [19] T_NMI         */ 2,
	/* [20] T_OFLOW       */ 4,
	/* [21] T_BOUND       */ 5,
	/* [22] T_DNA         */ 7,
	/* [23] T_DOUBLEFLT   */ 8,
	/* [24] T_FPOPFLT     */ 9,
	/* [25] T_TSSFLT      */ 10,
	/* [26] T_SEGNPFLT    */ 11,
	/* [27] T_STKFLT      */ 12,
	/* [28] T_MCHK        */ 18,
	/* [29] T_XMMFLT      */ 19,
	/* [30] T_RESERVED    */ 15
};

/*
 * Bounds-checked lookup in the table above.  Note that the argument is
 * evaluated twice.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
190
191 /*
192 * If FreeBSD & Linux have a difference of opinion about what a trap
193 * means, deal with it here.
194 *
195 * MPSAFE
196 */
197 static int
198 translate_traps(int signal, int trap_code)
199 {
200 if (signal != SIGBUS)
201 return signal;
202 switch (trap_code) {
203 case T_PROTFLT:
204 case T_TSSFLT:
205 case T_DOUBLEFLT:
206 case T_PAGEFLT:
207 return SIGSEGV;
208 default:
209 return signal;
210 }
211 }
212
213 static int
214 linux_fixup(register_t **stack_base, struct image_params *imgp)
215 {
216 register_t *argv, *envp;
217
218 argv = *stack_base;
219 envp = *stack_base + (imgp->argc + 1);
220 (*stack_base)--;
221 **stack_base = (intptr_t)(void *)envp;
222 (*stack_base)--;
223 **stack_base = (intptr_t)(void *)argv;
224 (*stack_base)--;
225 **stack_base = imgp->argc;
226 return 0;
227 }
228
229 static int
230 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
231 {
232 Elf32_Auxargs *args;
233 register_t *pos;
234
235 KASSERT(curthread->td_proc == imgp->proc &&
236 (curthread->td_proc->p_flag & P_SA) == 0,
237 ("unsafe elf_linux_fixup(), should be curproc"));
238 args = (Elf32_Auxargs *)imgp->auxargs;
239 pos = *stack_base + (imgp->argc + imgp->envc + 2);
240
241 if (args->trace)
242 AUXARGS_ENTRY(pos, AT_DEBUG, 1);
243 if (args->execfd != -1)
244 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
245 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
246 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
247 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
248 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
249 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
250 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
251 AUXARGS_ENTRY(pos, AT_BASE, args->base);
252 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
253 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
254 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
255 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
256 AUXARGS_ENTRY(pos, AT_NULL, 0);
257
258 free(imgp->auxargs, M_TEMP);
259 imgp->auxargs = NULL;
260
261 (*stack_base)--;
262 **stack_base = (register_t)imgp->argc;
263 return 0;
264 }
265
266 extern int _ucodesel, _udatasel;
267 extern unsigned long linux_sznonrtsigcode;
268
269 static void
270 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
271 {
272 struct thread *td = curthread;
273 struct proc *p = td->td_proc;
274 struct sigacts *psp;
275 struct trapframe *regs;
276 struct l_rt_sigframe *fp, frame;
277 int oonstack;
278
279 PROC_LOCK_ASSERT(p, MA_OWNED);
280 psp = p->p_sigacts;
281 mtx_assert(&psp->ps_mtx, MA_OWNED);
282 regs = td->td_frame;
283 oonstack = sigonstack(regs->tf_esp);
284
285 #ifdef DEBUG
286 if (ldebug(rt_sendsig))
287 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
288 catcher, sig, (void*)mask, code);
289 #endif
290 /*
291 * Allocate space for the signal handler context.
292 */
293 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
294 SIGISMEMBER(psp->ps_sigonstack, sig)) {
295 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
296 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
297 } else
298 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
299 mtx_unlock(&psp->ps_mtx);
300
301 /*
302 * Build the argument list for the signal handler.
303 */
304 if (p->p_sysent->sv_sigtbl)
305 if (sig <= p->p_sysent->sv_sigsize)
306 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
307
308 bzero(&frame, sizeof(frame));
309
310 frame.sf_handler = catcher;
311 frame.sf_sig = sig;
312 frame.sf_siginfo = &fp->sf_si;
313 frame.sf_ucontext = &fp->sf_sc;
314
315 /* Fill in POSIX parts */
316 frame.sf_si.lsi_signo = sig;
317 frame.sf_si.lsi_code = code;
318 frame.sf_si.lsi_addr = (void *)regs->tf_err;
319
320 /*
321 * Build the signal context to be used by sigreturn.
322 */
323 frame.sf_sc.uc_flags = 0; /* XXX ??? */
324 frame.sf_sc.uc_link = NULL; /* XXX ??? */
325
326 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
327 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
328 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
329 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
330 PROC_UNLOCK(p);
331
332 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
333
334 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
335 frame.sf_sc.uc_mcontext.sc_gs = rgs();
336 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
337 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
338 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
339 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
340 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
341 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
342 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
343 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
344 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
345 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
346 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
347 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
348 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
349 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
350 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
351 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
352 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
353
354 #ifdef DEBUG
355 if (ldebug(rt_sendsig))
356 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
357 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
358 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
359 #endif
360
361 if (copyout(&frame, fp, sizeof(frame)) != 0) {
362 /*
363 * Process has trashed its stack; give it an illegal
364 * instruction to halt it in its tracks.
365 */
366 #ifdef DEBUG
367 if (ldebug(rt_sendsig))
368 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
369 fp, oonstack);
370 #endif
371 PROC_LOCK(p);
372 sigexit(td, SIGILL);
373 }
374
375 /*
376 * Build context to run handler in.
377 */
378 regs->tf_esp = (int)fp;
379 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
380 linux_sznonrtsigcode;
381 regs->tf_eflags &= ~(PSL_T | PSL_VM);
382 regs->tf_cs = _ucodesel;
383 regs->tf_ds = _udatasel;
384 regs->tf_es = _udatasel;
385 regs->tf_fs = _udatasel;
386 regs->tf_ss = _udatasel;
387 PROC_LOCK(p);
388 mtx_lock(&psp->ps_mtx);
389 }
390
391
392 /*
393 * Send an interrupt to process.
394 *
395 * Stack is set up to allow sigcode stored
396 * in u. to call routine, followed by kcall
397 * to sigreturn routine below. After sigreturn
398 * resets the signal mask, the stack, and the
399 * frame pointer, it returns to the user
400 * specified pc, psl.
401 */
402 static void
403 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
404 {
405 struct thread *td = curthread;
406 struct proc *p = td->td_proc;
407 struct sigacts *psp;
408 struct trapframe *regs;
409 struct l_sigframe *fp, frame;
410 l_sigset_t lmask;
411 int oonstack, i;
412
413 PROC_LOCK_ASSERT(p, MA_OWNED);
414 psp = p->p_sigacts;
415 mtx_assert(&psp->ps_mtx, MA_OWNED);
416 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
417 /* Signal handler installed with SA_SIGINFO. */
418 linux_rt_sendsig(catcher, sig, mask, code);
419 return;
420 }
421
422 regs = td->td_frame;
423 oonstack = sigonstack(regs->tf_esp);
424
425 #ifdef DEBUG
426 if (ldebug(sendsig))
427 printf(ARGS(sendsig, "%p, %d, %p, %lu"),
428 catcher, sig, (void*)mask, code);
429 #endif
430
431 /*
432 * Allocate space for the signal handler context.
433 */
434 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
435 SIGISMEMBER(psp->ps_sigonstack, sig)) {
436 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
437 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
438 } else
439 fp = (struct l_sigframe *)regs->tf_esp - 1;
440 mtx_unlock(&psp->ps_mtx);
441 PROC_UNLOCK(p);
442
443 /*
444 * Build the argument list for the signal handler.
445 */
446 if (p->p_sysent->sv_sigtbl)
447 if (sig <= p->p_sysent->sv_sigsize)
448 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
449
450 bzero(&frame, sizeof(frame));
451
452 frame.sf_handler = catcher;
453 frame.sf_sig = sig;
454
455 bsd_to_linux_sigset(mask, &lmask);
456
457 /*
458 * Build the signal context to be used by sigreturn.
459 */
460 frame.sf_sc.sc_mask = lmask.__bits[0];
461 frame.sf_sc.sc_gs = rgs();
462 frame.sf_sc.sc_fs = regs->tf_fs;
463 frame.sf_sc.sc_es = regs->tf_es;
464 frame.sf_sc.sc_ds = regs->tf_ds;
465 frame.sf_sc.sc_edi = regs->tf_edi;
466 frame.sf_sc.sc_esi = regs->tf_esi;
467 frame.sf_sc.sc_ebp = regs->tf_ebp;
468 frame.sf_sc.sc_ebx = regs->tf_ebx;
469 frame.sf_sc.sc_edx = regs->tf_edx;
470 frame.sf_sc.sc_ecx = regs->tf_ecx;
471 frame.sf_sc.sc_eax = regs->tf_eax;
472 frame.sf_sc.sc_eip = regs->tf_eip;
473 frame.sf_sc.sc_cs = regs->tf_cs;
474 frame.sf_sc.sc_eflags = regs->tf_eflags;
475 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
476 frame.sf_sc.sc_ss = regs->tf_ss;
477 frame.sf_sc.sc_err = regs->tf_err;
478 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
479
480 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
481 frame.sf_extramask[i] = lmask.__bits[i+1];
482
483 if (copyout(&frame, fp, sizeof(frame)) != 0) {
484 /*
485 * Process has trashed its stack; give it an illegal
486 * instruction to halt it in its tracks.
487 */
488 PROC_LOCK(p);
489 sigexit(td, SIGILL);
490 }
491
492 /*
493 * Build context to run handler in.
494 */
495 regs->tf_esp = (int)fp;
496 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
497 regs->tf_eflags &= ~(PSL_T | PSL_VM);
498 regs->tf_cs = _ucodesel;
499 regs->tf_ds = _udatasel;
500 regs->tf_es = _udatasel;
501 regs->tf_fs = _udatasel;
502 regs->tf_ss = _udatasel;
503 PROC_LOCK(p);
504 mtx_lock(&psp->ps_mtx);
505 }
506
507 /*
508 * System call to cleanup state after a signal
509 * has been taken. Reset signal mask and
510 * stack state from context left by sendsig (above).
511 * Return to previous pc and psl as specified by
512 * context left by sendsig. Check carefully to
513 * make sure that the user has not modified the
514 * psl to gain improper privileges or to cause
515 * a machine fault.
516 */
517 int
518 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
519 {
520 struct proc *p = td->td_proc;
521 struct l_sigframe frame;
522 struct trapframe *regs;
523 l_sigset_t lmask;
524 int eflags, i;
525
526 regs = td->td_frame;
527
528 #ifdef DEBUG
529 if (ldebug(sigreturn))
530 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
531 #endif
532 /*
533 * The trampoline code hands us the sigframe.
534 * It is unsafe to keep track of it ourselves, in the event that a
535 * program jumps out of a signal handler.
536 */
537 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
538 return (EFAULT);
539
540 /*
541 * Check for security violations.
542 */
543 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
544 eflags = frame.sf_sc.sc_eflags;
545 /*
546 * XXX do allow users to change the privileged flag PSL_RF. The
547 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
548 * sometimes set it there too. tf_eflags is kept in the signal
549 * context during signal handling and there is no other place
550 * to remember it, so the PSL_RF bit may be corrupted by the
551 * signal handler without us knowing. Corruption of the PSL_RF
552 * bit at worst causes one more or one less debugger trap, so
553 * allowing it is fairly harmless.
554 */
555 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
556 return(EINVAL);
557
558 /*
559 * Don't allow users to load a valid privileged %cs. Let the
560 * hardware check for invalid selectors, excess privilege in
561 * other selectors, invalid %eip's and invalid %esp's.
562 */
563 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
564 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
565 trapsignal(td, SIGBUS, T_PROTFLT);
566 return(EINVAL);
567 }
568
569 lmask.__bits[0] = frame.sf_sc.sc_mask;
570 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
571 lmask.__bits[i+1] = frame.sf_extramask[i];
572 PROC_LOCK(p);
573 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
574 SIG_CANTMASK(td->td_sigmask);
575 signotify(td);
576 PROC_UNLOCK(p);
577
578 /*
579 * Restore signal context.
580 */
581 /* %gs was restored by the trampoline. */
582 regs->tf_fs = frame.sf_sc.sc_fs;
583 regs->tf_es = frame.sf_sc.sc_es;
584 regs->tf_ds = frame.sf_sc.sc_ds;
585 regs->tf_edi = frame.sf_sc.sc_edi;
586 regs->tf_esi = frame.sf_sc.sc_esi;
587 regs->tf_ebp = frame.sf_sc.sc_ebp;
588 regs->tf_ebx = frame.sf_sc.sc_ebx;
589 regs->tf_edx = frame.sf_sc.sc_edx;
590 regs->tf_ecx = frame.sf_sc.sc_ecx;
591 regs->tf_eax = frame.sf_sc.sc_eax;
592 regs->tf_eip = frame.sf_sc.sc_eip;
593 regs->tf_cs = frame.sf_sc.sc_cs;
594 regs->tf_eflags = eflags;
595 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
596 regs->tf_ss = frame.sf_sc.sc_ss;
597
598 return (EJUSTRETURN);
599 }
600
601 /*
602 * System call to cleanup state after a signal
603 * has been taken. Reset signal mask and
604 * stack state from context left by rt_sendsig (above).
605 * Return to previous pc and psl as specified by
606 * context left by sendsig. Check carefully to
607 * make sure that the user has not modified the
608 * psl to gain improper privileges or to cause
609 * a machine fault.
610 */
611 int
612 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
613 {
614 struct proc *p = td->td_proc;
615 struct l_ucontext uc;
616 struct l_sigcontext *context;
617 l_stack_t *lss;
618 stack_t ss;
619 struct trapframe *regs;
620 int eflags;
621
622 regs = td->td_frame;
623
624 #ifdef DEBUG
625 if (ldebug(rt_sigreturn))
626 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
627 #endif
628 /*
629 * The trampoline code hands us the ucontext.
630 * It is unsafe to keep track of it ourselves, in the event that a
631 * program jumps out of a signal handler.
632 */
633 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
634 return (EFAULT);
635
636 context = &uc.uc_mcontext;
637
638 /*
639 * Check for security violations.
640 */
641 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
642 eflags = context->sc_eflags;
643 /*
644 * XXX do allow users to change the privileged flag PSL_RF. The
645 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
646 * sometimes set it there too. tf_eflags is kept in the signal
647 * context during signal handling and there is no other place
648 * to remember it, so the PSL_RF bit may be corrupted by the
649 * signal handler without us knowing. Corruption of the PSL_RF
650 * bit at worst causes one more or one less debugger trap, so
651 * allowing it is fairly harmless.
652 */
653 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
654 return(EINVAL);
655
656 /*
657 * Don't allow users to load a valid privileged %cs. Let the
658 * hardware check for invalid selectors, excess privilege in
659 * other selectors, invalid %eip's and invalid %esp's.
660 */
661 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
662 if (!CS_SECURE(context->sc_cs)) {
663 trapsignal(td, SIGBUS, T_PROTFLT);
664 return(EINVAL);
665 }
666
667 PROC_LOCK(p);
668 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
669 SIG_CANTMASK(td->td_sigmask);
670 signotify(td);
671 PROC_UNLOCK(p);
672
673 /*
674 * Restore signal context
675 */
676 /* %gs was restored by the trampoline. */
677 regs->tf_fs = context->sc_fs;
678 regs->tf_es = context->sc_es;
679 regs->tf_ds = context->sc_ds;
680 regs->tf_edi = context->sc_edi;
681 regs->tf_esi = context->sc_esi;
682 regs->tf_ebp = context->sc_ebp;
683 regs->tf_ebx = context->sc_ebx;
684 regs->tf_edx = context->sc_edx;
685 regs->tf_ecx = context->sc_ecx;
686 regs->tf_eax = context->sc_eax;
687 regs->tf_eip = context->sc_eip;
688 regs->tf_cs = context->sc_cs;
689 regs->tf_eflags = eflags;
690 regs->tf_esp = context->sc_esp_at_signal;
691 regs->tf_ss = context->sc_ss;
692
693 /*
694 * call sigaltstack & ignore results..
695 */
696 lss = &uc.uc_stack;
697 ss.ss_sp = lss->ss_sp;
698 ss.ss_size = lss->ss_size;
699 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
700
701 #ifdef DEBUG
702 if (ldebug(rt_sigreturn))
703 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
704 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
705 #endif
706 (void)kern_sigaltstack(td, &ss, NULL);
707
708 return (EJUSTRETURN);
709 }
710
711 /*
712 * MPSAFE
713 */
714 static void
715 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
716 {
717 args[0] = tf->tf_ebx;
718 args[1] = tf->tf_ecx;
719 args[2] = tf->tf_edx;
720 args[3] = tf->tf_esi;
721 args[4] = tf->tf_edi;
722 args[5] = tf->tf_ebp; /* Unconfirmed */
723 *params = NULL; /* no copyin */
724 }
725
726
727
728 /*
729 * Dump core, into a file named as described in the comments for
730 * expand_name(), unless the process was setuid/setgid.
731 */
732 static int
733 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
734 {
735 struct proc *p = td->td_proc;
736 struct ucred *cred = td->td_ucred;
737 struct vmspace *vm = p->p_vmspace;
738 char *tempuser;
739 int error;
740
741 if (ctob((uarea_pages + kstack_pages) +
742 vm->vm_dsize + vm->vm_ssize) >= limit)
743 return (EFAULT);
744 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
745 M_WAITOK | M_ZERO);
746 if (tempuser == NULL)
747 return (ENOMEM);
748 PROC_LOCK(p);
749 fill_kinfo_proc(p, &p->p_uarea->u_kproc);
750 PROC_UNLOCK(p);
751 bcopy(p->p_uarea, tempuser, sizeof(struct user));
752 bcopy(td->td_frame,
753 tempuser + ctob(uarea_pages) +
754 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
755 sizeof(struct trapframe));
756 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
757 ctob(uarea_pages + kstack_pages),
758 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
759 (int *)NULL, td);
760 free(tempuser, M_TEMP);
761 if (error == 0)
762 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
763 (int)ctob(vm->vm_dsize),
764 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
765 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
766 if (error == 0)
767 error = vn_rdwr_inchunks(UIO_WRITE, vp,
768 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
769 round_page(ctob(vm->vm_ssize)),
770 (off_t)ctob(uarea_pages + kstack_pages) +
771 ctob(vm->vm_dsize), UIO_USERSPACE,
772 IO_UNIT | IO_DIRECT, cred, NOCRED, NULL, td);
773 return (error);
774 }
775 /*
776 * If a linux binary is exec'ing something, try this image activator
777 * first. We override standard shell script execution in order to
778 * be able to modify the interpreter path. We only do this if a linux
779 * binary is doing the exec, so we do not create an EXEC module for it.
780 */
781 static int exec_linux_imgact_try(struct image_params *iparams);
782
783 static int
784 exec_linux_imgact_try(struct image_params *imgp)
785 {
786 const char *head = (const char *)imgp->image_header;
787 int error = -1;
788
789 /*
790 * The interpreter for shell scripts run from a linux binary needs
791 * to be located in /compat/linux if possible in order to recursively
792 * maintain linux path emulation.
793 */
794 if (((const short *)head)[0] == SHELLMAGIC) {
795 /*
796 * Run our normal shell image activator. If it succeeds attempt
797 * to use the alternate path for the interpreter. If an alternate
798 * path is found, use our stringspace to store it.
799 */
800 if ((error = exec_shell_imgact(imgp)) == 0) {
801 char *rpath = NULL;
802
803 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
804 imgp->interpreter_name, &rpath, 0);
805 if (rpath != imgp->interpreter_name) {
806 int len = strlen(rpath) + 1;
807
808 if (len <= MAXSHELLCMDLEN) {
809 memcpy(imgp->interpreter_name, rpath, len);
810 }
811 free(rpath, M_TEMP);
812 }
813 }
814 }
815 return(error);
816 }
817
818 /*
819 * exec_setregs may initialize some registers differently than Linux
820 * does, thus potentially confusing Linux binaries. If necessary, we
821 * override the exec_setregs default(s) here.
822 */
823 static void
824 exec_linux_setregs(struct thread *td, u_long entry,
825 u_long stack, u_long ps_strings)
826 {
827 struct pcb *pcb = td->td_pcb;
828
829 exec_setregs(td, entry, stack, ps_strings);
830
831 /* Linux sets %gs to 0, we default to _udatasel */
832 pcb->pcb_gs = 0; load_gs(0);
833 }
834
835 struct sysentvec linux_sysvec = {
836 LINUX_SYS_MAXSYSCALL,
837 linux_sysent,
838 0xff,
839 LINUX_SIGTBLSZ,
840 bsd_to_linux_signal,
841 ELAST + 1,
842 bsd_to_linux_errno,
843 translate_traps,
844 linux_fixup,
845 linux_sendsig,
846 linux_sigcode,
847 &linux_szsigcode,
848 linux_prepsyscall,
849 "Linux a.out",
850 linux_aout_coredump,
851 exec_linux_imgact_try,
852 LINUX_MINSIGSTKSZ,
853 PAGE_SIZE,
854 VM_MIN_ADDRESS,
855 VM_MAXUSER_ADDRESS,
856 USRSTACK,
857 PS_STRINGS,
858 VM_PROT_ALL,
859 exec_copyout_strings,
860 exec_linux_setregs,
861 NULL
862 };
863
864 struct sysentvec elf_linux_sysvec = {
865 LINUX_SYS_MAXSYSCALL,
866 linux_sysent,
867 0xff,
868 LINUX_SIGTBLSZ,
869 bsd_to_linux_signal,
870 ELAST + 1,
871 bsd_to_linux_errno,
872 translate_traps,
873 elf_linux_fixup,
874 linux_sendsig,
875 linux_sigcode,
876 &linux_szsigcode,
877 linux_prepsyscall,
878 "Linux ELF",
879 elf32_coredump,
880 exec_linux_imgact_try,
881 LINUX_MINSIGSTKSZ,
882 PAGE_SIZE,
883 VM_MIN_ADDRESS,
884 VM_MAXUSER_ADDRESS,
885 USRSTACK,
886 PS_STRINGS,
887 VM_PROT_ALL,
888 exec_copyout_strings,
889 exec_linux_setregs,
890 NULL
891 };
892
893 static Elf32_Brandinfo linux_brand = {
894 ELFOSABI_LINUX,
895 EM_386,
896 "Linux",
897 "/compat/linux",
898 "/lib/ld-linux.so.1",
899 &elf_linux_sysvec,
900 NULL,
901 };
902
903 static Elf32_Brandinfo linux_glibc2brand = {
904 ELFOSABI_LINUX,
905 EM_386,
906 "Linux",
907 "/compat/linux",
908 "/lib/ld-linux.so.2",
909 &elf_linux_sysvec,
910 NULL,
911 };
912
913 Elf32_Brandinfo *linux_brandlist[] = {
914 &linux_brand,
915 &linux_glibc2brand,
916 NULL
917 };
918
919 static int
920 linux_elf_modevent(module_t mod, int type, void *data)
921 {
922 Elf32_Brandinfo **brandinfo;
923 int error;
924 struct linux_ioctl_handler **lihp;
925
926 error = 0;
927
928 switch(type) {
929 case MOD_LOAD:
930 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
931 ++brandinfo)
932 if (elf32_insert_brand_entry(*brandinfo) < 0)
933 error = EINVAL;
934 if (error == 0) {
935 SET_FOREACH(lihp, linux_ioctl_handler_set)
936 linux_ioctl_register_handler(*lihp);
937 if (bootverbose)
938 printf("Linux ELF exec handler installed\n");
939 } else
940 printf("cannot insert Linux ELF brand handler\n");
941 break;
942 case MOD_UNLOAD:
943 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
944 ++brandinfo)
945 if (elf32_brand_inuse(*brandinfo))
946 error = EBUSY;
947 if (error == 0) {
948 for (brandinfo = &linux_brandlist[0];
949 *brandinfo != NULL; ++brandinfo)
950 if (elf32_remove_brand_entry(*brandinfo) < 0)
951 error = EINVAL;
952 }
953 if (error == 0) {
954 SET_FOREACH(lihp, linux_ioctl_handler_set)
955 linux_ioctl_unregister_handler(*lihp);
956 if (bootverbose)
957 printf("Linux ELF exec handler removed\n");
958 linux_mib_destroy();
959 } else
960 printf("Could not deinstall ELF interpreter entry\n");
961 break;
962 default:
963 return EOPNOTSUPP;
964 }
965 return error;
966 }
967
968 static moduledata_t linux_elf_mod = {
969 "linuxelf",
970 linux_elf_modevent,
971 0
972 };
973
974 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Cache object: c0a9b9ced40846cda0851a52218dc362
|