1 /*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: releng/5.0/sys/i386/linux/linux_sysvec.c 106332 2002-11-02 07:41:04Z marcel $
29 */
30
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/sysent.h>
50 #include <sys/sysproto.h>
51 #include <sys/user.h>
52 #include <sys/vnode.h>
53
54 #include <vm/vm.h>
55 #include <vm/vm_param.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_extern.h>
58 #include <sys/exec.h>
59 #include <sys/kernel.h>
60 #include <sys/module.h>
61 #include <machine/cpu.h>
62 #include <machine/md_var.h>
63 #include <sys/mutex.h>
64
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/pmap.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_object.h>
70
71 #include <i386/linux/linux.h>
72 #include <i386/linux/linux_proto.h>
73 #include <compat/linux/linux_signal.h>
74 #include <compat/linux/linux_util.h>
75
76 MODULE_VERSION(linux, 1);
77 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
78 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
79 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
80
81 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
82
/*
 * The two characters "#!" that open a script, as they appear when the
 * first two bytes of an image header are read as one host-order short.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define	SHELLMAGIC	0x2321
#else
#define	SHELLMAGIC	0x2123		/* #! */
#endif
88
89 /*
90 * Allow the sendsig functions to use the ldebug() facility
91 * even though they are not syscalls themselves. Map them
92 * to syscall 0. This is slightly less bogus than using
93 * ldebug(sigreturn).
94 */
95 #define LINUX_SYS_linux_rt_sendsig 0
96 #define LINUX_SYS_linux_sendsig 0
97
98 extern char linux_sigcode[];
99 extern int linux_szsigcode;
100
101 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
102
103 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
104
105 static int linux_fixup(register_t **stack_base,
106 struct image_params *iparams);
107 static int elf_linux_fixup(register_t **stack_base,
108 struct image_params *iparams);
109 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
110 caddr_t *params);
111 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
112 u_long code);
113
114 /*
115 * Linux syscalls return negative errno's, we do positive and map them
116 */
117 static int bsd_to_linux_errno[ELAST + 1] = {
118 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
119 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
120 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
121 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
122 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
123 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
124 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
125 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
126 -6, -6, -43, -42, -75, -6, -84
127 };
128
129 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
130 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
131 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
132 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
133 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
134 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
135 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
136 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
137 0, LINUX_SIGUSR1, LINUX_SIGUSR2
138 };
139
140 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
141 SIGHUP, SIGINT, SIGQUIT, SIGILL,
142 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
143 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
144 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
145 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
146 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
147 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
148 SIGIO, SIGURG, 0
149 };
150
/* Value reported to Linux when a native trap code has no translation. */
#define	LINUX_T_UNKNOWN	255

/*
 * Native trap code -> Linux trap number, indexed by the native code.
 * Slots without a translation hold LINUX_T_UNKNOWN.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0 */
	6,			/*  1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2 */
	3,			/*  3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4 */
	LINUX_T_UNKNOWN,	/*  5 */
	16,			/*  6  T_ARITHTRAP */
	254,			/*  7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8 */
	13,			/*  9  T_PROTFLT */
	1,			/* 10  T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12  T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14  T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18  T_DIVIDE */
	2,			/* 19  T_NMI */
	4,			/* 20  T_OFLOW */
	5,			/* 21  T_BOUND */
	7,			/* 22  T_DNA */
	8,			/* 23  T_DOUBLEFLT */
	9,			/* 24  T_FPOPFLT */
	10,			/* 25  T_TSSFLT */
	11,			/* 26  T_SEGNPFLT */
	12,			/* 27  T_STKFLT */
	18,			/* 28  T_MCHK */
	19,			/* 29  T_XMMFLT */
	15			/* 30  T_RESERVED */
};

/*
 * Bounds-checked lookup: codes past the end of the table translate to
 * LINUX_T_UNKNOWN instead of indexing out of range.  Note that the
 * argument is evaluated twice.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
189
190 /*
191 * If FreeBSD & Linux have a difference of opinion about what a trap
192 * means, deal with it here.
193 *
194 * MPSAFE
195 */
196 static int
197 translate_traps(int signal, int trap_code)
198 {
199 if (signal != SIGBUS)
200 return signal;
201 switch (trap_code) {
202 case T_PROTFLT:
203 case T_TSSFLT:
204 case T_DOUBLEFLT:
205 case T_PAGEFLT:
206 return SIGSEGV;
207 default:
208 return signal;
209 }
210 }
211
212 static int
213 linux_fixup(register_t **stack_base, struct image_params *imgp)
214 {
215 register_t *argv, *envp;
216
217 argv = *stack_base;
218 envp = *stack_base + (imgp->argc + 1);
219 (*stack_base)--;
220 **stack_base = (intptr_t)(void *)envp;
221 (*stack_base)--;
222 **stack_base = (intptr_t)(void *)argv;
223 (*stack_base)--;
224 **stack_base = imgp->argc;
225 return 0;
226 }
227
228 static int
229 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
230 {
231 Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
232 register_t *pos;
233
234 pos = *stack_base + (imgp->argc + imgp->envc + 2);
235
236 if (args->trace)
237 AUXARGS_ENTRY(pos, AT_DEBUG, 1);
238 if (args->execfd != -1)
239 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
240 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
241 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
242 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
243 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
244 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
245 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
246 AUXARGS_ENTRY(pos, AT_BASE, args->base);
247 PROC_LOCK(imgp->proc);
248 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
249 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
250 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
251 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
252 PROC_UNLOCK(imgp->proc);
253 AUXARGS_ENTRY(pos, AT_NULL, 0);
254
255 free(imgp->auxargs, M_TEMP);
256 imgp->auxargs = NULL;
257
258 (*stack_base)--;
259 **stack_base = (long)imgp->argc;
260 return 0;
261 }
262
263 extern int _ucodesel, _udatasel;
264 extern unsigned long linux_sznonrtsigcode;
265
266 static void
267 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
268 {
269 register struct thread *td = curthread;
270 register struct proc *p = td->td_proc;
271 register struct trapframe *regs;
272 struct l_rt_sigframe *fp, frame;
273 int oonstack;
274
275 PROC_LOCK_ASSERT(p, MA_OWNED);
276 regs = td->td_frame;
277 oonstack = sigonstack(regs->tf_esp);
278
279 #ifdef DEBUG
280 if (ldebug(rt_sendsig))
281 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
282 catcher, sig, (void*)mask, code);
283 #endif
284 /*
285 * Allocate space for the signal handler context.
286 */
287 if ((p->p_flag & P_ALTSTACK) && !oonstack &&
288 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
289 fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
290 p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
291 } else
292 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
293 PROC_UNLOCK(p);
294
295 /*
296 * Build the argument list for the signal handler.
297 */
298 if (p->p_sysent->sv_sigtbl)
299 if (sig <= p->p_sysent->sv_sigsize)
300 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
301
302 bzero(&frame, sizeof(frame));
303
304 frame.sf_handler = catcher;
305 frame.sf_sig = sig;
306 frame.sf_siginfo = &fp->sf_si;
307 frame.sf_ucontext = &fp->sf_sc;
308
309 /* Fill in POSIX parts */
310 frame.sf_si.lsi_signo = sig;
311 frame.sf_si.lsi_code = code;
312 frame.sf_si.lsi_addr = (void *)regs->tf_err;
313
314 /*
315 * Build the signal context to be used by sigreturn.
316 */
317 frame.sf_sc.uc_flags = 0; /* XXX ??? */
318 frame.sf_sc.uc_link = NULL; /* XXX ??? */
319
320 PROC_LOCK(p);
321 frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
322 frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
323 frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
324 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
325 PROC_UNLOCK(p);
326
327 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
328
329 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
330 frame.sf_sc.uc_mcontext.sc_gs = rgs();
331 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
332 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
333 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
334 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
335 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
336 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
337 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
338 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
339 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
340 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
341 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
342 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
343 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
344 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
345 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
346 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
347 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
348
349 #ifdef DEBUG
350 if (ldebug(rt_sendsig))
351 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
352 frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
353 p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
354 #endif
355
356 if (copyout(&frame, fp, sizeof(frame)) != 0) {
357 /*
358 * Process has trashed its stack; give it an illegal
359 * instruction to halt it in its tracks.
360 */
361 #ifdef DEBUG
362 if (ldebug(rt_sendsig))
363 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
364 fp, oonstack);
365 #endif
366 PROC_LOCK(p);
367 sigexit(td, SIGILL);
368 }
369
370 /*
371 * Build context to run handler in.
372 */
373 regs->tf_esp = (int)fp;
374 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
375 linux_sznonrtsigcode;
376 regs->tf_eflags &= ~(PSL_T | PSL_VM);
377 regs->tf_cs = _ucodesel;
378 regs->tf_ds = _udatasel;
379 regs->tf_es = _udatasel;
380 regs->tf_fs = _udatasel;
381 regs->tf_ss = _udatasel;
382 PROC_LOCK(p);
383 }
384
385
386 /*
387 * Send an interrupt to process.
388 *
389 * Stack is set up to allow sigcode stored
390 * in u. to call routine, followed by kcall
391 * to sigreturn routine below. After sigreturn
392 * resets the signal mask, the stack, and the
393 * frame pointer, it returns to the user
394 * specified pc, psl.
395 */
396 static void
397 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
398 {
399 register struct thread *td = curthread;
400 register struct proc *p = td->td_proc;
401 register struct trapframe *regs;
402 struct l_sigframe *fp, frame;
403 l_sigset_t lmask;
404 int oonstack, i;
405
406 PROC_LOCK_ASSERT(p, MA_OWNED);
407 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
408 /* Signal handler installed with SA_SIGINFO. */
409 linux_rt_sendsig(catcher, sig, mask, code);
410 return;
411 }
412
413 regs = td->td_frame;
414 oonstack = sigonstack(regs->tf_esp);
415
416 #ifdef DEBUG
417 if (ldebug(sendsig))
418 printf(ARGS(sendsig, "%p, %d, %p, %lu"),
419 catcher, sig, (void*)mask, code);
420 #endif
421
422 /*
423 * Allocate space for the signal handler context.
424 */
425 if ((p->p_flag & P_ALTSTACK) && !oonstack &&
426 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
427 fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
428 p->p_sigstk.ss_size - sizeof(struct l_sigframe));
429 } else
430 fp = (struct l_sigframe *)regs->tf_esp - 1;
431 PROC_UNLOCK(p);
432
433 /*
434 * Build the argument list for the signal handler.
435 */
436 if (p->p_sysent->sv_sigtbl)
437 if (sig <= p->p_sysent->sv_sigsize)
438 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
439
440 bzero(&frame, sizeof(frame));
441
442 frame.sf_handler = catcher;
443 frame.sf_sig = sig;
444
445 bsd_to_linux_sigset(mask, &lmask);
446
447 /*
448 * Build the signal context to be used by sigreturn.
449 */
450 frame.sf_sc.sc_mask = lmask.__bits[0];
451 frame.sf_sc.sc_gs = rgs();
452 frame.sf_sc.sc_fs = regs->tf_fs;
453 frame.sf_sc.sc_es = regs->tf_es;
454 frame.sf_sc.sc_ds = regs->tf_ds;
455 frame.sf_sc.sc_edi = regs->tf_edi;
456 frame.sf_sc.sc_esi = regs->tf_esi;
457 frame.sf_sc.sc_ebp = regs->tf_ebp;
458 frame.sf_sc.sc_ebx = regs->tf_ebx;
459 frame.sf_sc.sc_edx = regs->tf_edx;
460 frame.sf_sc.sc_ecx = regs->tf_ecx;
461 frame.sf_sc.sc_eax = regs->tf_eax;
462 frame.sf_sc.sc_eip = regs->tf_eip;
463 frame.sf_sc.sc_cs = regs->tf_cs;
464 frame.sf_sc.sc_eflags = regs->tf_eflags;
465 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
466 frame.sf_sc.sc_ss = regs->tf_ss;
467 frame.sf_sc.sc_err = regs->tf_err;
468 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
469
470 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
471 frame.sf_extramask[i] = lmask.__bits[i+1];
472
473 if (copyout(&frame, fp, sizeof(frame)) != 0) {
474 /*
475 * Process has trashed its stack; give it an illegal
476 * instruction to halt it in its tracks.
477 */
478 PROC_LOCK(p);
479 sigexit(td, SIGILL);
480 }
481
482 /*
483 * Build context to run handler in.
484 */
485 regs->tf_esp = (int)fp;
486 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
487 regs->tf_eflags &= ~(PSL_T | PSL_VM);
488 regs->tf_cs = _ucodesel;
489 regs->tf_ds = _udatasel;
490 regs->tf_es = _udatasel;
491 regs->tf_fs = _udatasel;
492 regs->tf_ss = _udatasel;
493 PROC_LOCK(p);
494 }
495
496 /*
497 * System call to cleanup state after a signal
498 * has been taken. Reset signal mask and
499 * stack state from context left by sendsig (above).
500 * Return to previous pc and psl as specified by
501 * context left by sendsig. Check carefully to
502 * make sure that the user has not modified the
503 * psl to gain improper privileges or to cause
504 * a machine fault.
505 */
506 int
507 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
508 {
509 struct proc *p = td->td_proc;
510 struct l_sigframe frame;
511 register struct trapframe *regs;
512 l_sigset_t lmask;
513 int eflags, i;
514
515 regs = td->td_frame;
516
517 #ifdef DEBUG
518 if (ldebug(sigreturn))
519 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
520 #endif
521 /*
522 * The trampoline code hands us the sigframe.
523 * It is unsafe to keep track of it ourselves, in the event that a
524 * program jumps out of a signal handler.
525 */
526 if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
527 return (EFAULT);
528
529 /*
530 * Check for security violations.
531 */
532 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
533 eflags = frame.sf_sc.sc_eflags;
534 /*
535 * XXX do allow users to change the privileged flag PSL_RF. The
536 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
537 * sometimes set it there too. tf_eflags is kept in the signal
538 * context during signal handling and there is no other place
539 * to remember it, so the PSL_RF bit may be corrupted by the
540 * signal handler without us knowing. Corruption of the PSL_RF
541 * bit at worst causes one more or one less debugger trap, so
542 * allowing it is fairly harmless.
543 */
544 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
545 return(EINVAL);
546
547 /*
548 * Don't allow users to load a valid privileged %cs. Let the
549 * hardware check for invalid selectors, excess privilege in
550 * other selectors, invalid %eip's and invalid %esp's.
551 */
552 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
553 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
554 trapsignal(p, SIGBUS, T_PROTFLT);
555 return(EINVAL);
556 }
557
558 lmask.__bits[0] = frame.sf_sc.sc_mask;
559 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
560 lmask.__bits[i+1] = frame.sf_extramask[i];
561 PROC_LOCK(p);
562 linux_to_bsd_sigset(&lmask, &p->p_sigmask);
563 SIG_CANTMASK(p->p_sigmask);
564 signotify(p);
565 PROC_UNLOCK(p);
566
567 /*
568 * Restore signal context.
569 */
570 /* %gs was restored by the trampoline. */
571 regs->tf_fs = frame.sf_sc.sc_fs;
572 regs->tf_es = frame.sf_sc.sc_es;
573 regs->tf_ds = frame.sf_sc.sc_ds;
574 regs->tf_edi = frame.sf_sc.sc_edi;
575 regs->tf_esi = frame.sf_sc.sc_esi;
576 regs->tf_ebp = frame.sf_sc.sc_ebp;
577 regs->tf_ebx = frame.sf_sc.sc_ebx;
578 regs->tf_edx = frame.sf_sc.sc_edx;
579 regs->tf_ecx = frame.sf_sc.sc_ecx;
580 regs->tf_eax = frame.sf_sc.sc_eax;
581 regs->tf_eip = frame.sf_sc.sc_eip;
582 regs->tf_cs = frame.sf_sc.sc_cs;
583 regs->tf_eflags = eflags;
584 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
585 regs->tf_ss = frame.sf_sc.sc_ss;
586
587 return (EJUSTRETURN);
588 }
589
590 /*
591 * System call to cleanup state after a signal
592 * has been taken. Reset signal mask and
593 * stack state from context left by rt_sendsig (above).
594 * Return to previous pc and psl as specified by
595 * context left by sendsig. Check carefully to
596 * make sure that the user has not modified the
597 * psl to gain improper privileges or to cause
598 * a machine fault.
599 */
600 int
601 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
602 {
603 struct proc *p = td->td_proc;
604 struct l_ucontext uc;
605 struct l_sigcontext *context;
606 l_stack_t *lss;
607 stack_t ss;
608 register struct trapframe *regs;
609 int eflags;
610
611 regs = td->td_frame;
612
613 #ifdef DEBUG
614 if (ldebug(rt_sigreturn))
615 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
616 #endif
617 /*
618 * The trampoline code hands us the ucontext.
619 * It is unsafe to keep track of it ourselves, in the event that a
620 * program jumps out of a signal handler.
621 */
622 if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
623 return (EFAULT);
624
625 context = &uc.uc_mcontext;
626
627 /*
628 * Check for security violations.
629 */
630 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
631 eflags = context->sc_eflags;
632 /*
633 * XXX do allow users to change the privileged flag PSL_RF. The
634 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
635 * sometimes set it there too. tf_eflags is kept in the signal
636 * context during signal handling and there is no other place
637 * to remember it, so the PSL_RF bit may be corrupted by the
638 * signal handler without us knowing. Corruption of the PSL_RF
639 * bit at worst causes one more or one less debugger trap, so
640 * allowing it is fairly harmless.
641 */
642 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
643 return(EINVAL);
644
645 /*
646 * Don't allow users to load a valid privileged %cs. Let the
647 * hardware check for invalid selectors, excess privilege in
648 * other selectors, invalid %eip's and invalid %esp's.
649 */
650 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
651 if (!CS_SECURE(context->sc_cs)) {
652 trapsignal(p, SIGBUS, T_PROTFLT);
653 return(EINVAL);
654 }
655
656 PROC_LOCK(p);
657 linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
658 SIG_CANTMASK(p->p_sigmask);
659 signotify(p);
660 PROC_UNLOCK(p);
661
662 /*
663 * Restore signal context
664 */
665 /* %gs was restored by the trampoline. */
666 regs->tf_fs = context->sc_fs;
667 regs->tf_es = context->sc_es;
668 regs->tf_ds = context->sc_ds;
669 regs->tf_edi = context->sc_edi;
670 regs->tf_esi = context->sc_esi;
671 regs->tf_ebp = context->sc_ebp;
672 regs->tf_ebx = context->sc_ebx;
673 regs->tf_edx = context->sc_edx;
674 regs->tf_ecx = context->sc_ecx;
675 regs->tf_eax = context->sc_eax;
676 regs->tf_eip = context->sc_eip;
677 regs->tf_cs = context->sc_cs;
678 regs->tf_eflags = eflags;
679 regs->tf_esp = context->sc_esp_at_signal;
680 regs->tf_ss = context->sc_ss;
681
682 /*
683 * call sigaltstack & ignore results..
684 */
685 lss = &uc.uc_stack;
686 ss.ss_sp = lss->ss_sp;
687 ss.ss_size = lss->ss_size;
688 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
689
690 #ifdef DEBUG
691 if (ldebug(rt_sigreturn))
692 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
693 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
694 #endif
695 (void)kern_sigaltstack(td, &ss, NULL);
696
697 return (EJUSTRETURN);
698 }
699
700 /*
701 * MPSAFE
702 */
703 static void
704 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
705 {
706 args[0] = tf->tf_ebx;
707 args[1] = tf->tf_ecx;
708 args[2] = tf->tf_edx;
709 args[3] = tf->tf_esi;
710 args[4] = tf->tf_edi;
711 args[5] = tf->tf_ebp; /* Unconfirmed */
712 *params = NULL; /* no copyin */
713 }
714
715
716
717 /*
718 * Dump core, into a file named as described in the comments for
719 * expand_name(), unless the process was setuid/setgid.
720 */
721 static int
722 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
723 {
724 struct proc *p = td->td_proc;
725 struct ucred *cred = td->td_ucred;
726 struct vmspace *vm = p->p_vmspace;
727 char *tempuser;
728 int error;
729
730 if (ctob((uarea_pages + kstack_pages) +
731 vm->vm_dsize + vm->vm_ssize) >= limit)
732 return (EFAULT);
733 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
734 M_WAITOK | M_ZERO);
735 if (tempuser == NULL)
736 return (ENOMEM);
737 PROC_LOCK(p);
738 fill_kinfo_proc(p, &p->p_uarea->u_kproc);
739 PROC_UNLOCK(p);
740 bcopy(p->p_uarea, tempuser, sizeof(struct user));
741 bcopy(td->td_frame,
742 tempuser + ctob(uarea_pages) +
743 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
744 sizeof(struct trapframe));
745 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
746 ctob(uarea_pages + kstack_pages),
747 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
748 (int *)NULL, td);
749 free(tempuser, M_TEMP);
750 if (error == 0)
751 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
752 (int)ctob(vm->vm_dsize),
753 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
754 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
755 if (error == 0)
756 error = vn_rdwr_inchunks(UIO_WRITE, vp,
757 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
758 round_page(ctob(vm->vm_ssize)),
759 (off_t)ctob(uarea_pages + kstack_pages) +
760 ctob(vm->vm_dsize), UIO_USERSPACE,
761 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
762 return (error);
763 }
764 /*
765 * If a linux binary is exec'ing something, try this image activator
766 * first. We override standard shell script execution in order to
767 * be able to modify the interpreter path. We only do this if a linux
768 * binary is doing the exec, so we do not create an EXEC module for it.
769 */
770 static int exec_linux_imgact_try(struct image_params *iparams);
771
772 static int
773 exec_linux_imgact_try(struct image_params *imgp)
774 {
775 const char *head = (const char *)imgp->image_header;
776 int error = -1;
777
778 /*
779 * The interpreter for shell scripts run from a linux binary needs
780 * to be located in /compat/linux if possible in order to recursively
781 * maintain linux path emulation.
782 */
783 if (((const short *)head)[0] == SHELLMAGIC) {
784 /*
785 * Run our normal shell image activator. If it succeeds attempt
786 * to use the alternate path for the interpreter. If an alternate
787 * path is found, use our stringspace to store it.
788 */
789 if ((error = exec_shell_imgact(imgp)) == 0) {
790 char *rpath = NULL;
791
792 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
793 imgp->interpreter_name, &rpath, 0);
794 if (rpath != imgp->interpreter_name) {
795 int len = strlen(rpath) + 1;
796
797 if (len <= MAXSHELLCMDLEN) {
798 memcpy(imgp->interpreter_name, rpath, len);
799 }
800 free(rpath, M_TEMP);
801 }
802 }
803 }
804 return(error);
805 }
806
807 struct sysentvec linux_sysvec = {
808 LINUX_SYS_MAXSYSCALL,
809 linux_sysent,
810 0xff,
811 LINUX_SIGTBLSZ,
812 bsd_to_linux_signal,
813 ELAST + 1,
814 bsd_to_linux_errno,
815 translate_traps,
816 linux_fixup,
817 linux_sendsig,
818 linux_sigcode,
819 &linux_szsigcode,
820 linux_prepsyscall,
821 "Linux a.out",
822 linux_aout_coredump,
823 exec_linux_imgact_try,
824 LINUX_MINSIGSTKSZ,
825 PAGE_SIZE,
826 VM_MIN_ADDRESS,
827 VM_MAXUSER_ADDRESS,
828 USRSTACK,
829 PS_STRINGS,
830 VM_PROT_ALL,
831 exec_copyout_strings,
832 exec_setregs
833 };
834
835 struct sysentvec elf_linux_sysvec = {
836 LINUX_SYS_MAXSYSCALL,
837 linux_sysent,
838 0xff,
839 LINUX_SIGTBLSZ,
840 bsd_to_linux_signal,
841 ELAST + 1,
842 bsd_to_linux_errno,
843 translate_traps,
844 elf_linux_fixup,
845 linux_sendsig,
846 linux_sigcode,
847 &linux_szsigcode,
848 linux_prepsyscall,
849 "Linux ELF",
850 elf32_coredump,
851 exec_linux_imgact_try,
852 LINUX_MINSIGSTKSZ,
853 PAGE_SIZE,
854 VM_MIN_ADDRESS,
855 VM_MAXUSER_ADDRESS,
856 USRSTACK,
857 PS_STRINGS,
858 VM_PROT_ALL,
859 exec_copyout_strings,
860 exec_setregs
861 };
862
863 static Elf32_Brandinfo linux_brand = {
864 ELFOSABI_LINUX,
865 EM_386,
866 "Linux",
867 "/compat/linux",
868 "/lib/ld-linux.so.1",
869 &elf_linux_sysvec
870 };
871
872 static Elf32_Brandinfo linux_glibc2brand = {
873 ELFOSABI_LINUX,
874 EM_386,
875 "Linux",
876 "/compat/linux",
877 "/lib/ld-linux.so.2",
878 &elf_linux_sysvec
879 };
880
881 Elf32_Brandinfo *linux_brandlist[] = {
882 &linux_brand,
883 &linux_glibc2brand,
884 NULL
885 };
886
887 static int
888 linux_elf_modevent(module_t mod, int type, void *data)
889 {
890 Elf32_Brandinfo **brandinfo;
891 int error;
892 struct linux_ioctl_handler **lihp;
893
894 error = 0;
895
896 switch(type) {
897 case MOD_LOAD:
898 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
899 ++brandinfo)
900 if (elf32_insert_brand_entry(*brandinfo) < 0)
901 error = EINVAL;
902 if (error == 0) {
903 SET_FOREACH(lihp, linux_ioctl_handler_set)
904 linux_ioctl_register_handler(*lihp);
905 if (bootverbose)
906 printf("Linux ELF exec handler installed\n");
907 } else
908 printf("cannot insert Linux ELF brand handler\n");
909 break;
910 case MOD_UNLOAD:
911 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
912 ++brandinfo)
913 if (elf32_brand_inuse(*brandinfo))
914 error = EBUSY;
915 if (error == 0) {
916 for (brandinfo = &linux_brandlist[0];
917 *brandinfo != NULL; ++brandinfo)
918 if (elf32_remove_brand_entry(*brandinfo) < 0)
919 error = EINVAL;
920 }
921 if (error == 0) {
922 SET_FOREACH(lihp, linux_ioctl_handler_set)
923 linux_ioctl_unregister_handler(*lihp);
924 if (bootverbose)
925 printf("Linux ELF exec handler removed\n");
926 } else
927 printf("Could not deinstall ELF interpreter entry\n");
928 break;
929 default:
930 break;
931 }
932 return error;
933 }
934
935 static moduledata_t linux_elf_mod = {
936 "linuxelf",
937 linux_elf_modevent,
938 0
939 };
940
941 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Cache object: 8edb40db774759d4e6947ce848e0081c
|