[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/amd64/linux32/linux32_sysvec.c

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*-
  2  * Copyright (c) 2004 Tim J. Robbins
  3  * Copyright (c) 2003 Peter Wemm
  4  * Copyright (c) 2002 Doug Rabson
  5  * Copyright (c) 1998-1999 Andrew Gallatin
  6  * Copyright (c) 1994-1996 Søren Schmidt
  7  * All rights reserved.
  8  *
  9  * Redistribution and use in source and binary forms, with or without
 10  * modification, are permitted provided that the following conditions
 11  * are met:
 12  * 1. Redistributions of source code must retain the above copyright
 13  *    notice, this list of conditions and the following disclaimer
 14  *    in this position and unchanged.
 15  * 2. Redistributions in binary form must reproduce the above copyright
 16  *    notice, this list of conditions and the following disclaimer in the
 17  *    documentation and/or other materials provided with the distribution.
 18  * 3. The name of the author may not be used to endorse or promote products
 19  *    derived from this software without specific prior written permission
 20  *
 21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #include <sys/cdefs.h>
 34 __FBSDID("$FreeBSD: src/sys/amd64/linux32/linux32_sysvec.c,v 1.39 2008/12/17 06:11:42 imp Exp $");
 35 #include "opt_compat.h"
 36 
 37 #ifndef COMPAT_IA32
 38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
 39 #endif
 40 
 41 #define __ELF_WORD_SIZE 32
 42 
 43 #include <sys/param.h>
 44 #include <sys/systm.h>
 45 #include <sys/exec.h>
 46 #include <sys/fcntl.h>
 47 #include <sys/imgact.h>
 48 #include <sys/imgact_elf.h>
 49 #include <sys/kernel.h>
 50 #include <sys/lock.h>
 51 #include <sys/malloc.h>
 52 #include <sys/module.h>
 53 #include <sys/mutex.h>
 54 #include <sys/proc.h>
 55 #include <sys/resourcevar.h>
 56 #include <sys/signalvar.h>
 57 #include <sys/sysctl.h>
 58 #include <sys/syscallsubr.h>
 59 #include <sys/sysent.h>
 60 #include <sys/sysproto.h>
 61 #include <sys/vnode.h>
 62 #include <sys/eventhandler.h>
 63 
 64 #include <vm/vm.h>
 65 #include <vm/pmap.h>
 66 #include <vm/vm_extern.h>
 67 #include <vm/vm_map.h>
 68 #include <vm/vm_object.h>
 69 #include <vm/vm_page.h>
 70 #include <vm/vm_param.h>
 71 
 72 #include <machine/cpu.h>
 73 #include <machine/md_var.h>
 74 #include <machine/pcb.h>
 75 #include <machine/specialreg.h>
 76 
 77 #include <amd64/linux32/linux.h>
 78 #include <amd64/linux32/linux32_proto.h>
 79 #include <compat/linux/linux_emul.h>
 80 #include <compat/linux/linux_mib.h>
 81 #include <compat/linux/linux_signal.h>
 82 #include <compat/linux/linux_util.h>
 83 
 84 MODULE_VERSION(linux, 1);
 85 
 86 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
 87 
 /*
  * Emit one auxiliary-vector entry: store id and then val as two
  * consecutive 32-bit words through suword32(), stepping pos past each
  * word.  pos must be a modifiable pointer lvalue; it ends up advanced
  * by two elements.  The suword32() results are not checked.
  */
 #define AUXARGS_ENTRY_32(pos, id, val)  \
         do {                            \
                 suword32(pos, id);      \
                 pos++;                  \
                 suword32(pos, val);     \
                 pos++;                  \
         } while (0)
 93 
 /*
  * The two characters "#!" ('#' is 0x23, '!' is 0x21) as they appear when
  * the first two bytes of a file are read as one native-endian 16-bit
  * value.  exec_linux_imgact_try() compares the start of the image header
  * against this constant.
  */
 94 #if BYTE_ORDER == LITTLE_ENDIAN
 95 #define SHELLMAGIC      0x2123 /* #! */
 96 #else
 97 #define SHELLMAGIC      0x2321
 98 #endif
 99 
100 /*
101  * Allow the sendsig functions to use the ldebug() facility
102  * even though they are not syscalls themselves. Map them
103  * to syscall 0. This is slightly less bogus than using
104  * ldebug(sigreturn).
105  */
106 #define LINUX_SYS_linux_rt_sendsig      0
107 #define LINUX_SYS_linux_sendsig         0
108 
/* Objects defined outside this file. */
109 extern char linux_sigcode[];
110 extern int linux_szsigcode;
111 
112 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
113 
114 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
115 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
116 
/* Forward declarations of functions private to this file. */
117 static int      elf_linux_fixup(register_t **stack_base,
118                     struct image_params *iparams);
119 static register_t *linux_copyout_strings(struct image_params *imgp);
120 static void     linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
121                     caddr_t *params);
122 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
123 static void     exec_linux_setregs(struct thread *td, u_long entry,
124                                    u_long stack, u_long ps_strings);
125 static void     linux32_fixlimit(struct rlimit *rl, int which);
126 
/* Futex list and its lock; both are defined outside this file. */
127 extern LIST_HEAD(futex_list, futex) futex_list;
128 extern struct sx futex_sx;
129 
/*
 * NOTE(review): presumably the tags returned when the exit, schedtail and
 * exec event handlers are registered -- the registration code is not in
 * this part of the file; confirm.
 */
130 static eventhandler_tag linux_exit_tag;
131 static eventhandler_tag linux_schedtail_tag;
132 static eventhandler_tag linux_exec_tag;
133 
134 /*
135  * Linux syscalls return negative errno's, we do positive and map them
136  * Reference:
137  *   FreeBSD: src/sys/sys/errno.h
138  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
139  *            linux-2.6.17.8/include/asm-generic/errno.h
140  */
/*
 * One entry per FreeBSD errno value, 0 through ELAST.  Each entry is the
 * Linux errno to report instead, stored already negated because Linux
 * system calls return negative errno values.
 */
141 static int bsd_to_linux_errno[ELAST + 1] = {
142         -0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
143         -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
144         -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
145         -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
146         -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
147         -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
148         -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
149         -116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
150           -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
151          -72, -67, -71
152 };
153 
/*
 * Linux signal numbers for FreeBSD signals; a 0 entry means no Linux
 * signal is listed for that slot.
 * NOTE(review): presumably indexed by _SIG_IDX(FreeBSD signal), the way
 * the sendsig routines index sv_sigtbl -- confirm this table is the one
 * installed there.
 */
154 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
155         LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
156         LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
157         LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
158         LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
159         LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
160         LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
161         LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
162         0, LINUX_SIGUSR1, LINUX_SIGUSR2
163 };
164 
/*
 * Reverse direction of bsd_to_linux_signal: FreeBSD signal numbers for
 * Linux signals.  SIGBUS and SIGURG each appear twice, so the mapping is
 * not one-to-one.
 * NOTE(review): presumably indexed by Linux signal number minus one --
 * confirm against the users of this table.
 */
165 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
166         SIGHUP, SIGINT, SIGQUIT, SIGILL,
167         SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
168         SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
169         SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
170         SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
171         SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
172         SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
173         SIGIO, SIGURG, SIGSYS
174 };
175 
/*
 * Trap number translation used when filling sc_trapno in a signal frame.
 * The table is indexed by the FreeBSD trap code noted beside each entry
 * and holds the number reported to the Linux program; codes without a
 * listed equivalent yield LINUX_T_UNKNOWN.
 */
176 #define LINUX_T_UNKNOWN  255
177 static int _bsd_to_linux_trapcode[] = {
178         LINUX_T_UNKNOWN,        /* 0 */
179         6,                      /* 1  T_PRIVINFLT */
180         LINUX_T_UNKNOWN,        /* 2 */
181         3,                      /* 3  T_BPTFLT */
182         LINUX_T_UNKNOWN,        /* 4 */
183         LINUX_T_UNKNOWN,        /* 5 */
184         16,                     /* 6  T_ARITHTRAP */
185         254,                    /* 7  T_ASTFLT */
186         LINUX_T_UNKNOWN,        /* 8 */
187         13,                     /* 9  T_PROTFLT */
188         1,                      /* 10 T_TRCTRAP */
189         LINUX_T_UNKNOWN,        /* 11 */
190         14,                     /* 12 T_PAGEFLT */
191         LINUX_T_UNKNOWN,        /* 13 */
192         17,                     /* 14 T_ALIGNFLT */
193         LINUX_T_UNKNOWN,        /* 15 */
194         LINUX_T_UNKNOWN,        /* 16 */
195         LINUX_T_UNKNOWN,        /* 17 */
196         0,                      /* 18 T_DIVIDE */
197         2,                      /* 19 T_NMI */
198         4,                      /* 20 T_OFLOW */
199         5,                      /* 21 T_BOUND */
200         7,                      /* 22 T_DNA */
201         8,                      /* 23 T_DOUBLEFLT */
202         9,                      /* 24 T_FPOPFLT */
203         10,                     /* 25 T_TSSFLT */
204         11,                     /* 26 T_SEGNPFLT */
205         12,                     /* 27 T_STKFLT */
206         18,                     /* 28 T_MCHK */
207         19,                     /* 29 T_XMMFLT */
208         15                      /* 30 T_RESERVED */
209 };
/*
 * Bounds-checked lookup: indexes past the end of the table give
 * LINUX_T_UNKNOWN.  The comparison is against an unsigned sizeof
 * quotient, so a negative int argument converts to a large unsigned
 * value and also takes the LINUX_T_UNKNOWN branch.  The argument is
 * evaluated twice; pass an expression without side effects.
 */
210 #define bsd_to_linux_trapcode(code) \
211     ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
212      _bsd_to_linux_trapcode[(code)]: \
213      LINUX_T_UNKNOWN)
214 
/*
 * Argument/environment string bookkeeping with the two string pointers
 * held as 32-bit integers rather than native pointers.
 * NOTE(review): presumably the 32-bit image of struct ps_strings placed
 * at LINUX32_PS_STRINGS -- the code that fills it is not in this part of
 * the file; confirm.
 */
215 struct linux32_ps_strings {
216         u_int32_t ps_argvstr;   /* first of 0 or more argument strings */
217         u_int ps_nargvstr;      /* the number of argument strings */
218         u_int32_t ps_envstr;    /* first of 0 or more environment strings */
219         u_int ps_nenvstr;       /* the number of environment strings */
220 };
221 
222 /*
223  * If FreeBSD & Linux have a difference of opinion about what a trap
224  * means, deal with it here.
225  *
226  * MPSAFE
227  */
228 static int
229 translate_traps(int signal, int trap_code)
230 {
231         if (signal != SIGBUS)
232                 return signal;
233         switch (trap_code) {
234         case T_PROTFLT:
235         case T_TSSFLT:
236         case T_DOUBLEFLT:
237         case T_PAGEFLT:
238                 return SIGSEGV;
239         default:
240                 return signal;
241         }
242 }
243 
244 static int
245 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
246 {
247         Elf32_Auxargs *args;
248         Elf32_Addr *base;
249         Elf32_Addr *pos;
250 
251         KASSERT(curthread->td_proc == imgp->proc,
252             ("unsafe elf_linux_fixup(), should be curproc"));
253         base = (Elf32_Addr *)*stack_base;
254         args = (Elf32_Auxargs *)imgp->auxargs;
255         pos = base + (imgp->args->argc + imgp->args->envc + 2);
256 
257         if (args->execfd != -1)
258                 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
259         AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
260         AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
261         AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
262         AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
263         AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
264         AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
265         AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
266         AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
267         AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
268         AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
269         AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
270         AUXARGS_ENTRY_32(pos, AT_NULL, 0);
271 
272         free(imgp->auxargs, M_TEMP);
273         imgp->auxargs = NULL;
274 
275         base--;
276         suword32(base, (uint32_t)imgp->args->argc);
277         *stack_base = (register_t *)base;
278         return 0;
279 }
280 
/*
 * Defined outside this file.  The signal delivery and exec code below
 * loads _ucode32sel into %cs and _udatasel into %ss/%ds/%es, and adds
 * linux_sznonrtsigcode to the trampoline address used for rt signals.
 */
281 extern int _ucodesel, _ucode32sel, _udatasel;
282 extern unsigned long linux_sznonrtsigcode;
283 
/*
 * Deliver a signal using the Linux "rt" frame layout (struct
 * l_rt_sigframe: handler arguments, siginfo and ucontext).
 *
 * The frame is built in a local copy, copied out to the user stack (or
 * to the alternate signal stack when one applies), and the trapframe is
 * then pointed at the signal trampoline with 32-bit user selectors.
 *
 * Locking: entered with the process lock and the sigacts mutex held
 * (both asserted).  Both are released before the copyout and taken
 * again before returning.
 *
 * catcher: user handler address, stored in the frame as sf_handler.
 * ksi:     supplies the signal number, the code and ksi_addr (saved as
 *          sc_cr2).
 * mask:    signal mask saved in the frame after conversion to the Linux
 *          layout.
 */
284 static void
285 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
286 {
287         struct thread *td = curthread;
288         struct proc *p = td->td_proc;
289         struct sigacts *psp;
290         struct trapframe *regs;
291         struct l_rt_sigframe *fp, frame;
292         int oonstack;
293         int sig;
294         int code;
295         
296         sig = ksi->ksi_signo;
297         code = ksi->ksi_code;
298         PROC_LOCK_ASSERT(p, MA_OWNED);
299         psp = p->p_sigacts;
300         mtx_assert(&psp->ps_mtx, MA_OWNED);
301         regs = td->td_frame;
302         oonstack = sigonstack(regs->tf_rsp);
303 
304 #ifdef DEBUG
305         if (ldebug(rt_sendsig))
306                 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
307                     catcher, sig, (void*)mask, code);
308 #endif
309         /*
310          * Allocate space for the signal handler context.
311          */
312         if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
313             SIGISMEMBER(psp->ps_sigonstack, sig)) {
314                 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
315                     td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
316         } else
317                 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
318         mtx_unlock(&psp->ps_mtx);
319 
320         /*
321          * Build the argument list for the signal handler.
322          */
        /* Translate the signal number when the sysent supplies a table. */
323         if (p->p_sysent->sv_sigtbl)
324                 if (sig <= p->p_sysent->sv_sigsize)
325                         sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
326 
327         bzero(&frame, sizeof(frame));
328 
329         frame.sf_handler = PTROUT(catcher);
330         frame.sf_sig = sig;
        /* These two point into the user copy of the frame, not the local. */
331         frame.sf_siginfo = PTROUT(&fp->sf_si);
332         frame.sf_ucontext = PTROUT(&fp->sf_sc);
333 
334         /* Fill in POSIX parts */
335         ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
336 
337         /*
338          * Build the signal context to be used by sigreturn.
339          */
340         frame.sf_sc.uc_flags = 0;               /* XXX ??? */
341         frame.sf_sc.uc_link = 0;                /* XXX ??? */
342 
343         frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
344         frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
345         frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
346             ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
347         PROC_UNLOCK(p);
348 
349         bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
350 
        /* Save the interrupted register state from the trapframe. */
351         frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
352         frame.sf_sc.uc_mcontext.sc_gs     = rgs();
353         frame.sf_sc.uc_mcontext.sc_fs     = rfs();
354         __asm __volatile("movl %%es,%0" :
355             "=rm" (frame.sf_sc.uc_mcontext.sc_es));
356         __asm __volatile("movl %%ds,%0" :
357             "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
358         frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
359         frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
360         frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
361         frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
362         frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
363         frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
364         frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
365         frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
366         frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
367         frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
368         frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
369         frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
370         frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
371         frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
372         frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
373 
374 #ifdef DEBUG
375         if (ldebug(rt_sendsig))
376                 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
377                     frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
378                     td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
379 #endif
380 
381         if (copyout(&frame, fp, sizeof(frame)) != 0) {
382                 /*
383                  * Process has trashed its stack; give it an illegal
384                  * instruction to halt it in its tracks.
385                  */
386 #ifdef DEBUG
387                 if (ldebug(rt_sendsig))
388                         printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
389                             fp, oonstack);
390 #endif
                /*
                 * NOTE(review): sigexit() presumably does not return;
                 * otherwise the PROC_LOCK() at the end of this function
                 * would be a second acquisition -- confirm.
                 */
391                 PROC_LOCK(p);
392                 sigexit(td, SIGILL);
393         }
394 
395         /*
396          * Build context to run handler in.
397          */
398         regs->tf_rsp = PTROUT(fp);
        /*
         * NOTE(review): presumably the rt trampoline starts
         * linux_sznonrtsigcode bytes into the sigcode that sits just
         * below LINUX32_PS_STRINGS -- confirm against the sigcode source.
         */
399         regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
400             linux_sznonrtsigcode;
401         regs->tf_rflags &= ~(PSL_T | PSL_D);
402         regs->tf_cs = _ucode32sel;
403         regs->tf_ss = _udatasel;
404         load_ds(_udatasel);
405         td->td_pcb->pcb_ds = _udatasel;
406         load_es(_udatasel);
407         td->td_pcb->pcb_es = _udatasel;
408         /* leave user %fs and %gs untouched */
        /* Re-take the locks that were held on entry. */
409         PROC_LOCK(p);
410         mtx_lock(&psp->ps_mtx);
411 }
412 
413 
414 /*
415  * Send an interrupt to process.
416  *
417  * Stack is set up to allow sigcode stored
418  * in u. to call routine, followed by kcall
419  * to sigreturn routine below.  After sigreturn
420  * resets the signal mask, the stack, and the
421  * frame pointer, it returns to the user
422  * specified pc, psl.
423  */
/*
 * Deliver a signal using the non-rt Linux frame layout (struct
 * l_sigframe).  Handlers installed with SA_SIGINFO are handed to
 * linux_rt_sendsig() instead, with the locks still held.
 *
 * Locking: entered with the process lock and the sigacts mutex held
 * (both asserted).  Both are released before the frame is built and
 * copied out, and taken again before returning.
 *
 * catcher: user handler address, stored in the frame as sf_handler.
 * ksi:     supplies the signal number, the code and ksi_addr (saved as
 *          sc_cr2).
 * mask:    signal mask to save; word 0 goes to sc_mask and the remaining
 *          words to sf_extramask.
 */
424 static void
425 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
426 {
427         struct thread *td = curthread;
428         struct proc *p = td->td_proc;
429         struct sigacts *psp;
430         struct trapframe *regs;
431         struct l_sigframe *fp, frame;
432         l_sigset_t lmask;
433         int oonstack, i;
434         int sig, code;
435 
436         sig = ksi->ksi_signo;
437         code = ksi->ksi_code;
438         PROC_LOCK_ASSERT(p, MA_OWNED);
439         psp = p->p_sigacts;
440         mtx_assert(&psp->ps_mtx, MA_OWNED);
441         if (SIGISMEMBER(psp->ps_siginfo, sig)) {
442                 /* Signal handler installed with SA_SIGINFO. */
443                 linux_rt_sendsig(catcher, ksi, mask);
444                 return;
445         }
446 
447         regs = td->td_frame;
448         oonstack = sigonstack(regs->tf_rsp);
449 
450 #ifdef DEBUG
451         if (ldebug(sendsig))
452                 printf(ARGS(sendsig, "%p, %d, %p, %u"),
453                     catcher, sig, (void*)mask, code);
454 #endif
455 
456         /*
457          * Allocate space for the signal handler context.
458          */
459         if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
460             SIGISMEMBER(psp->ps_sigonstack, sig)) {
461                 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
462                     td->td_sigstk.ss_size - sizeof(struct l_sigframe));
463         } else
464                 fp = (struct l_sigframe *)regs->tf_rsp - 1;
465         mtx_unlock(&psp->ps_mtx);
466         PROC_UNLOCK(p);
467 
468         /*
469          * Build the argument list for the signal handler.
470          */
        /* Translate the signal number when the sysent supplies a table. */
471         if (p->p_sysent->sv_sigtbl)
472                 if (sig <= p->p_sysent->sv_sigsize)
473                         sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
474 
475         bzero(&frame, sizeof(frame));
476 
477         frame.sf_handler = PTROUT(catcher);
478         frame.sf_sig = sig;
479 
480         bsd_to_linux_sigset(mask, &lmask);
481 
482         /*
483          * Build the signal context to be used by sigreturn.
484          */
485         frame.sf_sc.sc_mask   = lmask.__bits[0];
486         frame.sf_sc.sc_gs     = rgs();
487         frame.sf_sc.sc_fs     = rfs();
488         __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
489         __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
490         frame.sf_sc.sc_edi    = regs->tf_rdi;
491         frame.sf_sc.sc_esi    = regs->tf_rsi;
492         frame.sf_sc.sc_ebp    = regs->tf_rbp;
493         frame.sf_sc.sc_ebx    = regs->tf_rbx;
494         frame.sf_sc.sc_edx    = regs->tf_rdx;
495         frame.sf_sc.sc_ecx    = regs->tf_rcx;
496         frame.sf_sc.sc_eax    = regs->tf_rax;
497         frame.sf_sc.sc_eip    = regs->tf_rip;
498         frame.sf_sc.sc_cs     = regs->tf_cs;
499         frame.sf_sc.sc_eflags = regs->tf_rflags;
500         frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
501         frame.sf_sc.sc_ss     = regs->tf_ss;
502         frame.sf_sc.sc_err    = regs->tf_err;
503         frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
504         frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
505 
        /* Mask words beyond the first travel in sf_extramask. */
506         for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
507                 frame.sf_extramask[i] = lmask.__bits[i+1];
508 
509         if (copyout(&frame, fp, sizeof(frame)) != 0) {
510                 /*
511                  * Process has trashed its stack; give it an illegal
512                  * instruction to halt it in its tracks.
513                  */
                /*
                 * NOTE(review): sigexit() presumably does not return;
                 * otherwise the PROC_LOCK() at the end of this function
                 * would be a second acquisition -- confirm.
                 */
514                 PROC_LOCK(p);
515                 sigexit(td, SIGILL);
516         }
517 
518         /*
519          * Build context to run handler in.
520          */
521         regs->tf_rsp = PTROUT(fp);
        /* Trampoline address: sv_szsigcode bytes below LINUX32_PS_STRINGS. */
522         regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
523         regs->tf_rflags &= ~(PSL_T | PSL_D);
524         regs->tf_cs = _ucode32sel;
525         regs->tf_ss = _udatasel;
526         load_ds(_udatasel);
527         td->td_pcb->pcb_ds = _udatasel;
528         load_es(_udatasel);
529         td->td_pcb->pcb_es = _udatasel;
530         /* leave user %fs and %gs untouched */
        /* Re-take the locks that were held on entry. */
531         PROC_LOCK(p);
532         mtx_lock(&psp->ps_mtx);
533 }
534 
535 /*
536  * System call to cleanup state after a signal
537  * has been taken.  Reset signal mask and
538  * stack state from context left by sendsig (above).
539  * Return to previous pc and psl as specified by
540  * context left by sendsig. Check carefully to
541  * make sure that the user has not modified the
542  * psl to gain improper privileges or to cause
543  * a machine fault.
544  */
/*
 * td:   calling thread; its trapframe and signal mask are rewritten.
 * args: args->sfp is the user address of the struct l_sigframe to
 *       restore from.
 *
 * Returns EFAULT when the frame cannot be copied in, EINVAL when the
 * saved eflags differ from the current ones outside PSL_USERCHANGE
 * (PSL_RF excepted) or when the saved %cs is not at user privilege (a
 * SIGBUS is posted first in that case), and EJUSTRETURN once the saved
 * context has been installed.
 */
545 int
546 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
547 {
548         struct proc *p = td->td_proc;
549         struct l_sigframe frame;
550         struct trapframe *regs;
551         l_sigset_t lmask;
552         int eflags, i;
553         ksiginfo_t ksi;
554 
555         regs = td->td_frame;
556 
557 #ifdef DEBUG
558         if (ldebug(sigreturn))
559                 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
560 #endif
561         /*
562          * The trampoline code hands us the sigframe.
563          * It is unsafe to keep track of it ourselves, in the event that a
564          * program jumps out of a signal handler.
565          */
566         if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
567                 return (EFAULT);
568 
569         /*
570          * Check for security violations.
571          */
572 #define EFLAGS_SECURE(ef, oef)  ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
573         eflags = frame.sf_sc.sc_eflags;
574         /*
575          * XXX do allow users to change the privileged flag PSL_RF.  The
576          * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
577          * sometimes set it there too.  tf_eflags is kept in the signal
578          * context during signal handling and there is no other place
579          * to remember it, so the PSL_RF bit may be corrupted by the
580          * signal handler without us knowing.  Corruption of the PSL_RF
581          * bit at worst causes one more or one less debugger trap, so
582          * allowing it is fairly harmless.
583          */
584         if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
585                 return(EINVAL);
586 
587         /*
588          * Don't allow users to load a valid privileged %cs.  Let the
589          * hardware check for invalid selectors, excess privilege in
590          * other selectors, invalid %eip's and invalid %esp's.
591          */
592 #define CS_SECURE(cs)   (ISPL(cs) == SEL_UPL)
593         if (!CS_SECURE(frame.sf_sc.sc_cs)) {
594                 ksiginfo_init_trap(&ksi);
595                 ksi.ksi_signo = SIGBUS;
596                 ksi.ksi_code = BUS_OBJERR;
597                 ksi.ksi_trapno = T_PROTFLT;
598                 ksi.ksi_addr = (void *)regs->tf_rip;
599                 trapsignal(td, &ksi);
600                 return(EINVAL);
601         }
602 
        /* Reassemble the Linux mask from sc_mask plus sf_extramask. */
603         lmask.__bits[0] = frame.sf_sc.sc_mask;
604         for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
605                 lmask.__bits[i+1] = frame.sf_extramask[i];
        /* Install it as the thread's mask under the process lock. */
606         PROC_LOCK(p);
607         linux_to_bsd_sigset(&lmask, &td->td_sigmask);
608         SIG_CANTMASK(td->td_sigmask);
609         signotify(td);
610         PROC_UNLOCK(p);
611 
612         /*
613          * Restore signal context.
614          */
615         /* Selectors were restored by the trampoline. */
616         regs->tf_rdi    = frame.sf_sc.sc_edi;
617         regs->tf_rsi    = frame.sf_sc.sc_esi;
618         regs->tf_rbp    = frame.sf_sc.sc_ebp;
619         regs->tf_rbx    = frame.sf_sc.sc_ebx;
620         regs->tf_rdx    = frame.sf_sc.sc_edx;
621         regs->tf_rcx    = frame.sf_sc.sc_ecx;
622         regs->tf_rax    = frame.sf_sc.sc_eax;
623         regs->tf_rip    = frame.sf_sc.sc_eip;
624         regs->tf_cs     = frame.sf_sc.sc_cs;
625         regs->tf_rflags = eflags;
626         regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
627         regs->tf_ss     = frame.sf_sc.sc_ss;
628 
629         return (EJUSTRETURN);
630 }
631 
632 /*
633  * System call to cleanup state after a signal
634  * has been taken.  Reset signal mask and
635  * stack state from context left by rt_sendsig (above).
636  * Return to previous pc and psl as specified by
637  * context left by sendsig. Check carefully to
638  * make sure that the user has not modified the
639  * psl to gain improper privileges or to cause
640  * a machine fault.
641  */
/*
 * td:   calling thread; its trapframe, signal mask and alternate stack
 *       settings are rewritten.
 * args: args->ucp is the user address of the struct l_ucontext to
 *       restore from.
 *
 * Returns EFAULT when the ucontext cannot be copied in, EINVAL when the
 * saved eflags differ from the current ones outside PSL_USERCHANGE
 * (PSL_RF excepted) or when the saved %cs is not at user privilege (a
 * SIGBUS is posted first in that case), and EJUSTRETURN once the saved
 * context has been installed.
 */
642 int
643 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
644 {
645         struct proc *p = td->td_proc;
646         struct l_ucontext uc;
647         struct l_sigcontext *context;
648         l_stack_t *lss;
649         stack_t ss;
650         struct trapframe *regs;
651         int eflags;
652         ksiginfo_t ksi;
653 
654         regs = td->td_frame;
655 
656 #ifdef DEBUG
657         if (ldebug(rt_sigreturn))
658                 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
659 #endif
660         /*
661          * The trampoline code hands us the ucontext.
662          * It is unsafe to keep track of it ourselves, in the event that a
663          * program jumps out of a signal handler.
664          */
665         if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
666                 return (EFAULT);
667 
668         context = &uc.uc_mcontext;
669 
670         /*
671          * Check for security violations.
672          */
673 #define EFLAGS_SECURE(ef, oef)  ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
674         eflags = context->sc_eflags;
675         /*
676          * XXX do allow users to change the privileged flag PSL_RF.  The
677          * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
678          * sometimes set it there too.  tf_eflags is kept in the signal
679          * context during signal handling and there is no other place
680          * to remember it, so the PSL_RF bit may be corrupted by the
681          * signal handler without us knowing.  Corruption of the PSL_RF
682          * bit at worst causes one more or one less debugger trap, so
683          * allowing it is fairly harmless.
684          */
685         if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
686                 return(EINVAL);
687 
688         /*
689          * Don't allow users to load a valid privileged %cs.  Let the
690          * hardware check for invalid selectors, excess privilege in
691          * other selectors, invalid %eip's and invalid %esp's.
692          */
693 #define CS_SECURE(cs)   (ISPL(cs) == SEL_UPL)
694         if (!CS_SECURE(context->sc_cs)) {
695                 ksiginfo_init_trap(&ksi);
696                 ksi.ksi_signo = SIGBUS;
697                 ksi.ksi_code = BUS_OBJERR;
698                 ksi.ksi_trapno = T_PROTFLT;
699                 ksi.ksi_addr = (void *)regs->tf_rip;
700                 trapsignal(td, &ksi);
701                 return(EINVAL);
702         }
703 
        /* Install the saved mask as the thread's mask under the proc lock. */
704         PROC_LOCK(p);
705         linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
706         SIG_CANTMASK(td->td_sigmask);
707         signotify(td);
708         PROC_UNLOCK(p);
709 
710         /*
711          * Restore signal context
712          */
713         /* Selectors were restored by the trampoline. */
714         regs->tf_rdi    = context->sc_edi;
715         regs->tf_rsi    = context->sc_esi;
716         regs->tf_rbp    = context->sc_ebp;
717         regs->tf_rbx    = context->sc_ebx;
718         regs->tf_rdx    = context->sc_edx;
719         regs->tf_rcx    = context->sc_ecx;
720         regs->tf_rax    = context->sc_eax;
721         regs->tf_rip    = context->sc_eip;
722         regs->tf_cs     = context->sc_cs;
723         regs->tf_rflags = eflags;
724         regs->tf_rsp    = context->sc_esp_at_signal;
725         regs->tf_ss     = context->sc_ss;
726 
727         /*
728          * call sigaltstack & ignore results..
729          */
        /* Convert the saved Linux stack_t into the native form first. */
730         lss = &uc.uc_stack;
731         ss.ss_sp = PTRIN(lss->ss_sp);
732         ss.ss_size = lss->ss_size;
733         ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
734 
735 #ifdef DEBUG
736         if (ldebug(rt_sigreturn))
737                 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
738                     ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
739 #endif
740         (void)kern_sigaltstack(td, &ss, NULL);
741 
742         return (EJUSTRETURN);
743 }
744 
745 /*
746  * MPSAFE
747  */
748 static void
749 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
750 {
751         args[0] = tf->tf_rbx;
752         args[1] = tf->tf_rcx;
753         args[2] = tf->tf_rdx;
754         args[3] = tf->tf_rsi;
755         args[4] = tf->tf_rdi;
756         args[5] = tf->tf_rbp;   /* Unconfirmed */
757         *params = NULL;         /* no copyin */
758 }
759 
760 /*
761  * If a linux binary is exec'ing something, try this image activator
762  * first.  We override standard shell script execution in order to
763  * be able to modify the interpreter path.  We only do this if a linux
764  * binary is doing the exec, so we do not create an EXEC module for it.
765  */
766 static int      exec_linux_imgact_try(struct image_params *iparams);
767 
768 static int
769 exec_linux_imgact_try(struct image_params *imgp)
770 {
771     const char *head = (const char *)imgp->image_header;
772     char *rpath;
773     int error = -1, len;
774 
775     /*
776      * The interpreter for shell scripts run from a linux binary needs
777      * to be located in /compat/linux if possible in order to recursively
778      * maintain linux path emulation.
779      */
780     if (((const short *)head)[0] == SHELLMAGIC) {
781             /*
782              * Run our normal shell image activator.  If it succeeds attempt
783              * to use the alternate path for the interpreter.  If an alternate
784              * path is found, use our stringspace to store it.
785              */
786             if ((error = exec_shell_imgact(imgp)) == 0) {
787                     linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
788                         imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
789                     if (rpath != NULL) {
790                             len = strlen(rpath) + 1;
791 
792                             if (len <= MAXSHELLCMDLEN) {
793                                     memcpy(imgp->interpreter_name, rpath, len);
794                             }
795                             free(rpath, M_TEMP);
796                     }
797             }
798     }
799     return(error);
800 }
801 
802 /*
803  * Clear registers on exec
804  * XXX copied from ia32_signal.c.
805  */
/*
 * Set up the register state of td for the start of a freshly exec'ed
 * 32-bit Linux image.
 *
 * td:         thread whose trapframe and pcb are reset.
 * entry:      becomes the instruction pointer (tf_rip).
 * stack:      becomes the stack pointer (tf_rsp).
 * ps_strings: passed to the new image in %ebx (tf_rbx).
 */
806 static void
807 exec_linux_setregs(td, entry, stack, ps_strings)
808         struct thread *td;
809         u_long entry;
810         u_long stack;
811         u_long ps_strings;
812 {
813         struct trapframe *regs = td->td_frame;
814         struct pcb *pcb = td->td_pcb;
815 
        /* Zero the %fs/%gs bases, both in the MSRs and in the pcb copies. */
816         critical_enter();
817         wrmsr(MSR_FSBASE, 0);
818         wrmsr(MSR_KGSBASE, 0);  /* User value while we're in the kernel */
819         pcb->pcb_fsbase = 0;
820         pcb->pcb_gsbase = 0;
821         critical_exit();
        /* Point every data segment register at the user data selector. */
822         load_ds(_udatasel);
823         load_es(_udatasel);
824         load_fs(_udatasel);
825         load_gs(_udatasel);
826         pcb->pcb_ds = _udatasel;
827         pcb->pcb_es = _udatasel;
828         pcb->pcb_fs = _udatasel;
829         pcb->pcb_gs = _udatasel;
830 
831         bzero((char *)regs, sizeof(struct trapframe));
832         regs->tf_rip = entry;
833         regs->tf_rsp = stack;
        /*
         * NOTE(review): the trapframe was zeroed just above, so
         * (regs->tf_rflags & PSL_T) is always 0 here and the result is
         * plain PSL_USER.
         */
834         regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
835         regs->tf_ss = _udatasel;
836         regs->tf_cs = _ucode32sel;
837         regs->tf_rbx = ps_strings;
838         load_cr0(rcr0() | CR0_MP | CR0_TS);
839         fpstate_drop(td);
840 
841         /* Return via doreti so that we can change to a different %cs */
842         pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
843         pcb->pcb_flags &= ~PCB_GS32BIT;
844         td->td_retval[1] = 0;
845 }
846 
847 /*
848  * XXX copied from ia32_sysvec.c.
849  */
850 static register_t *
851