1 /*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD: releng/7.4/sys/amd64/linux32/linux32_sysvec.c 196602 2009-08-27 17:36:59Z bz $");
35 #include "opt_compat.h"
36
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40
41 #define __ELF_WORD_SIZE 32
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/imgact.h>
47 #include <sys/imgact_elf.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/module.h>
52 #include <sys/mutex.h>
53 #include <sys/proc.h>
54 #include <sys/resourcevar.h>
55 #include <sys/signalvar.h>
56 #include <sys/sysctl.h>
57 #include <sys/syscallsubr.h>
58 #include <sys/sysent.h>
59 #include <sys/sysproto.h>
60 #include <sys/vnode.h>
61 #include <sys/eventhandler.h>
62
63 #include <vm/vm.h>
64 #include <vm/pmap.h>
65 #include <vm/vm_extern.h>
66 #include <vm/vm_map.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_param.h>
70
71 #include <machine/cpu.h>
72 #include <machine/md_var.h>
73 #include <machine/pcb.h>
74 #include <machine/specialreg.h>
75
76 #include <amd64/linux32/linux.h>
77 #include <amd64/linux32/linux32_proto.h>
78 #include <compat/linux/linux_futex.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_mib.h>
81 #include <compat/linux/linux_misc.h>
82 #include <compat/linux/linux_signal.h>
83 #include <compat/linux/linux_util.h>
84
85 MODULE_VERSION(linux, 1);
86
87 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
88
89 #define AUXARGS_ENTRY_32(pos, id, val) \
90 do { \
91 suword32(pos++, id); \
92 suword32(pos++, val); \
93 } while (0)
94
95 #if BYTE_ORDER == LITTLE_ENDIAN
96 #define SHELLMAGIC 0x2123 /* #! */
97 #else
98 #define SHELLMAGIC 0x2321
99 #endif
100
101 /*
102 * Allow the sendsig functions to use the ldebug() facility
103 * even though they are not syscalls themselves. Map them
104 * to syscall 0. This is slightly less bogus than using
105 * ldebug(sigreturn).
106 */
107 #define LINUX_SYS_linux_rt_sendsig 0
108 #define LINUX_SYS_linux_sendsig 0
109
110 const char *linux_platform = "i686";
111 static int linux_szplatform;
112 extern char linux_sigcode[];
113 extern int linux_szsigcode;
114
115 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
116
117 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
118 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
119
120 static int elf_linux_fixup(register_t **stack_base,
121 struct image_params *iparams);
122 static register_t *linux_copyout_strings(struct image_params *imgp);
123 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
124 caddr_t *params);
125 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
126 static void exec_linux_setregs(struct thread *td, u_long entry,
127 u_long stack, u_long ps_strings);
128 static void linux32_fixlimit(struct rlimit *rl, int which);
129 static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
130
131 static eventhandler_tag linux_exit_tag;
132 static eventhandler_tag linux_schedtail_tag;
133 static eventhandler_tag linux_exec_tag;
134
135 /*
136 * Linux syscalls return negative errno's, we do positive and map them
137 * Reference:
138 * FreeBSD: src/sys/sys/errno.h
139 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
140 * linux-2.6.17.8/include/asm-generic/errno.h
141 */
142 static int bsd_to_linux_errno[ELAST + 1] = {
143 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
144 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
145 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
146 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
147 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
148 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
149 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
150 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
151 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
152 -72, -67, -71
153 };
154
155 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
156 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
157 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
158 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
159 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
160 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
161 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
162 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
163 0, LINUX_SIGUSR1, LINUX_SIGUSR2
164 };
165
166 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
167 SIGHUP, SIGINT, SIGQUIT, SIGILL,
168 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
169 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
170 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
171 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
172 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
173 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
174 SIGIO, SIGURG, SIGSYS
175 };
176
/* Value reported when a FreeBSD trap type has no Linux counterpart. */
#define	LINUX_T_UNKNOWN		255

/*
 * FreeBSD trap type -> Linux trap number, indexed by the FreeBSD T_*
 * value named in each comment.  The table is never written, so it is
 * const.  Always go through bsd_to_linux_trapcode(), which range-checks
 * the index.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap type to the Linux trap number.  Anything
 * outside the table yields LINUX_T_UNKNOWN; the explicit cast to size_t
 * makes a negative `code' compare as a huge value and take that path
 * too.  `code' is evaluated twice, so it must be free of side effects.
 */
#define	bsd_to_linux_trapcode(code)					\
	((size_t)(code) < sizeof(_bsd_to_linux_trapcode) /		\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] :				\
	    LINUX_T_UNKNOWN)
215
216 struct linux32_ps_strings {
217 u_int32_t ps_argvstr; /* first of 0 or more argument strings */
218 u_int ps_nargvstr; /* the number of argument strings */
219 u_int32_t ps_envstr; /* first of 0 or more environment strings */
220 u_int ps_nenvstr; /* the number of environment strings */
221 };
222
223 /*
224 * If FreeBSD & Linux have a difference of opinion about what a trap
225 * means, deal with it here.
226 *
227 * MPSAFE
228 */
229 static int
230 translate_traps(int signal, int trap_code)
231 {
232 if (signal != SIGBUS)
233 return signal;
234 switch (trap_code) {
235 case T_PROTFLT:
236 case T_TSSFLT:
237 case T_DOUBLEFLT:
238 case T_PAGEFLT:
239 return SIGSEGV;
240 default:
241 return signal;
242 }
243 }
244
245 static int
246 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
247 {
248 Elf32_Auxargs *args;
249 Elf32_Addr *base;
250 Elf32_Addr *pos, *uplatform;
251 struct linux32_ps_strings *arginfo;
252
253 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
254 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
255 linux_szplatform);
256
257 KASSERT(curthread->td_proc == imgp->proc &&
258 (curthread->td_proc->p_flag & P_SA) == 0,
259 ("unsafe elf_linux_fixup(), should be curproc"));
260 base = (Elf32_Addr *)*stack_base;
261 args = (Elf32_Auxargs *)imgp->auxargs;
262 pos = base + (imgp->args->argc + imgp->args->envc + 2);
263
264 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
265
266 /*
267 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
268 * as it has appeared in the 2.4.0-rc7 first time.
269 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
270 * glibc falls back to the hard-coded CLK_TCK value when aux entry
271 * is not present.
272 * Also see linux_times() implementation.
273 */
274 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
275 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
276 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
277 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
278 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
279 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
280 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
281 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
282 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
283 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
284 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
285 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
286 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
287 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
288 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
289 if (args->execfd != -1)
290 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
291 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
292
293 free(imgp->auxargs, M_TEMP);
294 imgp->auxargs = NULL;
295
296 base--;
297 suword32(base, (uint32_t)imgp->args->argc);
298 *stack_base = (register_t *)base;
299 return 0;
300 }
301
302 extern int _ucodesel, _ucode32sel, _udatasel;
303 extern unsigned long linux_sznonrtsigcode;
304
305 static void
306 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
307 {
308 struct thread *td = curthread;
309 struct proc *p = td->td_proc;
310 struct sigacts *psp;
311 struct trapframe *regs;
312 struct l_rt_sigframe *fp, frame;
313 int oonstack;
314 int sig;
315 int code;
316
317 sig = ksi->ksi_signo;
318 code = ksi->ksi_code;
319 PROC_LOCK_ASSERT(p, MA_OWNED);
320 psp = p->p_sigacts;
321 mtx_assert(&psp->ps_mtx, MA_OWNED);
322 regs = td->td_frame;
323 oonstack = sigonstack(regs->tf_rsp);
324
325 #ifdef DEBUG
326 if (ldebug(rt_sendsig))
327 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
328 catcher, sig, (void*)mask, code);
329 #endif
330 /*
331 * Allocate space for the signal handler context.
332 */
333 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
334 SIGISMEMBER(psp->ps_sigonstack, sig)) {
335 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
336 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
337 } else
338 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
339 mtx_unlock(&psp->ps_mtx);
340
341 /*
342 * Build the argument list for the signal handler.
343 */
344 if (p->p_sysent->sv_sigtbl)
345 if (sig <= p->p_sysent->sv_sigsize)
346 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
347
348 bzero(&frame, sizeof(frame));
349
350 frame.sf_handler = PTROUT(catcher);
351 frame.sf_sig = sig;
352 frame.sf_siginfo = PTROUT(&fp->sf_si);
353 frame.sf_ucontext = PTROUT(&fp->sf_sc);
354
355 /* Fill in POSIX parts */
356 frame.sf_si.lsi_signo = sig;
357 frame.sf_si.lsi_code = code;
358 frame.sf_si.lsi_addr = PTROUT(ksi->ksi_addr);
359
360 /*
361 * Build the signal context to be used by sigreturn.
362 */
363 frame.sf_sc.uc_flags = 0; /* XXX ??? */
364 frame.sf_sc.uc_link = 0; /* XXX ??? */
365
366 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
367 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
368 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
369 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
370 PROC_UNLOCK(p);
371
372 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
373
374 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
375 frame.sf_sc.uc_mcontext.sc_gs = rgs();
376 frame.sf_sc.uc_mcontext.sc_fs = rfs();
377 __asm __volatile("movl %%es,%0" :
378 "=rm" (frame.sf_sc.uc_mcontext.sc_es));
379 __asm __volatile("movl %%ds,%0" :
380 "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
381 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
382 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
383 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
384 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
385 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
386 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
387 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
388 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
389 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
390 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
391 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
392 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
393 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
394 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
395 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
396
397 #ifdef DEBUG
398 if (ldebug(rt_sendsig))
399 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
400 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
401 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
402 #endif
403
404 if (copyout(&frame, fp, sizeof(frame)) != 0) {
405 /*
406 * Process has trashed its stack; give it an illegal
407 * instruction to halt it in its tracks.
408 */
409 #ifdef DEBUG
410 if (ldebug(rt_sendsig))
411 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
412 fp, oonstack);
413 #endif
414 PROC_LOCK(p);
415 sigexit(td, SIGILL);
416 }
417
418 /*
419 * Build context to run handler in.
420 */
421 regs->tf_rsp = PTROUT(fp);
422 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
423 linux_sznonrtsigcode;
424 regs->tf_rflags &= ~(PSL_T | PSL_D);
425 regs->tf_cs = _ucode32sel;
426 regs->tf_ss = _udatasel;
427 load_ds(_udatasel);
428 td->td_pcb->pcb_ds = _udatasel;
429 load_es(_udatasel);
430 td->td_pcb->pcb_es = _udatasel;
431 /* leave user %fs and %gs untouched */
432 PROC_LOCK(p);
433 mtx_lock(&psp->ps_mtx);
434 }
435
436
437 /*
438 * Send an interrupt to process.
439 *
440 * Stack is set up to allow sigcode stored
441 * in u. to call routine, followed by kcall
442 * to sigreturn routine below. After sigreturn
443 * resets the signal mask, the stack, and the
444 * frame pointer, it returns to the user
445 * specified pc, psl.
446 */
447 static void
448 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
449 {
450 struct thread *td = curthread;
451 struct proc *p = td->td_proc;
452 struct sigacts *psp;
453 struct trapframe *regs;
454 struct l_sigframe *fp, frame;
455 l_sigset_t lmask;
456 int oonstack, i;
457 int sig, code;
458
459 sig = ksi->ksi_signo;
460 code = ksi->ksi_code;
461 PROC_LOCK_ASSERT(p, MA_OWNED);
462 psp = p->p_sigacts;
463 mtx_assert(&psp->ps_mtx, MA_OWNED);
464 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
465 /* Signal handler installed with SA_SIGINFO. */
466 linux_rt_sendsig(catcher, ksi, mask);
467 return;
468 }
469
470 regs = td->td_frame;
471 oonstack = sigonstack(regs->tf_rsp);
472
473 #ifdef DEBUG
474 if (ldebug(sendsig))
475 printf(ARGS(sendsig, "%p, %d, %p, %u"),
476 catcher, sig, (void*)mask, code);
477 #endif
478
479 /*
480 * Allocate space for the signal handler context.
481 */
482 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
483 SIGISMEMBER(psp->ps_sigonstack, sig)) {
484 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
485 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
486 } else
487 fp = (struct l_sigframe *)regs->tf_rsp - 1;
488 mtx_unlock(&psp->ps_mtx);
489 PROC_UNLOCK(p);
490
491 /*
492 * Build the argument list for the signal handler.
493 */
494 if (p->p_sysent->sv_sigtbl)
495 if (sig <= p->p_sysent->sv_sigsize)
496 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
497
498 bzero(&frame, sizeof(frame));
499
500 frame.sf_handler = PTROUT(catcher);
501 frame.sf_sig = sig;
502
503 bsd_to_linux_sigset(mask, &lmask);
504
505 /*
506 * Build the signal context to be used by sigreturn.
507 */
508 frame.sf_sc.sc_mask = lmask.__bits[0];
509 frame.sf_sc.sc_gs = rgs();
510 frame.sf_sc.sc_fs = rfs();
511 __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
512 __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
513 frame.sf_sc.sc_edi = regs->tf_rdi;
514 frame.sf_sc.sc_esi = regs->tf_rsi;
515 frame.sf_sc.sc_ebp = regs->tf_rbp;
516 frame.sf_sc.sc_ebx = regs->tf_rbx;
517 frame.sf_sc.sc_edx = regs->tf_rdx;
518 frame.sf_sc.sc_ecx = regs->tf_rcx;
519 frame.sf_sc.sc_eax = regs->tf_rax;
520 frame.sf_sc.sc_eip = regs->tf_rip;
521 frame.sf_sc.sc_cs = regs->tf_cs;
522 frame.sf_sc.sc_eflags = regs->tf_rflags;
523 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
524 frame.sf_sc.sc_ss = regs->tf_ss;
525 frame.sf_sc.sc_err = regs->tf_err;
526 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
527 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
528
529 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
530 frame.sf_extramask[i] = lmask.__bits[i+1];
531
532 if (copyout(&frame, fp, sizeof(frame)) != 0) {
533 /*
534 * Process has trashed its stack; give it an illegal
535 * instruction to halt it in its tracks.
536 */
537 PROC_LOCK(p);
538 sigexit(td, SIGILL);
539 }
540
541 /*
542 * Build context to run handler in.
543 */
544 regs->tf_rsp = PTROUT(fp);
545 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
546 regs->tf_rflags &= ~(PSL_T | PSL_D);
547 regs->tf_cs = _ucode32sel;
548 regs->tf_ss = _udatasel;
549 load_ds(_udatasel);
550 td->td_pcb->pcb_ds = _udatasel;
551 load_es(_udatasel);
552 td->td_pcb->pcb_es = _udatasel;
553 /* leave user %fs and %gs untouched */
554 PROC_LOCK(p);
555 mtx_lock(&psp->ps_mtx);
556 }
557
558 /*
559 * System call to cleanup state after a signal
560 * has been taken. Reset signal mask and
561 * stack state from context left by sendsig (above).
562 * Return to previous pc and psl as specified by
563 * context left by sendsig. Check carefully to
564 * make sure that the user has not modified the
565 * psl to gain improper privileges or to cause
566 * a machine fault.
567 */
568 int
569 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
570 {
571 struct proc *p = td->td_proc;
572 struct l_sigframe frame;
573 struct trapframe *regs;
574 l_sigset_t lmask;
575 int eflags, i;
576 ksiginfo_t ksi;
577
578 regs = td->td_frame;
579
580 #ifdef DEBUG
581 if (ldebug(sigreturn))
582 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
583 #endif
584 /*
585 * The trampoline code hands us the sigframe.
586 * It is unsafe to keep track of it ourselves, in the event that a
587 * program jumps out of a signal handler.
588 */
589 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
590 return (EFAULT);
591
592 /*
593 * Check for security violations.
594 */
595 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
596 eflags = frame.sf_sc.sc_eflags;
597 /*
598 * XXX do allow users to change the privileged flag PSL_RF. The
599 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
600 * sometimes set it there too. tf_eflags is kept in the signal
601 * context during signal handling and there is no other place
602 * to remember it, so the PSL_RF bit may be corrupted by the
603 * signal handler without us knowing. Corruption of the PSL_RF
604 * bit at worst causes one more or one less debugger trap, so
605 * allowing it is fairly harmless.
606 */
607 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
608 return(EINVAL);
609
610 /*
611 * Don't allow users to load a valid privileged %cs. Let the
612 * hardware check for invalid selectors, excess privilege in
613 * other selectors, invalid %eip's and invalid %esp's.
614 */
615 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
616 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
617 ksiginfo_init_trap(&ksi);
618 ksi.ksi_signo = SIGBUS;
619 ksi.ksi_code = BUS_OBJERR;
620 ksi.ksi_trapno = T_PROTFLT;
621 ksi.ksi_addr = (void *)regs->tf_rip;
622 trapsignal(td, &ksi);
623 return(EINVAL);
624 }
625
626 lmask.__bits[0] = frame.sf_sc.sc_mask;
627 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
628 lmask.__bits[i+1] = frame.sf_extramask[i];
629 PROC_LOCK(p);
630 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
631 SIG_CANTMASK(td->td_sigmask);
632 signotify(td);
633 PROC_UNLOCK(p);
634
635 /*
636 * Restore signal context.
637 */
638 /* Selectors were restored by the trampoline. */
639 regs->tf_rdi = frame.sf_sc.sc_edi;
640 regs->tf_rsi = frame.sf_sc.sc_esi;
641 regs->tf_rbp = frame.sf_sc.sc_ebp;
642 regs->tf_rbx = frame.sf_sc.sc_ebx;
643 regs->tf_rdx = frame.sf_sc.sc_edx;
644 regs->tf_rcx = frame.sf_sc.sc_ecx;
645 regs->tf_rax = frame.sf_sc.sc_eax;
646 regs->tf_rip = frame.sf_sc.sc_eip;
647 regs->tf_cs = frame.sf_sc.sc_cs;
648 regs->tf_rflags = eflags;
649 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
650 regs->tf_ss = frame.sf_sc.sc_ss;
651
652 return (EJUSTRETURN);
653 }
654
655 /*
656 * System call to cleanup state after a signal
657 * has been taken. Reset signal mask and
658 * stack state from context left by rt_sendsig (above).
659 * Return to previous pc and psl as specified by
660 * context left by sendsig. Check carefully to
661 * make sure that the user has not modified the
662 * psl to gain improper privileges or to cause
663 * a machine fault.
664 */
665 int
666 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
667 {
668 struct proc *p = td->td_proc;
669 struct l_ucontext uc;
670 struct l_sigcontext *context;
671 l_stack_t *lss;
672 stack_t ss;
673 struct trapframe *regs;
674 int eflags;
675 ksiginfo_t ksi;
676
677 regs = td->td_frame;
678
679 #ifdef DEBUG
680 if (ldebug(rt_sigreturn))
681 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
682 #endif
683 /*
684 * The trampoline code hands us the ucontext.
685 * It is unsafe to keep track of it ourselves, in the event that a
686 * program jumps out of a signal handler.
687 */
688 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
689 return (EFAULT);
690
691 context = &uc.uc_mcontext;
692
693 /*
694 * Check for security violations.
695 */
696 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
697 eflags = context->sc_eflags;
698 /*
699 * XXX do allow users to change the privileged flag PSL_RF. The
700 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
701 * sometimes set it there too. tf_eflags is kept in the signal
702 * context during signal handling and there is no other place
703 * to remember it, so the PSL_RF bit may be corrupted by the
704 * signal handler without us knowing. Corruption of the PSL_RF
705 * bit at worst causes one more or one less debugger trap, so
706 * allowing it is fairly harmless.
707 */
708 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
709 return(EINVAL);
710
711 /*
712 * Don't allow users to load a valid privileged %cs. Let the
713 * hardware check for invalid selectors, excess privilege in
714 * other selectors, invalid %eip's and invalid %esp's.
715 */
716 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
717 if (!CS_SECURE(context->sc_cs)) {
718 ksiginfo_init_trap(&ksi);
719 ksi.ksi_signo = SIGBUS;
720 ksi.ksi_code = BUS_OBJERR;
721 ksi.ksi_trapno = T_PROTFLT;
722 ksi.ksi_addr = (void *)regs->tf_rip;
723 trapsignal(td, &ksi);
724 return(EINVAL);
725 }
726
727 PROC_LOCK(p);
728 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
729 SIG_CANTMASK(td->td_sigmask);
730 signotify(td);
731 PROC_UNLOCK(p);
732
733 /*
734 * Restore signal context
735 */
736 /* Selectors were restored by the trampoline. */
737 regs->tf_rdi = context->sc_edi;
738 regs->tf_rsi = context->sc_esi;
739 regs->tf_rbp = context->sc_ebp;
740 regs->tf_rbx = context->sc_ebx;
741 regs->tf_rdx = context->sc_edx;
742 regs->tf_rcx = context->sc_ecx;
743 regs->tf_rax = context->sc_eax;
744 regs->tf_rip = context->sc_eip;
745 regs->tf_cs = context->sc_cs;
746 regs->tf_rflags = eflags;
747 regs->tf_rsp = context->sc_esp_at_signal;
748 regs->tf_ss = context->sc_ss;
749
750 /*
751 * call sigaltstack & ignore results..
752 */
753 lss = &uc.uc_stack;
754 ss.ss_sp = PTRIN(lss->ss_sp);
755 ss.ss_size = lss->ss_size;
756 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
757
758 #ifdef DEBUG
759 if (ldebug(rt_sigreturn))
760 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
761 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
762 #endif
763 (void)kern_sigaltstack(td, &ss, NULL);
764
765 return (EJUSTRETURN);
766 }
767
768 /*
769 * MPSAFE
770 */
771 static void
772 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
773 {
774 args[0] = tf->tf_rbx;
775 args[1] = tf->tf_rcx;
776 args[2] = tf->tf_rdx;
777 args[3] = tf->tf_rsi;
778 args[4] = tf->tf_rdi;
779 args[5] = tf->tf_rbp; /* Unconfirmed */
780 *params = NULL; /* no copyin */
781 }
782
783 /*
784 * If a linux binary is exec'ing something, try this image activator
785 * first. We override standard shell script execution in order to
786 * be able to modify the interpreter path. We only do this if a linux
787 * binary is doing the exec, so we do not create an EXEC module for it.
788 */
789 static int exec_linux_imgact_try(struct image_params *iparams);
790
791 static int
792 exec_linux_imgact_try(struct image_params *imgp)
793 {
794 const char *head = (const char *)imgp->image_header;
795 char *rpath;
796 int error = -1, len;
797
798 /*
799 * The interpreter for shell scripts run from a linux binary needs
800 * to be located in /compat/linux if possible in order to recursively
801 * maintain linux path emulation.
802 */
803 if (((const short *)head)[0] == SHELLMAGIC) {
804 /*
805 * Run our normal shell image activator. If it succeeds
806 * attempt to use the alternate path for the interpreter. If
807 * an alternate path is found, use our stringspace to store it.
808 */
809 if ((error = exec_shell_imgact(imgp)) == 0) {
810 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
811 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
812 if (rpath != NULL) {
813 len = strlen(rpath) + 1;
814
815 if (len <= MAXSHELLCMDLEN) {
816 memcpy(imgp->interpreter_name, rpath,
817 len);
818 }
819 free(rpath, M_TEMP);
820 }
821 }
822 }
823 return(error);
824 }
825
826 /*
827 * Clear registers on exec
828 * XXX copied from ia32_signal.c.
829 */
830 static void
831 exec_linux_setregs(td, entry, stack, ps_strings)
832 struct thread *td;
833 u_long entry;
834 u_long stack;
835 u_long ps_strings;
836 {
837 struct trapframe *regs = td->td_frame;
838 struct pcb *pcb = td->td_pcb;
839
840 critical_enter();
841 wrmsr(MSR_FSBASE, 0);
842 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
843 pcb->pcb_fsbase = 0;
844 pcb->pcb_gsbase = 0;
845 critical_exit();
846 load_ds(_udatasel);
847 load_es(_udatasel);
848 load_fs(_udatasel);
849 load_gs(_udatasel);
850 pcb->pcb_ds = _udatasel;
851 pcb->pcb_es = _udatasel;
852 pcb->pcb_fs = _udatasel;
853 pcb->pcb_gs = _udatasel;
854 pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
855
856 bzero((char *)regs, sizeof(struct trapframe));
857 regs->tf_rip = entry;
858 regs->tf_rsp = stack;
859 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
860 regs->tf_ss = _udatasel;
861 regs->tf_cs = _ucode32sel;
862 regs->tf_rbx = ps_strings;
863 load_cr0(rcr0() | CR0_MP | CR0_TS);
864 fpstate_drop(td);
865
866 /* Return via doreti so that we can change to a different %cs */
867 pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
868 pcb->pcb_flags &= ~PCB_GS32BIT;
869 td->td_retval[1] = 0;
870 }
871
872 /*
873 * XXX copied from ia32_sysvec.c.
874 */
875 static register_t *
876 linux_copyout_strings(struct image_params *imgp)
877 {
878 int argc, envc;
879 u_int32_t *vectp;
880 char *stringp, *destp;
881 u_int32_t *stack_base;
882 struct linux32_ps_strings *arginfo;
883
884 /*
885 * Calculate string base and vector table pointers.
886 * Also deal with signal trampoline code for this exec type.
887 */
888 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
889 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
890 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
891 sizeof(char *));
892
893 /*
894 * install sigcode
895 */
896 copyout(imgp->proc->p_sysent->sv_sigcode,
897 ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);
898
899 /*
900 * Install LINUX_PLATFORM
901 */
902 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
903 linux_szplatform), linux_szplatform);
904
905 /*
906 * If we have a valid auxargs ptr, prepare some room
907 * on the stack.
908 */
909 if (imgp->auxargs) {
910 /*
911 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
912 * lower compatibility.
913 */
914 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
915 (LINUX_AT_COUNT * 2);
916 /*
917 * The '+ 2' is for the null pointers at the end of each of
918 * the arg and env vector sets,and imgp->auxarg_size is room
919 * for argument of Runtime loader.
920 */
921 vectp = (u_int32_t *)(destp - (imgp->args->argc +
922 imgp->args->envc + 2 + imgp->auxarg_size) *
923 sizeof(u_int32_t));
924
925 } else
926 /*
927 * The '+ 2' is for the null pointers at the end of each of
928 * the arg and env vector sets
929 */
930 vectp = (u_int32_t *)(destp - (imgp->args->argc +
931 imgp->args->envc + 2) * sizeof(u_int32_t));
932
933 /*
934 * vectp also becomes our initial stack base
935 */
936 stack_base = vectp;
937
938 stringp = imgp->args->begin_argv;
939 argc = imgp->args->argc;
940 envc = imgp->args->envc;
941 /*
942 * Copy out strings - arguments and environment.
943 */
944 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
945
946 /*
947 * Fill in "ps_strings" struct for ps, w, etc.
948 */
949 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
950 suword32(&arginfo->ps_nargvstr, argc);
951
952 /*
953 * Fill in argument portion of vector table.
954 */
955 for (; argc > 0; --argc) {
956 suword32(vectp++, (uint32_t)(intptr_t)destp);
957 while (*stringp++ != 0)
958 destp++;
959 destp++;
960 }
961
962 /* a null vector table pointer separates the argp's from the envp's */
963 suword32(vectp++, 0);
964
965 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
966 suword32(&arginfo->ps_nenvstr, envc);
967
968 /*
969 * Fill in environment portion of vector table.
970 */
971 for (; envc > 0; --envc) {
972 suword32(vectp++, (uint32_t)(intptr_t)destp);
973 while (*stringp++ != 0)
974 destp++;
975 destp++;
976 }
977
978 /* end of vector table is a null pointer */
979 suword32(vectp, 0);
980
981 return ((register_t *)stack_base);
982 }
983
984 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
985 "32-bit Linux emulation");
986
987 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
988 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
989 &linux32_maxdsiz, 0, "");
990 static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
991 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
992 &linux32_maxssiz, 0, "");
993 static u_long linux32_maxvmem = LINUX32_MAXVMEM;
994 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
995 &linux32_maxvmem, 0, "");
996
997 static void
998 linux32_fixlimit(struct rlimit *rl, int which)
999 {
1000
1001 switch (which) {
1002 case RLIMIT_DATA:
1003 if (linux32_maxdsiz != 0) {
1004 if (rl->rlim_cur > linux32_maxdsiz)
1005 rl->rlim_cur = linux32_maxdsiz;
1006 if (rl->rlim_max > linux32_maxdsiz)
1007 rl->rlim_max = linux32_maxdsiz;
1008 }
1009 break;
1010 case RLIMIT_STACK:
1011 if (linux32_maxssiz != 0) {
1012 if (rl->rlim_cur > linux32_maxssiz)
1013 rl->rlim_cur = linux32_maxssiz;
1014 if (rl->rlim_max > linux32_maxssiz)
1015 rl->rlim_max = linux32_maxssiz;
1016 }
1017 break;
1018 case RLIMIT_VMEM:
1019 if (linux32_maxvmem != 0) {
1020 if (rl->rlim_cur > linux32_maxvmem)
1021 rl->rlim_cur = linux32_maxvmem;
1022 if (rl->rlim_max > linux32_maxvmem)
1023 rl->rlim_max = linux32_maxvmem;
1024 }
1025 break;
1026 }
1027 }
1028
1029 struct sysentvec elf_linux_sysvec = {
1030 .sv_size = LINUX_SYS_MAXSYSCALL,
1031 .sv_table = linux_sysent,
1032 .sv_mask = 0,
1033 .sv_sigsize = LINUX_SIGTBLSZ,
1034 .sv_sigtbl = bsd_to_linux_signal,
1035 .sv_errsize = ELAST + 1,
1036 .sv_errtbl = bsd_to_linux_errno,
1037 .sv_transtrap = translate_traps,
1038 .sv_fixup = elf_linux_fixup,
1039 .sv_sendsig = linux_sendsig,
1040 .sv_sigcode = linux_sigcode,
1041 .sv_szsigcode = &linux_szsigcode,
1042 .sv_prepsyscall = linux_prepsyscall,
1043 .sv_name = "Linux ELF32",
1044 .sv_coredump = elf32_coredump,
1045 .sv_imgact_try = exec_linux_imgact_try,
1046 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1047 .sv_pagesize = PAGE_SIZE,
1048 .sv_minuser = VM_MIN_ADDRESS,
1049 .sv_maxuser = LINUX32_USRSTACK,
1050 .sv_usrstack = LINUX32_USRSTACK,
1051 .sv_psstrings = LINUX32_PS_STRINGS,
1052 .sv_stackprot = VM_PROT_ALL,
1053 .sv_copyout_strings = linux_copyout_strings,
1054 .sv_setregs = exec_linux_setregs,
1055 .sv_fixlimit = linux32_fixlimit,
1056 .sv_maxssiz = &linux32_maxssiz,
1057 };
1058
/* Vendor name supplied as linux32_brandnote.vendor. */
static char GNU_ABI_VENDOR[] = "GNU";

/* Value linux32_trans_osrel() requires in the first note descriptor word. */
static int GNULINUX_ABI_DESC = 0;
1061
1062 static boolean_t
1063 linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1064 {
1065 const Elf32_Word *desc;
1066 uintptr_t p;
1067
1068 p = (uintptr_t)(note + 1);
1069 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1070
1071 desc = (const Elf32_Word *)p;
1072 if (desc[0] != GNULINUX_ABI_DESC)
1073 return (FALSE);
1074
1075 /*
1076 * For linux we encode osrel as follows (see linux_mib.c):
1077 * VVVMMMIII (version, major, minor), see linux_mib.c.
1078 */
1079 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1080
1081 return (TRUE);
1082 }
1083
1084 static Elf_Brandnote linux32_brandnote = {
1085 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1086 .hdr.n_descsz = 16, /* XXX at least 16 */
1087 .hdr.n_type = 1,
1088 .vendor = GNU_ABI_VENDOR,
1089 .flags = BN_TRANSLATE_OSREL,
1090 .trans_osrel = linux32_trans_osrel
1091 };
1092
1093 static Elf32_Brandinfo linux_brand = {
1094 .brand = ELFOSABI_LINUX,
1095 .machine = EM_386,
1096 .compat_3_brand = "Linux",
1097 .emul_path = "/compat/linux",
1098 .interp_path = "/lib/ld-linux.so.1",
1099 .sysvec = &elf_linux_sysvec,
1100 .interp_newpath = NULL,
1101 .brand_note = &linux32_brandnote,
1102 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1103 };
1104
1105 static Elf32_Brandinfo linux_glibc2brand = {
1106 .brand = ELFOSABI_LINUX,
1107 .machine = EM_386,
1108 .compat_3_brand = "Linux",
1109 .emul_path = "/compat/linux",
1110 .interp_path = "/lib/ld-linux.so.2",
1111 .sysvec = &elf_linux_sysvec,
1112 .interp_newpath = NULL,
1113 .brand_note = &linux32_brandnote,
1114 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1115 };
1116
1117 Elf32_Brandinfo *linux_brandlist[] = {
1118 &linux_brand,
1119 &linux_glibc2brand,
1120 NULL
1121 };
1122
1123 static int
1124 linux_elf_modevent(module_t mod, int type, void *data)
1125 {
1126 Elf32_Brandinfo **brandinfo;
1127 int error;
1128 struct linux_ioctl_handler **lihp;
1129 struct linux_device_handler **ldhp;
1130
1131 error = 0;
1132
1133 switch(type) {
1134 case MOD_LOAD:
1135 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1136 ++brandinfo)
1137 if (elf32_insert_brand_entry(*brandinfo) < 0)
1138 error = EINVAL;
1139 if (error == 0) {
1140 SET_FOREACH(lihp, linux_ioctl_handler_set)
1141 linux_ioctl_register_handler(*lihp);
1142 SET_FOREACH(ldhp, linux_device_handler_set)
1143 linux_device_register_handler(*ldhp);
1144 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1145 sx_init(&emul_shared_lock, "emuldata->shared lock");
1146 LIST_INIT(&futex_list);
1147 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1148 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1149 linux_proc_exit, NULL, 1000);
1150 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1151 linux_schedtail, NULL, 1000);
1152 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1153 linux_proc_exec, NULL, 1000);
1154 linux_szplatform = roundup(strlen(linux_platform) + 1,
1155 sizeof(char *));
1156 stclohz = (stathz ? stathz : hz);
1157 if (bootverbose)
1158 printf("Linux ELF exec handler installed\n");
1159 } else
1160 printf("cannot insert Linux ELF brand handler\n");
1161 break;
1162 case MOD_UNLOAD:
1163 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1164 ++brandinfo)
1165 if (elf32_brand_inuse(*brandinfo))
1166 error = EBUSY;
1167 if (error == 0) {
1168 for (brandinfo = &linux_brandlist[0];
1169 *brandinfo != NULL; ++brandinfo)
1170 if (elf32_remove_brand_entry(*brandinfo) < 0)
1171 error = EINVAL;
1172 }
1173 if (error == 0) {
1174 SET_FOREACH(lihp, linux_ioctl_handler_set)
1175 linux_ioctl_unregister_handler(*lihp);
1176 SET_FOREACH(ldhp, linux_device_handler_set)
1177 linux_device_unregister_handler(*ldhp);
1178 mtx_destroy(&emul_lock);
1179 sx_destroy(&emul_shared_lock);
1180 mtx_destroy(&futex_mtx);
1181 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1182 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1183 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1184 if (bootverbose)
1185 printf("Linux ELF exec handler removed\n");
1186 } else
1187 printf("Could not deinstall ELF interpreter entry\n");
1188 break;
1189 default:
1190 return EOPNOTSUPP;
1191 }
1192 return error;
1193 }
1194
1195 static moduledata_t linux_elf_mod = {
1196 "linuxelf",
1197 linux_elf_modevent,
1198 0
1199 };
1200
1201 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Cache object: 11d1d243e82e0e9e31ff84cb4b527b85
|