1 /*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD: releng/9.0/sys/amd64/linux32/linux32_sysvec.c 220026 2011-03-26 09:25:35Z dchagin $");
35 #include "opt_compat.h"
36
37 #ifndef COMPAT_FREEBSD32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39 #endif
40
41 #define __ELF_WORD_SIZE 32
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_futex.h>
81 #include <compat/linux/linux_mib.h>
82 #include <compat/linux/linux_misc.h>
83 #include <compat/linux/linux_signal.h>
84 #include <compat/linux/linux_util.h>
85
/* Declares version 1 of the module named "linux". */
MODULE_VERSION(linux, 1);

/* Defines the M_LINUX malloc type, described as "Linux mode structures". */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * Emit one auxiliary-vector entry: `id' followed by `val', each handed to
 * suword32() (presumably a 32-bit store to user space -- confirm), with
 * `pos' post-incremented after each store.
 *
 * `pos' is expanded twice and modified, so it must be a plain pointer
 * lvalue without side effects.  The do/while (0) wrapper makes the macro
 * usable as the unbraced body of an `if', as elf_linux_fixup() does.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32(pos++, id);	\
		suword32(pos++, val);	\
	} while (0)

/*
 * Value obtained when the first two bytes of a "#!" script are read as one
 * 16-bit integer, which is how exec_linux_imgact_try() inspects the image
 * header; the constant therefore depends on the host byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define	SHELLMAGIC	0x2123 /* #! */
#else
#define	SHELLMAGIC	0x2321
#endif
101
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/*
 * Platform name copied onto the initial user stack by
 * linux_copyout_strings() and advertised through the LINUX_AT_PLATFORM
 * auxv entry in elf_linux_fixup().
 */
const char *linux_platform = "i686";
/*
 * Number of bytes of linux_platform that are copied out (and reserved
 * just below the ps_strings area).  It is assigned outside the part of
 * the file visible here.
 */
static int linux_szplatform;
/* Signal trampoline code and its size; installed in elf_linux_sysvec. */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* System call table; installed as sv_table in elf_linux_sysvec. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker sets of ioctl and device handlers (not used in the code visible here). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations for the sysentvec callbacks defined below. */
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void	linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td,
		    struct image_params *imgp, u_long stack);
static void	linux32_fixlimit(struct rlimit *rl, int which);
static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);

/*
 * Event handler tags; presumably filled in when the module registers its
 * process exit/exec hooks -- the registration is not visible here.
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_exec_tag;
132
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 *
 * The table has ELAST + 1 entries and is indexed by the native (positive)
 * errno value; each entry is the matching Linux errno, already negated.
 * It is installed as sv_errtbl in elf_linux_sysvec.
 *
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
	-6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
	-72, -67, -71
};
152
/*
 * Native-to-Linux signal number translation, installed as sv_sigtbl in
 * elf_linux_sysvec.  The sendsig routines index it with _SIG_IDX(sig),
 * so the first entry corresponds to the first native signal.
 * NOTE(review): the 0 entries appear to mark native signals that have no
 * Linux counterpart -- confirm against the signal headers.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
163
/*
 * Linux-to-native signal number translation; the reverse direction of
 * bsd_to_linux_signal.  It is not referenced in the part of the file
 * visible here; presumably it is indexed the same way (signal number
 * minus one) by the shared Linux signal code -- confirm against its users.
 * Note that some native signals appear more than once (SIGBUS, SIGURG),
 * so the mapping is not one-to-one.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
174
/* Value reported to Linux when a native trap code has no listed equivalent. */
#define	LINUX_T_UNKNOWN	255
/*
 * Native trap code -> Linux trap number.  The array index is the native
 * code (named in each entry's comment); do not use this table directly,
 * go through bsd_to_linux_trapcode() which bounds-checks the index.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup: yields the table entry for `code', or
 * LINUX_T_UNKNOWN when `code' is past the end of the table.  Because the
 * comparison is against a size_t, a negative int `code' converts to a
 * large unsigned value and also yields LINUX_T_UNKNOWN.
 * `code' is evaluated twice; pass an expression without side effects.
 */
#define	bsd_to_linux_trapcode(code) \
	((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
	    _bsd_to_linux_trapcode[(code)]: \
	    LINUX_T_UNKNOWN)
213
/*
 * 32-bit record located at LINUX32_PS_STRINGS in the user address space.
 * linux_copyout_strings() fills in every field with suword32().
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* user address of the argv pointer vector */
	u_int ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* user address of the envp pointer vector */
	u_int ps_nenvstr;	/* the number of environment strings */
};
220
221 /*
222 * If FreeBSD & Linux have a difference of opinion about what a trap
223 * means, deal with it here.
224 *
225 * MPSAFE
226 */
227 static int
228 translate_traps(int signal, int trap_code)
229 {
230 if (signal != SIGBUS)
231 return signal;
232 switch (trap_code) {
233 case T_PROTFLT:
234 case T_TSSFLT:
235 case T_DOUBLEFLT:
236 case T_PAGEFLT:
237 return SIGSEGV;
238 default:
239 return signal;
240 }
241 }
242
243 static int
244 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
245 {
246 Elf32_Auxargs *args;
247 Elf32_Addr *base;
248 Elf32_Addr *pos, *uplatform;
249 struct linux32_ps_strings *arginfo;
250
251 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
252 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
253
254 KASSERT(curthread->td_proc == imgp->proc,
255 ("unsafe elf_linux_fixup(), should be curproc"));
256 base = (Elf32_Addr *)*stack_base;
257 args = (Elf32_Auxargs *)imgp->auxargs;
258 pos = base + (imgp->args->argc + imgp->args->envc + 2);
259
260 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
261
262 /*
263 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
264 * as it has appeared in the 2.4.0-rc7 first time.
265 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
266 * glibc falls back to the hard-coded CLK_TCK value when aux entry
267 * is not present.
268 * Also see linux_times() implementation.
269 */
270 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
271 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
272 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
273 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
274 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
275 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
276 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
277 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
278 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
279 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
280 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
281 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
282 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
283 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
284 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
285 if (args->execfd != -1)
286 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
287 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
288
289 free(imgp->auxargs, M_TEMP);
290 imgp->auxargs = NULL;
291
292 base--;
293 suword32(base, (uint32_t)imgp->args->argc);
294 *stack_base = (register_t *)base;
295 return 0;
296 }
297
/*
 * Offset added to the sigcode base to reach the trampoline used for rt
 * frames (see the tf_rip assignment in linux_rt_sendsig()).
 */
extern unsigned long linux_sznonrtsigcode;

/*
 * Deliver a signal using the Linux "rt" frame layout (struct
 * l_rt_sigframe: siginfo plus ucontext).  linux_sendsig() calls this for
 * handlers installed with SA_SIGINFO.
 *
 * catcher - user handler address, stored in the frame.
 * ksi     - signal information; supplies the signal number, code and
 *           fault address.
 * mask    - signal mask to record in the frame for sigreturn.
 *
 * Locking: entered with the process lock and psp->ps_mtx held (both are
 * asserted).  ps_mtx is dropped once the frame address is chosen, the
 * process lock once the alternate-stack fields are captured; both are
 * re-acquired, process lock first, before returning.  If the frame cannot
 * be copied out the process is terminated via sigexit(td, SIGILL).
 */
static void
linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;
	int oonstack;
	int sig;
	int code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	/* Non-zero when the interrupted %rsp is already on the alt stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context: at the top of the
	 * alternate stack when one is enabled, not already in use and
	 * requested for this signal; otherwise directly below the
	 * interrupted stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Build the argument list for the signal handler.
	 * From here on `sig' is the Linux signal number (when the ABI
	 * provides a translation table and the signal is within it).
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Start from a zeroed frame so unset fields are not stack garbage. */
	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;
	/* User-space addresses of the siginfo and ucontext inside the frame. */
	frame.sf_siginfo = PTROUT(&fp->sf_si);
	frame.sf_ucontext = PTROUT(&fp->sf_sc);

	/* Fill in POSIX parts */
	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = 0;		/* XXX ??? */

	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	/* Alt-stack state captured; the process lock is dropped from here. */
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Snapshot the interrupted register state into the 32-bit context. */
	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 * Execution resumes at offset linux_sznonrtsigcode from the sigcode
	 * base (linux_sendsig() uses the base itself), with the stack
	 * pointer at the new frame and the 32-bit user selectors loaded.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	/* Restore the lock state the caller entered with. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
426
427
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * in u. to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 *
 * This is the sv_sendsig entry of elf_linux_sysvec.  Handlers installed
 * with SA_SIGINFO are handed to linux_rt_sendsig(); all others get the
 * traditional struct l_sigframe built here.
 *
 * Locking: entered with the process lock and psp->ps_mtx held (both are
 * asserted).  Both are dropped once the frame address has been chosen
 * and re-acquired, process lock first, before returning.  If the frame
 * cannot be copied out the process is terminated via
 * sigexit(td, SIGILL).
 */
static void
linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int oonstack, i;
	int sig, code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, ksi, mask);
		return;
	}

	regs = td->td_frame;
	/* Non-zero when the interrupted %rsp is already on the alt stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context: at the top of the
	 * alternate stack when one is enabled, not already in use and
	 * requested for this signal; otherwise directly below the
	 * interrupted stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 * From here on `sig' is the Linux signal number (when the ABI
	 * provides a translation table and the signal is within it).
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Start from a zeroed frame so unset fields are not stack garbage. */
	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 * The first mask word lives in the context itself; the remaining
	 * words go to sf_extramask below.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = regs->tf_gs;
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_rdi;
	frame.sf_sc.sc_esi    = regs->tf_rsi;
	frame.sf_sc.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.sc_edx    = regs->tf_rdx;
	frame.sf_sc.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.sc_eax    = regs->tf_rax;
	frame.sf_sc.sc_eip    = regs->tf_rip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_rflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 * Execution resumes at the start of the sigcode, with the stack
	 * pointer at the new frame and the 32-bit user selectors loaded.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = p->p_sysent->sv_sigcode_base;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	/* Restore the lock state the caller entered with. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
549
550 /*
551 * System call to cleanup state after a signal
552 * has been taken. Reset signal mask and
553 * stack state from context left by sendsig (above).
554 * Return to previous pc and psl as specified by
555 * context left by sendsig. Check carefully to
556 * make sure that the user has not modified the
557 * psl to gain improper privileges or to cause
558 * a machine fault.
559 */
560 int
561 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
562 {
563 struct l_sigframe frame;
564 struct trapframe *regs;
565 sigset_t bmask;
566 l_sigset_t lmask;
567 int eflags, i;
568 ksiginfo_t ksi;
569
570 regs = td->td_frame;
571
572 #ifdef DEBUG
573 if (ldebug(sigreturn))
574 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
575 #endif
576 /*
577 * The trampoline code hands us the sigframe.
578 * It is unsafe to keep track of it ourselves, in the event that a
579 * program jumps out of a signal handler.
580 */
581 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
582 return (EFAULT);
583
584 /*
585 * Check for security violations.
586 */
587 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
588 eflags = frame.sf_sc.sc_eflags;
589 /*
590 * XXX do allow users to change the privileged flag PSL_RF. The
591 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
592 * sometimes set it there too. tf_eflags is kept in the signal
593 * context during signal handling and there is no other place
594 * to remember it, so the PSL_RF bit may be corrupted by the
595 * signal handler without us knowing. Corruption of the PSL_RF
596 * bit at worst causes one more or one less debugger trap, so
597 * allowing it is fairly harmless.
598 */
599 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
600 return(EINVAL);
601
602 /*
603 * Don't allow users to load a valid privileged %cs. Let the
604 * hardware check for invalid selectors, excess privilege in
605 * other selectors, invalid %eip's and invalid %esp's.
606 */
607 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
608 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
609 ksiginfo_init_trap(&ksi);
610 ksi.ksi_signo = SIGBUS;
611 ksi.ksi_code = BUS_OBJERR;
612 ksi.ksi_trapno = T_PROTFLT;
613 ksi.ksi_addr = (void *)regs->tf_rip;
614 trapsignal(td, &ksi);
615 return(EINVAL);
616 }
617
618 lmask.__bits[0] = frame.sf_sc.sc_mask;
619 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
620 lmask.__bits[i+1] = frame.sf_extramask[i];
621 linux_to_bsd_sigset(&lmask, &bmask);
622 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
623
624 /*
625 * Restore signal context.
626 */
627 regs->tf_rdi = frame.sf_sc.sc_edi;
628 regs->tf_rsi = frame.sf_sc.sc_esi;
629 regs->tf_rbp = frame.sf_sc.sc_ebp;
630 regs->tf_rbx = frame.sf_sc.sc_ebx;
631 regs->tf_rdx = frame.sf_sc.sc_edx;
632 regs->tf_rcx = frame.sf_sc.sc_ecx;
633 regs->tf_rax = frame.sf_sc.sc_eax;
634 regs->tf_rip = frame.sf_sc.sc_eip;
635 regs->tf_cs = frame.sf_sc.sc_cs;
636 regs->tf_ds = frame.sf_sc.sc_ds;
637 regs->tf_es = frame.sf_sc.sc_es;
638 regs->tf_fs = frame.sf_sc.sc_fs;
639 regs->tf_gs = frame.sf_sc.sc_gs;
640 regs->tf_rflags = eflags;
641 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
642 regs->tf_ss = frame.sf_sc.sc_ss;
643 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
644
645 return (EJUSTRETURN);
646 }
647
648 /*
649 * System call to cleanup state after a signal
650 * has been taken. Reset signal mask and
651 * stack state from context left by rt_sendsig (above).
652 * Return to previous pc and psl as specified by
653 * context left by sendsig. Check carefully to
654 * make sure that the user has not modified the
655 * psl to gain improper privileges or to cause
656 * a machine fault.
657 */
658 int
659 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
660 {
661 struct l_ucontext uc;
662 struct l_sigcontext *context;
663 sigset_t bmask;
664 l_stack_t *lss;
665 stack_t ss;
666 struct trapframe *regs;
667 int eflags;
668 ksiginfo_t ksi;
669
670 regs = td->td_frame;
671
672 #ifdef DEBUG
673 if (ldebug(rt_sigreturn))
674 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
675 #endif
676 /*
677 * The trampoline code hands us the ucontext.
678 * It is unsafe to keep track of it ourselves, in the event that a
679 * program jumps out of a signal handler.
680 */
681 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
682 return (EFAULT);
683
684 context = &uc.uc_mcontext;
685
686 /*
687 * Check for security violations.
688 */
689 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
690 eflags = context->sc_eflags;
691 /*
692 * XXX do allow users to change the privileged flag PSL_RF. The
693 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
694 * sometimes set it there too. tf_eflags is kept in the signal
695 * context during signal handling and there is no other place
696 * to remember it, so the PSL_RF bit may be corrupted by the
697 * signal handler without us knowing. Corruption of the PSL_RF
698 * bit at worst causes one more or one less debugger trap, so
699 * allowing it is fairly harmless.
700 */
701 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
702 return(EINVAL);
703
704 /*
705 * Don't allow users to load a valid privileged %cs. Let the
706 * hardware check for invalid selectors, excess privilege in
707 * other selectors, invalid %eip's and invalid %esp's.
708 */
709 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
710 if (!CS_SECURE(context->sc_cs)) {
711 ksiginfo_init_trap(&ksi);
712 ksi.ksi_signo = SIGBUS;
713 ksi.ksi_code = BUS_OBJERR;
714 ksi.ksi_trapno = T_PROTFLT;
715 ksi.ksi_addr = (void *)regs->tf_rip;
716 trapsignal(td, &ksi);
717 return(EINVAL);
718 }
719
720 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
721 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
722
723 /*
724 * Restore signal context
725 */
726 regs->tf_gs = context->sc_gs;
727 regs->tf_fs = context->sc_fs;
728 regs->tf_es = context->sc_es;
729 regs->tf_ds = context->sc_ds;
730 regs->tf_rdi = context->sc_edi;
731 regs->tf_rsi = context->sc_esi;
732 regs->tf_rbp = context->sc_ebp;
733 regs->tf_rbx = context->sc_ebx;
734 regs->tf_rdx = context->sc_edx;
735 regs->tf_rcx = context->sc_ecx;
736 regs->tf_rax = context->sc_eax;
737 regs->tf_rip = context->sc_eip;
738 regs->tf_cs = context->sc_cs;
739 regs->tf_rflags = eflags;
740 regs->tf_rsp = context->sc_esp_at_signal;
741 regs->tf_ss = context->sc_ss;
742 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
743
744 /*
745 * call sigaltstack & ignore results..
746 */
747 lss = &uc.uc_stack;
748 ss.ss_sp = PTRIN(lss->ss_sp);
749 ss.ss_size = lss->ss_size;
750 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
751
752 #ifdef DEBUG
753 if (ldebug(rt_sigreturn))
754 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
755 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
756 #endif
757 (void)kern_sigaltstack(td, &ss, NULL);
758
759 return (EJUSTRETURN);
760 }
761
762 static int
763 linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
764 {
765 struct proc *p;
766 struct trapframe *frame;
767
768 p = td->td_proc;
769 frame = td->td_frame;
770
771 sa->args[0] = frame->tf_rbx;
772 sa->args[1] = frame->tf_rcx;
773 sa->args[2] = frame->tf_rdx;
774 sa->args[3] = frame->tf_rsi;
775 sa->args[4] = frame->tf_rdi;
776 sa->args[5] = frame->tf_rbp; /* Unconfirmed */
777 sa->code = frame->tf_rax;
778
779 if (sa->code >= p->p_sysent->sv_size)
780 sa->callp = &p->p_sysent->sv_table[0];
781 else
782 sa->callp = &p->p_sysent->sv_table[sa->code];
783 sa->narg = sa->callp->sy_narg;
784
785 td->td_retval[0] = 0;
786 td->td_retval[1] = frame->tf_rdx;
787
788 return (0);
789 }
790
791 /*
792 * If a linux binary is exec'ing something, try this image activator
793 * first. We override standard shell script execution in order to
794 * be able to modify the interpreter path. We only do this if a linux
795 * binary is doing the exec, so we do not create an EXEC module for it.
796 */
797 static int exec_linux_imgact_try(struct image_params *iparams);
798
799 static int
800 exec_linux_imgact_try(struct image_params *imgp)
801 {
802 const char *head = (const char *)imgp->image_header;
803 char *rpath;
804 int error = -1;
805
806 /*
807 * The interpreter for shell scripts run from a linux binary needs
808 * to be located in /compat/linux if possible in order to recursively
809 * maintain linux path emulation.
810 */
811 if (((const short *)head)[0] == SHELLMAGIC) {
812 /*
813 * Run our normal shell image activator. If it succeeds attempt
814 * to use the alternate path for the interpreter. If an
815 * alternate * path is found, use our stringspace to store it.
816 */
817 if ((error = exec_shell_imgact(imgp)) == 0) {
818 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
819 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
820 AT_FDCWD);
821 if (rpath != NULL)
822 imgp->args->fname_buf =
823 imgp->interpreter_name = rpath;
824 }
825 }
826 return (error);
827 }
828
829 /*
830 * Clear registers on exec
831 * XXX copied from ia32_signal.c.
832 */
833 static void
834 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
835 {
836 struct trapframe *regs = td->td_frame;
837 struct pcb *pcb = td->td_pcb;
838
839 mtx_lock(&dt_lock);
840 if (td->td_proc->p_md.md_ldt != NULL)
841 user_ldt_free(td);
842 else
843 mtx_unlock(&dt_lock);
844
845 critical_enter();
846 wrmsr(MSR_FSBASE, 0);
847 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
848 pcb->pcb_fsbase = 0;
849 pcb->pcb_gsbase = 0;
850 critical_exit();
851 pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
852
853 bzero((char *)regs, sizeof(struct trapframe));
854 regs->tf_rip = imgp->entry_addr;
855 regs->tf_rsp = stack;
856 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
857 regs->tf_gs = _ugssel;
858 regs->tf_fs = _ufssel;
859 regs->tf_es = _udatasel;
860 regs->tf_ds = _udatasel;
861 regs->tf_ss = _udatasel;
862 regs->tf_flags = TF_HASSEGS;
863 regs->tf_cs = _ucode32sel;
864 regs->tf_rbx = imgp->ps_strings;
865
866 fpstate_drop(td);
867
868 /* Do full restore on return so that we can change to a different %cs */
869 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
870 clear_pcb_flags(pcb, PCB_GS32BIT);
871 td->td_retval[1] = 0;
872 }
873
874 /*
875 * XXX copied from ia32_sysvec.c.
876 */
877 static register_t *
878 linux_copyout_strings(struct image_params *imgp)
879 {
880 int argc, envc;
881 u_int32_t *vectp;
882 char *stringp, *destp;
883 u_int32_t *stack_base;
884 struct linux32_ps_strings *arginfo;
885
886 /*
887 * Calculate string base and vector table pointers.
888 * Also deal with signal trampoline code for this exec type.
889 */
890 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
891 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
892 roundup((ARG_MAX - imgp->args->stringspace),
893 sizeof(char *));
894
895 /*
896 * Install LINUX_PLATFORM
897 */
898 copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
899 linux_szplatform);
900
901 /*
902 * If we have a valid auxargs ptr, prepare some room
903 * on the stack.
904 */
905 if (imgp->auxargs) {
906 /*
907 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
908 * lower compatibility.
909 */
910 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
911 (LINUX_AT_COUNT * 2);
912 /*
913 * The '+ 2' is for the null pointers at the end of each of
914 * the arg and env vector sets,and imgp->auxarg_size is room
915 * for argument of Runtime loader.
916 */
917 vectp = (u_int32_t *) (destp - (imgp->args->argc +
918 imgp->args->envc + 2 + imgp->auxarg_size) *
919 sizeof(u_int32_t));
920
921 } else
922 /*
923 * The '+ 2' is for the null pointers at the end of each of
924 * the arg and env vector sets
925 */
926 vectp = (u_int32_t *)(destp - (imgp->args->argc +
927 imgp->args->envc + 2) * sizeof(u_int32_t));
928
929 /*
930 * vectp also becomes our initial stack base
931 */
932 stack_base = vectp;
933
934 stringp = imgp->args->begin_argv;
935 argc = imgp->args->argc;
936 envc = imgp->args->envc;
937 /*
938 * Copy out strings - arguments and environment.
939 */
940 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
941
942 /*
943 * Fill in "ps_strings" struct for ps, w, etc.
944 */
945 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
946 suword32(&arginfo->ps_nargvstr, argc);
947
948 /*
949 * Fill in argument portion of vector table.
950 */
951 for (; argc > 0; --argc) {
952 suword32(vectp++, (uint32_t)(intptr_t)destp);
953 while (*stringp++ != 0)
954 destp++;
955 destp++;
956 }
957
958 /* a null vector table pointer separates the argp's from the envp's */
959 suword32(vectp++, 0);
960
961 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
962 suword32(&arginfo->ps_nenvstr, envc);
963
964 /*
965 * Fill in environment portion of vector table.
966 */
967 for (; envc > 0; --envc) {
968 suword32(vectp++, (uint32_t)(intptr_t)destp);
969 while (*stringp++ != 0)
970 destp++;
971 destp++;
972 }
973
974 /* end of vector table is a null pointer */
975 suword32(vectp, 0);
976
977 return ((register_t *)stack_base);
978 }
979
/* Sysctl node compat.linux32 holding the tunables below. */
SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

/*
 * Read-write upper bounds that linux32_fixlimit() applies to
 * RLIMIT_DATA, RLIMIT_STACK and RLIMIT_VMEM respectively.  A value of 0
 * disables the corresponding cap.  linux32_maxssiz is additionally
 * exported through sv_maxssiz in elf_linux_sysvec.
 */
static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");
992
993 static void
994 linux32_fixlimit(struct rlimit *rl, int which)
995 {
996
997 switch (which) {
998 case RLIMIT_DATA:
999 if (linux32_maxdsiz != 0) {
1000 if (rl->rlim_cur > linux32_maxdsiz)
1001 rl->rlim_cur = linux32_maxdsiz;
1002 if (rl->rlim_max > linux32_maxdsiz)
1003 rl->rlim_max = linux32_maxdsiz;
1004 }
1005 break;
1006 case RLIMIT_STACK:
1007 if (linux32_maxssiz != 0) {
1008 if (rl->rlim_cur > linux32_maxssiz)
1009 rl->rlim_cur = linux32_maxssiz;
1010 if (rl->rlim_max > linux32_maxssiz)
1011 rl->rlim_max = linux32_maxssiz;
1012 }
1013 break;
1014 case RLIMIT_VMEM:
1015 if (linux32_maxvmem != 0) {
1016 if (rl->rlim_cur > linux32_maxvmem)
1017 rl->rlim_cur = linux32_maxvmem;
1018 if (rl->rlim_max > linux32_maxvmem)
1019 rl->rlim_max = linux32_maxvmem;
1020 }
1021 break;
1022 }
1023 }
1024
1025 struct sysentvec elf_linux_sysvec = {
1026 .sv_size = LINUX_SYS_MAXSYSCALL,
1027 .sv_table = linux_sysent,
1028 .sv_mask = 0,
1029 .sv_sigsize = LINUX_SIGTBLSZ,
1030 .sv_sigtbl = bsd_to_linux_signal,
1031 .sv_errsize = ELAST + 1,
1032 .sv_errtbl = bsd_to_linux_errno,
1033 .sv_transtrap = translate_traps,
1034 .sv_fixup = elf_linux_fixup,
1035 .sv_sendsig = linux_sendsig,
1036 .sv_sigcode = linux_sigcode,
1037 .sv_szsigcode = &linux_szsigcode,
1038 .sv_prepsyscall = NULL,
1039 .sv_name = "Linux ELF32",
1040 .sv_coredump = elf32_coredump,
1041 .sv_imgact_try = exec_linux_imgact_try,
1042 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1043 .sv_pagesize = PAGE_SIZE,
1044 .sv_minuser = VM_MIN_ADDRESS,
1045 .sv_maxuser = LINUX32_MAXUSER,
1046 .sv_usrstack = LINUX32_USRSTACK,
1047 .sv_psstrings = LINUX32_PS_STRINGS,
1048 .sv_stackprot = VM_PROT_ALL,
1049 .sv_copyout_strings = linux_copyout_strings,
1050 .sv_setregs = exec_linux_setregs,
1051 .sv_fixlimit = linux32_fixlimit,
1052 .sv_maxssiz = &linux32_maxssiz,
1053 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
1054 .sv_set_syscall_retval = cpu_set_syscall_retval,
1055 .sv_fetch_syscall_args = linux32_fetch_syscall_args,
1056 .sv_syscallnames = NULL,
1057 .sv_shared_page_base = LINUX32_SHAREDPAGE,
1058 .sv_shared_page_len = PAGE_SIZE,
1059 .sv_schedtail = linux_schedtail,
1060 };
1061 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
1062
/* Vendor name placed in linux32_brandnote; its size gives hdr.n_namesz. */
static char GNU_ABI_VENDOR[] = "GNU";
/* Value linux32_trans_osrel() requires in the first note descriptor word. */
static int GNULINUX_ABI_DESC = 0;
1065
1066 static boolean_t
1067 linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1068 {
1069 const Elf32_Word *desc;
1070 uintptr_t p;
1071
1072 p = (uintptr_t)(note + 1);
1073 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1074
1075 desc = (const Elf32_Word *)p;
1076 if (desc[0] != GNULINUX_ABI_DESC)
1077 return (FALSE);
1078
1079 /*
1080 * For linux we encode osrel as follows (see linux_mib.c):
1081 * VVVMMMIII (version, major, minor), see linux_mib.c.
1082 */
1083 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1084
1085 return (TRUE);
1086 }
1087
1088 static Elf_Brandnote linux32_brandnote = {
1089 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1090 .hdr.n_descsz = 16, /* XXX at least 16 */
1091 .hdr.n_type = 1,
1092 .vendor = GNU_ABI_VENDOR,
1093 .flags = BN_TRANSLATE_OSREL,
1094 .trans_osrel = linux32_trans_osrel
1095 };
1096
1097 static Elf32_Brandinfo linux_brand = {
1098 .brand = ELFOSABI_LINUX,
1099 .machine = EM_386,
1100 .compat_3_brand = "Linux",
1101 .emul_path = "/compat/linux",
1102 .interp_path = "/lib/ld-linux.so.1",
1103 .sysvec = &elf_linux_sysvec,
1104 .interp_newpath = NULL,
1105 .brand_note = &linux32_brandnote,
1106 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1107 };
1108
1109 static Elf32_Brandinfo linux_glibc2brand = {
1110 .brand = ELFOSABI_LINUX,
1111 .machine = EM_386,
1112 .compat_3_brand = "Linux",
1113 .emul_path = "/compat/linux",
1114 .interp_path = "/lib/ld-linux.so.2",
1115 .sysvec = &elf_linux_sysvec,
1116 .interp_newpath = NULL,
1117 .brand_note = &linux32_brandnote,
1118 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1119 };
1120
1121 Elf32_Brandinfo *linux_brandlist[] = {
1122 &linux_brand,
1123 &linux_glibc2brand,
1124 NULL
1125 };
1126
1127 static int
1128 linux_elf_modevent(module_t mod, int type, void *data)
1129 {
1130 Elf32_Brandinfo **brandinfo;
1131 int error;
1132 struct linux_ioctl_handler **lihp;
1133 struct linux_device_handler **ldhp;
1134
1135 error = 0;
1136
1137 switch(type) {
1138 case MOD_LOAD:
1139 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1140 ++brandinfo)
1141 if (elf32_insert_brand_entry(*brandinfo) < 0)
1142 error = EINVAL;
1143 if (error == 0) {
1144 SET_FOREACH(lihp, linux_ioctl_handler_set)
1145 linux_ioctl_register_handler(*lihp);
1146 SET_FOREACH(ldhp, linux_device_handler_set)
1147 linux_device_register_handler(*ldhp);
1148 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1149 sx_init(&emul_shared_lock, "emuldata->shared lock");
1150 LIST_INIT(&futex_list);
1151 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1152 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1153 linux_proc_exit, NULL, 1000);
1154 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1155 linux_proc_exec, NULL, 1000);
1156 linux_szplatform = roundup(strlen(linux_platform) + 1,
1157 sizeof(char *));
1158 linux_osd_jail_register();
1159 stclohz = (stathz ? stathz : hz);
1160 if (bootverbose)
1161 printf("Linux ELF exec handler installed\n");
1162 } else
1163 printf("cannot insert Linux ELF brand handler\n");
1164 break;
1165 case MOD_UNLOAD:
1166 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1167 ++brandinfo)
1168 if (elf32_brand_inuse(*brandinfo))
1169 error = EBUSY;
1170 if (error == 0) {
1171 for (brandinfo = &linux_brandlist[0];
1172 *brandinfo != NULL; ++brandinfo)
1173 if (elf32_remove_brand_entry(*brandinfo) < 0)
1174 error = EINVAL;
1175 }
1176 if (error == 0) {
1177 SET_FOREACH(lihp, linux_ioctl_handler_set)
1178 linux_ioctl_unregister_handler(*lihp);
1179 SET_FOREACH(ldhp, linux_device_handler_set)
1180 linux_device_unregister_handler(*ldhp);
1181 mtx_destroy(&emul_lock);
1182 sx_destroy(&emul_shared_lock);
1183 mtx_destroy(&futex_mtx);
1184 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1185 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1186 linux_osd_jail_deregister();
1187 if (bootverbose)
1188 printf("Linux ELF exec handler removed\n");
1189 } else
1190 printf("Could not deinstall ELF interpreter entry\n");
1191 break;
1192 default:
1193 return EOPNOTSUPP;
1194 }
1195 return error;
1196 }
1197
1198 static moduledata_t linux_elf_mod = {
1199 "linuxelf",
1200 linux_elf_modevent,
1201 0
1202 };
1203
1204 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Cache object: 0f7e97c2c815cff9e7a370d6c549da0a
|