1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 1994-1996 Søren Schmidt
5 * Copyright (c) 2018 Turing Robotic Industries Inc.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/cdefs.h>
35 #include <sys/elf.h>
36 #include <sys/exec.h>
37 #include <sys/imgact.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/ktr.h>
41 #include <sys/lock.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/stddef.h>
46 #include <sys/signalvar.h>
47 #include <sys/syscallsubr.h>
48 #include <sys/sysctl.h>
49 #include <sys/sysent.h>
50
51 #include <vm/vm.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_param.h>
58
59 #include <arm64/linux/linux.h>
60 #include <arm64/linux/linux_proto.h>
61 #include <compat/linux/linux_dtrace.h>
62 #include <compat/linux/linux_emul.h>
63 #include <compat/linux/linux_fork.h>
64 #include <compat/linux/linux_ioctl.h>
65 #include <compat/linux/linux_mib.h>
66 #include <compat/linux/linux_misc.h>
67 #include <compat/linux/linux_signal.h>
68 #include <compat/linux/linux_util.h>
69 #include <compat/linux/linux_vdso.h>
70
71 #include <arm64/linux/linux_sigframe.h>
72
73 #include <machine/md_var.h>
74
75 #ifdef VFP
76 #include <machine/vfp.h>
77 #endif
78
79 MODULE_VERSION(linux64elf, 1);
80
/*
 * Layout of the top of the Linux process address space, highest first:
 * the vDSO occupies the last LINUX_VDSOPAGE_SIZE bytes below
 * VM_MAXUSER_ADDRESS, one further page below that is reserved for the
 * shared page, and LINUX_USRSTACK coincides with the start of that page.
 * The ps_strings structure sits immediately below LINUX_USRSTACK.
 *
 * Every expansion is fully parenthesized so the macros can be used
 * safely inside larger expressions (division, modulo, casts, ...).
 */
#define	LINUX_VDSOPAGE_SIZE	(PAGE_SIZE * 2)
#define	LINUX_VDSOPAGE		(VM_MAXUSER_ADDRESS - LINUX_VDSOPAGE_SIZE)
#define	LINUX_SHAREDPAGE	(LINUX_VDSOPAGE - PAGE_SIZE)
				/*
				 * PAGE_SIZE - the size
				 * of the native SHAREDPAGE
				 */
#define	LINUX_USRSTACK		LINUX_SHAREDPAGE
#define	LINUX_PS_STRINGS	(LINUX_USRSTACK - \
				    sizeof(struct ps_strings))
92
93 static int linux_szsigcode;
94 static vm_object_t linux_vdso_obj;
95 static char *linux_vdso_mapping;
96 extern char _binary_linux_vdso_so_o_start;
97 extern char _binary_linux_vdso_so_o_end;
98 static vm_offset_t linux_vdso_base;
99
100 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
101 extern const char *linux_syscallnames[];
102
103 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
104
105 static int linux_copyout_strings(struct image_params *imgp,
106 uintptr_t *stack_base);
107 static int linux_elf_fixup(uintptr_t *stack_base,
108 struct image_params *iparams);
109 static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
110 static void linux_vdso_install(const void *param);
111 static void linux_vdso_deinstall(const void *param);
112 static void linux_vdso_reloc(char *mapping, Elf_Addr offset);
113 static void linux_set_syscall_retval(struct thread *td, int error);
114 static int linux_fetch_syscall_args(struct thread *td);
115 static void linux_exec_setregs(struct thread *td, struct image_params *imgp,
116 uintptr_t stack);
117 static void linux_exec_sysvec_init(void *param);
118 static int linux_on_exec_vmspace(struct proc *p,
119 struct image_params *imgp);
120
121 /* DTrace init */
122 LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
123
124 /* DTrace probes */
125 LIN_SDT_PROBE_DEFINE0(sysvec, linux_exec_setregs, todo);
126 LIN_SDT_PROBE_DEFINE0(sysvec, linux_copyout_auxargs, todo);
127 LIN_SDT_PROBE_DEFINE0(sysvec, linux_elf_fixup, todo);
128
129 LINUX_VDSO_SYM_CHAR(linux_platform);
130 LINUX_VDSO_SYM_INTPTR(kern_timekeep_base);
131 LINUX_VDSO_SYM_INTPTR(linux_vdso_sigcode);
132
133 static int
134 linux_fetch_syscall_args(struct thread *td)
135 {
136 struct proc *p;
137 struct syscall_args *sa;
138 register_t *ap;
139
140 p = td->td_proc;
141 ap = td->td_frame->tf_x;
142 sa = &td->td_sa;
143
144 sa->code = td->td_frame->tf_x[8];
145 sa->original_code = sa->code;
146 /* LINUXTODO: generic syscall? */
147 if (sa->code >= p->p_sysent->sv_size)
148 sa->callp = &p->p_sysent->sv_table[0];
149 else
150 sa->callp = &p->p_sysent->sv_table[sa->code];
151
152 if (sa->callp->sy_narg > nitems(sa->args))
153 panic("ARM64TODO: Could we have more than %zu args?",
154 nitems(sa->args));
155 memcpy(sa->args, ap, nitems(sa->args) * sizeof(register_t));
156
157 td->td_retval[0] = 0;
158 return (0);
159 }
160
161 static void
162 linux_set_syscall_retval(struct thread *td, int error)
163 {
164
165 td->td_retval[1] = td->td_frame->tf_x[1];
166 cpu_set_syscall_retval(td, error);
167
168 if (__predict_false(error != 0)) {
169 if (error != ERESTART && error != EJUSTRETURN)
170 td->td_frame->tf_x[0] = bsd_to_linux_errno(error);
171 }
172 }
173
174 static int
175 linux_copyout_auxargs(struct image_params *imgp, uintptr_t base)
176 {
177 Elf_Auxargs *args;
178 Elf_Auxinfo *argarray, *pos;
179 struct proc *p;
180 int error, issetugid;
181
182 LIN_SDT_PROBE0(sysvec, linux_copyout_auxargs, todo);
183 p = imgp->proc;
184
185 args = (Elf64_Auxargs *)imgp->auxargs;
186 argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP,
187 M_WAITOK | M_ZERO);
188
189 issetugid = p->p_flag & P_SUGID ? 1 : 0;
190 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, linux_vdso_base);
191 AUXARGS_ENTRY(pos, LINUX_AT_MINSIGSTKSZ, LINUX_MINSIGSTKSZ);
192 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, *imgp->sysent->sv_hwcap);
193 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
194 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
195 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
196 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
197 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
198 AUXARGS_ENTRY(pos, AT_BASE, args->base);
199 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
200 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
201 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
202 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
203 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
204 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
205 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid);
206 AUXARGS_ENTRY_PTR(pos, LINUX_AT_RANDOM, imgp->canary);
207 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP2, *imgp->sysent->sv_hwcap2);
208 if (imgp->execpathp != 0)
209 AUXARGS_ENTRY_PTR(pos, LINUX_AT_EXECFN, imgp->execpathp);
210 if (args->execfd != -1)
211 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
212 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
213 AUXARGS_ENTRY(pos, AT_NULL, 0);
214
215 free(imgp->auxargs, M_TEMP);
216 imgp->auxargs = NULL;
217 KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs"));
218
219 error = copyout(argarray, (void *)base,
220 sizeof(*argarray) * LINUX_AT_COUNT);
221 free(argarray, M_TEMP);
222 return (error);
223 }
224
225 static int
226 linux_elf_fixup(uintptr_t *stack_base, struct image_params *imgp)
227 {
228
229 LIN_SDT_PROBE0(sysvec, linux_elf_fixup, todo);
230
231 return (0);
232 }
233
234 /*
235 * Copy strings out to the new process address space, constructing new arg
236 * and env vector tables. Return a pointer to the base so that it can be used
237 * as the initial stack pointer.
238 * LINUXTODO: deduplicate against other linuxulator archs
239 */
240 static int
241 linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base)
242 {
243 char **vectp;
244 char *stringp;
245 uintptr_t destp, ustringp;
246 struct ps_strings *arginfo;
247 char canary[LINUX_AT_RANDOM_LEN];
248 size_t execpath_len;
249 struct proc *p;
250 int argc, envc, error;
251
252 p = imgp->proc;
253 arginfo = (struct ps_strings *)PROC_PS_STRINGS(p);
254 destp = (uintptr_t)arginfo;
255
256 if (imgp->execpath != NULL && imgp->auxargs != NULL) {
257 execpath_len = strlen(imgp->execpath) + 1;
258 destp -= execpath_len;
259 destp = rounddown2(destp, sizeof(void *));
260 imgp->execpathp = (void *)destp;
261 error = copyout(imgp->execpath, imgp->execpathp, execpath_len);
262 if (error != 0)
263 return (error);
264 }
265
266 /* Prepare the canary for SSP. */
267 arc4rand(canary, sizeof(canary), 0);
268 destp -= roundup(sizeof(canary), sizeof(void *));
269 imgp->canary = (void *)destp;
270 error = copyout(canary, imgp->canary, sizeof(canary));
271 if (error != 0)
272 return (error);
273
274 /* Allocate room for the argument and environment strings. */
275 destp -= ARG_MAX - imgp->args->stringspace;
276 destp = rounddown2(destp, sizeof(void *));
277 ustringp = destp;
278
279 if (imgp->auxargs) {
280 /*
281 * Allocate room on the stack for the ELF auxargs
282 * array. It has up to LINUX_AT_COUNT entries.
283 */
284 destp -= LINUX_AT_COUNT * sizeof(Elf64_Auxinfo);
285 destp = rounddown2(destp, sizeof(void *));
286 }
287
288 vectp = (char **)destp;
289
290 /*
291 * Allocate room for argc and the argv[] and env vectors including the
292 * terminating NULL pointers.
293 */
294 vectp -= 1 + imgp->args->argc + 1 + imgp->args->envc + 1;
295 vectp = (char **)STACKALIGN(vectp);
296
297 /* vectp also becomes our initial stack base. */
298 *stack_base = (uintptr_t)vectp;
299
300 stringp = imgp->args->begin_argv;
301 argc = imgp->args->argc;
302 envc = imgp->args->envc;
303
304 /* Copy out strings - arguments and environment. */
305 error = copyout(stringp, (void *)ustringp,
306 ARG_MAX - imgp->args->stringspace);
307 if (error != 0)
308 return (error);
309
310 /* Fill in "ps_strings" struct for ps, w, etc. */
311 if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 ||
312 suword(&arginfo->ps_nargvstr, argc) != 0)
313 return (EFAULT);
314
315 if (suword(vectp++, argc) != 0)
316 return (EFAULT);
317
318 /* Fill in argument portion of vector table. */
319 for (; argc > 0; --argc) {
320 if (suword(vectp++, ustringp) != 0)
321 return (EFAULT);
322 while (*stringp++ != 0)
323 ustringp++;
324 ustringp++;
325 }
326
327 /* A null vector table pointer separates the argp's from the envp's. */
328 if (suword(vectp++, 0) != 0)
329 return (EFAULT);
330
331 if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 ||
332 suword(&arginfo->ps_nenvstr, envc) != 0)
333 return (EFAULT);
334
335 /* Fill in environment portion of vector table. */
336 for (; envc > 0; --envc) {
337 if (suword(vectp++, ustringp) != 0)
338 return (EFAULT);
339 while (*stringp++ != 0)
340 ustringp++;
341 ustringp++;
342 }
343
344 /* The end of the vector table is a null pointer. */
345 if (suword(vectp, 0) != 0)
346 return (EFAULT);
347
348 if (imgp->auxargs) {
349 vectp++;
350 error = imgp->sysent->sv_copyout_auxargs(imgp,
351 (uintptr_t)vectp);
352 if (error != 0)
353 return (error);
354 }
355
356 return (0);
357 }
358
359 /*
360 * Reset registers to default values on exec.
361 */
362 static void
363 linux_exec_setregs(struct thread *td, struct image_params *imgp,
364 uintptr_t stack)
365 {
366 struct trapframe *regs = td->td_frame;
367 struct pcb *pcb = td->td_pcb;
368
369 /* LINUXTODO: validate */
370 LIN_SDT_PROBE0(sysvec, linux_exec_setregs, todo);
371
372 memset(regs, 0, sizeof(*regs));
373 /* glibc start.S registers function pointer in x0 with atexit. */
374 regs->tf_sp = stack;
375 #if 0 /* LINUXTODO: See if this is used. */
376 regs->tf_lr = imgp->entry_addr;
377 #else
378 regs->tf_lr = 0xffffffffffffffff;
379 #endif
380 regs->tf_elr = imgp->entry_addr;
381
382 pcb->pcb_tpidr_el0 = 0;
383 pcb->pcb_tpidrro_el0 = 0;
384 WRITE_SPECIALREG(tpidrro_el0, 0);
385 WRITE_SPECIALREG(tpidr_el0, 0);
386
387 #ifdef VFP
388 vfp_reset_state(td, pcb);
389 #endif
390
391 /*
392 * Clear debug register state. It is not applicable to the new process.
393 */
394 bzero(&pcb->pcb_dbg_regs, sizeof(pcb->pcb_dbg_regs));
395 }
396
397 int
398 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
399 {
400 struct l_sigframe *frame;
401 ucontext_t uc;
402 struct trapframe *tf;
403 int error;
404
405 tf = td->td_frame;
406 frame = (struct l_sigframe *)tf->tf_sp;
407
408 if (copyin((void *)&frame->uc, &uc, sizeof(uc)))
409 return (EFAULT);
410
411 error = set_mcontext(td, &uc.uc_mcontext);
412 if (error != 0)
413 return (error);
414
415 /* Restore signal mask. */
416 kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
417
418 return (EJUSTRETURN);
419 }
420
421 static void
422 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
423 {
424 struct thread *td;
425 struct proc *p;
426 struct trapframe *tf;
427 struct l_sigframe *fp, *frame;
428 struct l_fpsimd_context *fpsimd;
429 struct l_esr_context *esr;
430 l_stack_t uc_stack;
431 ucontext_t uc;
432 uint8_t *scr;
433 struct sigacts *psp;
434 int onstack, sig, issiginfo;
435
436 td = curthread;
437 p = td->td_proc;
438 PROC_LOCK_ASSERT(p, MA_OWNED);
439
440 sig = ksi->ksi_signo;
441 psp = p->p_sigacts;
442 mtx_assert(&psp->ps_mtx, MA_OWNED);
443
444 tf = td->td_frame;
445 onstack = sigonstack(tf->tf_sp);
446 issiginfo = SIGISMEMBER(psp->ps_siginfo, sig);
447
448 CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
449 catcher, sig);
450
451 /* Allocate and validate space for the signal handler context. */
452 if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack &&
453 SIGISMEMBER(psp->ps_sigonstack, sig)) {
454 fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
455 td->td_sigstk.ss_size);
456 #if defined(COMPAT_43)
457 td->td_sigstk.ss_flags |= SS_ONSTACK;
458 #endif
459 } else {
460 fp = (struct l_sigframe *)td->td_frame->tf_sp;
461 }
462
463 /* Make room, keeping the stack aligned */
464 fp--;
465 fp = (struct l_sigframe *)STACKALIGN(fp);
466
467 get_mcontext(td, &uc.uc_mcontext, 0);
468 uc.uc_sigmask = *mask;
469
470 uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
471 uc_stack.ss_size = td->td_sigstk.ss_size;
472 uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ?
473 (onstack ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
474 mtx_unlock(&psp->ps_mtx);
475 PROC_UNLOCK(td->td_proc);
476
477 /* Fill in the frame to copy out */
478 frame = malloc(sizeof(*frame), M_LINUX, M_WAITOK | M_ZERO);
479
480 memcpy(&frame->sf.sf_uc.uc_sc.regs, tf->tf_x, sizeof(tf->tf_x));
481 frame->sf.sf_uc.uc_sc.regs[30] = tf->tf_lr;
482 frame->sf.sf_uc.uc_sc.sp = tf->tf_sp;
483 frame->sf.sf_uc.uc_sc.pc = tf->tf_lr;
484 frame->sf.sf_uc.uc_sc.pstate = tf->tf_spsr;
485 frame->sf.sf_uc.uc_sc.fault_address = (register_t)ksi->ksi_addr;
486
487 /* Stack frame for unwinding */
488 frame->fp = tf->tf_x[29];
489 frame->lr = tf->tf_lr;
490
491 /* Translate the signal. */
492 sig = bsd_to_linux_signal(sig);
493 siginfo_to_lsiginfo(&ksi->ksi_info, &frame->sf.sf_si, sig);
494 bsd_to_linux_sigset(mask, &frame->sf.sf_uc.uc_sigmask);
495
496 /*
497 * Prepare fpsimd & esr. Does not check sizes, as
498 * __reserved is big enougth.
499 */
500 scr = (uint8_t *)&frame->sf.sf_uc.uc_sc.__reserved;
501 #ifdef VFP
502 fpsimd = (struct l_fpsimd_context *) scr;
503 fpsimd->head.magic = L_FPSIMD_MAGIC;
504 fpsimd->head.size = sizeof(struct l_fpsimd_context);
505 fpsimd->fpsr = uc.uc_mcontext.mc_fpregs.fp_sr;
506 fpsimd->fpcr = uc.uc_mcontext.mc_fpregs.fp_cr;
507
508 memcpy(fpsimd->vregs, &uc.uc_mcontext.mc_fpregs.fp_q,
509 sizeof(uc.uc_mcontext.mc_fpregs.fp_q));
510 scr += roundup(sizeof(struct l_fpsimd_context), 16);
511 #endif
512 if (ksi->ksi_addr != 0) {
513 esr = (struct l_esr_context *) scr;
514 esr->head.magic = L_ESR_MAGIC;
515 esr->head.size = sizeof(struct l_esr_context);
516 esr->esr = tf->tf_esr;
517 }
518
519 memcpy(&frame->sf.sf_uc.uc_stack, &uc_stack, sizeof(uc_stack));
520 memcpy(&frame->uc, &uc, sizeof(uc));
521
522 /* Copy the sigframe out to the user's stack. */
523 if (copyout(frame, fp, sizeof(*fp)) != 0) {
524 /* Process has trashed its stack. Kill it. */
525 free(frame, M_LINUX);
526 CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
527 PROC_LOCK(p);
528 sigexit(td, SIGILL);
529 }
530 free(frame, M_LINUX);
531
532 tf->tf_x[0]= sig;
533 if (issiginfo) {
534 tf->tf_x[1] = (register_t)&fp->sf.sf_si;
535 tf->tf_x[2] = (register_t)&fp->sf.sf_uc;
536 } else {
537 tf->tf_x[1] = 0;
538 tf->tf_x[2] = 0;
539 }
540 tf->tf_x[8] = (register_t)catcher;
541 tf->tf_sp = (register_t)fp;
542 tf->tf_elr = (register_t)linux_vdso_sigcode;
543
544 CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr,
545 tf->tf_sp);
546
547 PROC_LOCK(p);
548 mtx_lock(&psp->ps_mtx);
549 }
550
551 struct sysentvec elf_linux_sysvec = {
552 .sv_size = LINUX_SYS_MAXSYSCALL,
553 .sv_table = linux_sysent,
554 .sv_fixup = linux_elf_fixup,
555 .sv_sendsig = linux_rt_sendsig,
556 .sv_sigcode = &_binary_linux_vdso_so_o_start,
557 .sv_szsigcode = &linux_szsigcode,
558 .sv_name = "Linux ELF64",
559 .sv_coredump = elf64_coredump,
560 .sv_elf_core_osabi = ELFOSABI_NONE,
561 .sv_elf_core_abi_vendor = LINUX_ABI_VENDOR,
562 .sv_elf_core_prepare_notes = linux64_prepare_notes,
563 .sv_imgact_try = linux_exec_imgact_try,
564 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
565 .sv_minuser = VM_MIN_ADDRESS,
566 .sv_maxuser = VM_MAXUSER_ADDRESS,
567 .sv_usrstack = LINUX_USRSTACK,
568 .sv_psstrings = LINUX_PS_STRINGS,
569 .sv_psstringssz = sizeof(struct ps_strings),
570 .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE,
571 .sv_copyout_auxargs = linux_copyout_auxargs,
572 .sv_copyout_strings = linux_copyout_strings,
573 .sv_setregs = linux_exec_setregs,
574 .sv_fixlimit = NULL,
575 .sv_maxssiz = NULL,
576 .sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP | SV_SIG_DISCIGN |
577 SV_SIG_WAITNDQ | SV_TIMEKEEP,
578 .sv_set_syscall_retval = linux_set_syscall_retval,
579 .sv_fetch_syscall_args = linux_fetch_syscall_args,
580 .sv_syscallnames = linux_syscallnames,
581 .sv_shared_page_base = LINUX_SHAREDPAGE,
582 .sv_shared_page_len = PAGE_SIZE,
583 .sv_schedtail = linux_schedtail,
584 .sv_thread_detach = linux_thread_detach,
585 .sv_trap = NULL,
586 .sv_hwcap = &elf_hwcap,
587 .sv_hwcap2 = &elf_hwcap2,
588 .sv_onexec = linux_on_exec_vmspace,
589 .sv_onexit = linux_on_exit,
590 .sv_ontdexit = linux_thread_dtor,
591 .sv_setid_allowed = &linux_setid_allowed_query,
592 };
593
594 static int
595 linux_on_exec_vmspace(struct proc *p, struct image_params *imgp)
596 {
597 int error;
598
599 error = linux_map_vdso(p, linux_vdso_obj, linux_vdso_base,
600 LINUX_VDSOPAGE_SIZE, imgp);
601 if (error == 0)
602 linux_on_exec(p, imgp);
603 return (error);
604 }
605
606 /*
607 * linux_vdso_install() and linux_exec_sysvec_init() must be called
608 * after exec_sysvec_init() which is SI_SUB_EXEC (SI_ORDER_ANY).
609 */
610 static void
611 linux_exec_sysvec_init(void *param)
612 {
613 l_uintptr_t *ktimekeep_base;
614 struct sysentvec *sv;
615 ptrdiff_t tkoff;
616
617 sv = param;
618 /* Fill timekeep_base */
619 exec_sysvec_init(sv);
620
621 tkoff = kern_timekeep_base - linux_vdso_base;
622 ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
623 *ktimekeep_base = sv->sv_shared_page_base + sv->sv_timekeep_offset;
624 }
625 SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC + 1, SI_ORDER_ANY,
626 linux_exec_sysvec_init, &elf_linux_sysvec);
627
628 static void
629 linux_vdso_install(const void *param)
630 {
631 char *vdso_start = &_binary_linux_vdso_so_o_start;
632 char *vdso_end = &_binary_linux_vdso_so_o_end;
633
634 linux_szsigcode = vdso_end - vdso_start;
635 MPASS(linux_szsigcode <= LINUX_VDSOPAGE_SIZE);
636
637 linux_vdso_base = LINUX_VDSOPAGE;
638
639 __elfN(linux_vdso_fixup)(vdso_start, linux_vdso_base);
640
641 linux_vdso_obj = __elfN(linux_shared_page_init)
642 (&linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
643 bcopy(vdso_start, linux_vdso_mapping, linux_szsigcode);
644
645 linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base);
646 }
647 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC + 1, SI_ORDER_FIRST,
648 linux_vdso_install, NULL);
649
650 static void
651 linux_vdso_deinstall(const void *param)
652 {
653
654 __elfN(linux_shared_page_fini)(linux_vdso_obj,
655 linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
656 }
657 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
658 linux_vdso_deinstall, NULL);
659
660 static void
661 linux_vdso_reloc(char *mapping, Elf_Addr offset)
662 {
663 Elf_Size rtype, symidx;
664 const Elf_Rela *rela;
665 const Elf_Shdr *shdr;
666 const Elf_Ehdr *ehdr;
667 Elf_Addr *where;
668 Elf_Addr addr, addend;
669 int i, relacnt;
670
671 MPASS(offset != 0);
672
673 relacnt = 0;
674 ehdr = (const Elf_Ehdr *)mapping;
675 shdr = (const Elf_Shdr *)(mapping + ehdr->e_shoff);
676 for (i = 0; i < ehdr->e_shnum; i++)
677 {
678 switch (shdr[i].sh_type) {
679 case SHT_REL:
680 printf("Linux Aarch64 vDSO: unexpected Rel section\n");
681 break;
682 case SHT_RELA:
683 rela = (const Elf_Rela *)(mapping + shdr[i].sh_offset);
684 relacnt = shdr[i].sh_size / sizeof(*rela);
685 }
686 }
687
688 for (i = 0; i < relacnt; i++, rela++) {
689 where = (Elf_Addr *)(mapping + rela->r_offset);
690 addend = rela->r_addend;
691 rtype = ELF_R_TYPE(rela->r_info);
692 symidx = ELF_R_SYM(rela->r_info);
693
694 switch (rtype) {
695 case R_AARCH64_NONE: /* none */
696 break;
697
698 case R_AARCH64_RELATIVE: /* B + A */
699 addr = (Elf_Addr)(mapping + addend);
700 if (*where != addr)
701 *where = addr;
702 break;
703 default:
704 printf("Linux Aarch64 vDSO: unexpected relocation type %ld, "
705 "symbol index %ld\n", rtype, symidx);
706 }
707 }
708 }
709
710 static char GNU_ABI_VENDOR[] = "GNU";
711 static int GNU_ABI_LINUX = 0;
712
713 /* LINUXTODO: deduplicate */
714 static bool
715 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
716 {
717 const Elf32_Word *desc;
718 uintptr_t p;
719
720 p = (uintptr_t)(note + 1);
721 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
722
723 desc = (const Elf32_Word *)p;
724 if (desc[0] != GNU_ABI_LINUX)
725 return (false);
726
727 *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]);
728 return (true);
729 }
730
731 static Elf_Brandnote linux64_brandnote = {
732 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
733 .hdr.n_descsz = 16,
734 .hdr.n_type = 1,
735 .vendor = GNU_ABI_VENDOR,
736 .flags = BN_TRANSLATE_OSREL,
737 .trans_osrel = linux_trans_osrel
738 };
739
740 static Elf64_Brandinfo linux_glibc2brand = {
741 .brand = ELFOSABI_LINUX,
742 .machine = EM_AARCH64,
743 .compat_3_brand = "Linux",
744 .emul_path = linux_emul_path,
745 .interp_path = "/lib64/ld-linux-x86-64.so.2",
746 .sysvec = &elf_linux_sysvec,
747 .interp_newpath = NULL,
748 .brand_note = &linux64_brandnote,
749 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
750 };
751
752 Elf64_Brandinfo *linux_brandlist[] = {
753 &linux_glibc2brand,
754 NULL
755 };
756
757 static int
758 linux64_elf_modevent(module_t mod, int type, void *data)
759 {
760 Elf64_Brandinfo **brandinfo;
761 struct linux_ioctl_handler**lihp;
762 int error;
763
764 error = 0;
765 switch(type) {
766 case MOD_LOAD:
767 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
768 ++brandinfo)
769 if (elf64_insert_brand_entry(*brandinfo) < 0)
770 error = EINVAL;
771 if (error == 0) {
772 SET_FOREACH(lihp, linux_ioctl_handler_set)
773 linux_ioctl_register_handler(*lihp);
774 stclohz = (stathz ? stathz : hz);
775 if (bootverbose)
776 printf("Linux arm64 ELF exec handler installed\n");
777 }
778 break;
779 case MOD_UNLOAD:
780 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
781 ++brandinfo)
782 if (elf64_brand_inuse(*brandinfo))
783 error = EBUSY;
784 if (error == 0) {
785 for (brandinfo = &linux_brandlist[0];
786 *brandinfo != NULL; ++brandinfo)
787 if (elf64_remove_brand_entry(*brandinfo) < 0)
788 error = EINVAL;
789 }
790 if (error == 0) {
791 SET_FOREACH(lihp, linux_ioctl_handler_set)
792 linux_ioctl_unregister_handler(*lihp);
793 if (bootverbose)
794 printf("Linux arm64 ELF exec handler removed\n");
795 } else
796 printf("Could not deinstall Linux arm64 ELF interpreter entry\n");
797 break;
798 default:
799 return (EOPNOTSUPP);
800 }
801 return (error);
802 }
803
804 static moduledata_t linux64_elf_mod = {
805 "linux64elf",
806 linux64_elf_modevent,
807 0
808 };
809
810 DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
811 MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1);
812 FEATURE(linux64, "AArch64 Linux 64bit support");
Cache object: 0dbeae146e813f6813f16db14be588dc
|