FreeBSD/Linux Kernel Cross Reference
sys/kern/imgact_elf.c
1 /*-
2 * Copyright (c) 2000 David O'Brien
3 * Copyright (c) 1995-1996 Søren Schmidt
4 * Copyright (c) 1996 Peter Wemm
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer
12 * in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD: releng/9.1/sys/kern/imgact_elf.c 238679 2012-07-22 00:44:22Z kib $");
33
34 #include "opt_capsicum.h"
35 #include "opt_compat.h"
36 #include "opt_core.h"
37
38 #include <sys/param.h>
39 #include <sys/capability.h>
40 #include <sys/exec.h>
41 #include <sys/fcntl.h>
42 #include <sys/imgact.h>
43 #include <sys/imgact_elf.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/mutex.h>
49 #include <sys/mman.h>
50 #include <sys/namei.h>
51 #include <sys/pioctl.h>
52 #include <sys/proc.h>
53 #include <sys/procfs.h>
54 #include <sys/racct.h>
55 #include <sys/resourcevar.h>
56 #include <sys/sf_buf.h>
57 #include <sys/smp.h>
58 #include <sys/systm.h>
59 #include <sys/signalvar.h>
60 #include <sys/stat.h>
61 #include <sys/sx.h>
62 #include <sys/syscall.h>
63 #include <sys/sysctl.h>
64 #include <sys/sysent.h>
65 #include <sys/vnode.h>
66 #include <sys/syslog.h>
67 #include <sys/eventhandler.h>
68
69 #include <net/zlib.h>
70
71 #include <vm/vm.h>
72 #include <vm/vm_kern.h>
73 #include <vm/vm_param.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_object.h>
77 #include <vm/vm_extern.h>
78
79 #include <machine/elf.h>
80 #include <machine/md_var.h>
81
/*
 * Index into e_ident[] at which get_brandinfo() looks for the FreeBSD 3.x
 * style string brand (compared against a brand's compat_3_brand).
 */
82 #define OLD_EI_BRAND 8
83
/*
 * Forward declarations of file-local helpers.  The __elfN() wrapper
 * generates a name that depends on the ELF word size this file is being
 * compiled for.
 */
84 static int __elfN(check_header)(const Elf_Ehdr *hdr);
85 static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
86 const char *interp, int interp_name_len, int32_t *osrel);
87 static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
88 u_long *entry, size_t pagesize);
89 static int __elfN(load_section)(struct vmspace *vmspace, vm_object_t object,
90 vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
91 vm_prot_t prot, size_t pagesize);
92 static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
93 static boolean_t __elfN(freebsd_trans_osrel)(const Elf_Note *note,
94 int32_t *osrel);
95 static boolean_t kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel);
96 static boolean_t __elfN(check_note)(struct image_params *imgp,
97 Elf_Brandnote *checknote, int32_t *osrel);
98 static vm_prot_t __elfN(trans_prot)(Elf_Word);
99 static Elf_Word __elfN(untrans_prot)(vm_prot_t);
100
/*
 * Parent sysctl node "elf<word size>" under _kern; the per-word-size knobs
 * declared below (fallback_brand, nxstack) hang off it.
 */
101 SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
102 "");
103
/*
 * Compressed core dump support (only with COMPRESS_USER_CORES):
 * prototype of the compression helper called from core_output(), and the
 * size of the scratch buffer allocated by the coredump routine.
 */
104 #ifdef COMPRESS_USER_CORES
105 static int compress_core(gzFile, char *, char *, unsigned int,
106 struct thread * td);
107 #define CORE_BUF_SIZE (16 * 1024)
108 #endif
109
/*
 * Brand value get_brandinfo() falls back to when neither a note, the header
 * brand nor the interpreter path identify the binary.  Defaults to -1 and is
 * adjustable both as a read-write sysctl and as a tunable
 * ("kern.elf<word size>.fallback_brand").
 */
110 int __elfN(fallback_brand) = -1;
111 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
112 fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
113 __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
114 TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
115 &__elfN(fallback_brand));
116
/*
 * Read-write debug sysctl, off (0) by default.
 * NOTE(review): the consumer of this flag is outside the visible part of the
 * file; presumably it selects the older core dump layout -- confirm.
 */
117 static int elf_legacy_coredump = 0;
118 SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
119 &elf_legacy_coredump, 0, "");
120
/*
 * When non-zero, the image activator takes the stack protection from the
 * executable's PT_GNU_STACK program header.  Enabled by default on amd64 and
 * powerpc64 only; adjustable through the read-write "nxstack" sysctl.
 */
121 int __elfN(nxstack) =
122 #if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */
123 1;
124 #else
125 0;
126 #endif
127 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
128 nxstack, CTLFLAG_RW, &__elfN(nxstack), 0,
129 __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack");
130
/*
 * kern.elf32.read_exec: only built for 32-bit ELF support on amd64 and ia64.
 * NOTE(review): the flag is not consumed in the visible part of the file;
 * presumably the protection translation helper reads it -- confirm.
 */
131 #if __ELF_WORD_SIZE == 32
132 #if defined(__amd64__) || defined(__ia64__)
133 int i386_read_exec = 0;
134 SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
135 "enable execution from readable segments");
136 #endif
137 #endif
138
/*
 * Table of registered brands.  A NULL slot is free; entries are added and
 * removed by insert_brand_entry() / remove_brand_entry().
 */
139 static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
140
/*
 * Page arithmetic for a caller-supplied page size `ps', which must be a
 * power of two:
 *   trunc_page_ps(va, ps)  rounds va down to a multiple of ps;
 *   round_page_ps(va, ps)  rounds va up to a multiple of ps;
 *   aligned(a, t)          is true when address a is a multiple of sizeof(t).
 *
 * Every macro argument is parenthesized so that an expression argument
 * (e.g. "pgsz << 1") expands with the intended precedence.
 */
#define	trunc_page_ps(va, ps)	((va) & ~((ps) - 1))
#define	round_page_ps(va, ps)	(((va) + ((ps) - 1)) & ~((ps) - 1))
#define	aligned(a, t)	(trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))
144
/*
 * Template of the FreeBSD ABI note: vendor name "FreeBSD" (n_namesz counts
 * the terminating NUL), note type 1 and a descriptor of one int32_t.
 * BN_TRANSLATE_OSREL together with trans_osrel tells the note checker to
 * extract the OS release via freebsd_trans_osrel().
 */
145 static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";
146
147 Elf_Brandnote __elfN(freebsd_brandnote) = {
148 .hdr.n_namesz = sizeof(FREEBSD_ABI_VENDOR),
149 .hdr.n_descsz = sizeof(int32_t),
150 .hdr.n_type = 1,
151 .vendor = FREEBSD_ABI_VENDOR,
152 .flags = BN_TRANSLATE_OSREL,
153 .trans_osrel = __elfN(freebsd_trans_osrel)
154 };
155
156 static boolean_t
157 __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel)
158 {
159 uintptr_t p;
160
161 p = (uintptr_t)(note + 1);
162 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
163 *osrel = *(const int32_t *)(p);
164
165 return (TRUE);
166 }
167
/*
 * Template of the GNU ABI note used by GNU/kFreeBSD binaries: vendor name
 * "GNU" (n_namesz counts the terminating NUL), note type 1.
 * GNU_KFREEBSD_ABI_DESC is the value kfreebsd_trans_osrel() requires in the
 * first descriptor word before it accepts the note.
 */
168 static const char GNU_ABI_VENDOR[] = "GNU";
169 static int GNU_KFREEBSD_ABI_DESC = 3;
170
171 Elf_Brandnote __elfN(kfreebsd_brandnote) = {
172 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
173 .hdr.n_descsz = 16, /* XXX at least 16 */
174 .hdr.n_type = 1,
175 .vendor = GNU_ABI_VENDOR,
176 .flags = BN_TRANSLATE_OSREL,
177 .trans_osrel = kfreebsd_trans_osrel
178 };
179
180 static boolean_t
181 kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel)
182 {
183 const Elf32_Word *desc;
184 uintptr_t p;
185
186 p = (uintptr_t)(note + 1);
187 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
188
189 desc = (const Elf32_Word *)p;
190 if (desc[0] != GNU_KFREEBSD_ABI_DESC)
191 return (FALSE);
192
193 /*
194 * Debian GNU/kFreeBSD embed the earliest compatible kernel version
195 * (__FreeBSD_version: <major><two digit minor>Rxx) in the LSB way.
196 */
197 *osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3];
198
199 return (TRUE);
200 }
201
202 int
203 __elfN(insert_brand_entry)(Elf_Brandinfo *entry)
204 {
205 int i;
206
207 for (i = 0; i < MAX_BRANDS; i++) {
208 if (elf_brand_list[i] == NULL) {
209 elf_brand_list[i] = entry;
210 break;
211 }
212 }
213 if (i == MAX_BRANDS) {
214 printf("WARNING: %s: could not insert brandinfo entry: %p\n",
215 __func__, entry);
216 return (-1);
217 }
218 return (0);
219 }
220
221 int
222 __elfN(remove_brand_entry)(Elf_Brandinfo *entry)
223 {
224 int i;
225
226 for (i = 0; i < MAX_BRANDS; i++) {
227 if (elf_brand_list[i] == entry) {
228 elf_brand_list[i] = NULL;
229 break;
230 }
231 }
232 if (i == MAX_BRANDS)
233 return (-1);
234 return (0);
235 }
236
237 int
238 __elfN(brand_inuse)(Elf_Brandinfo *entry)
239 {
240 struct proc *p;
241 int rval = FALSE;
242
243 sx_slock(&allproc_lock);
244 FOREACH_PROC_IN_SYSTEM(p) {
245 if (p->p_sysent == entry->sysvec) {
246 rval = TRUE;
247 break;
248 }
249 }
250 sx_sunlock(&allproc_lock);
251
252 return (rval);
253 }
254
255 static Elf_Brandinfo *
256 __elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
257 int interp_name_len, int32_t *osrel)
258 {
259 const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
260 Elf_Brandinfo *bi;
261 boolean_t ret;
262 int i;
263
264 /*
265 * We support four types of branding -- (1) the ELF EI_OSABI field
266 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
267 * branding w/in the ELF header, (3) path of the `interp_path'
268 * field, and (4) the ".note.ABI-tag" ELF section.
269 */
270
271 /* Look for an ".note.ABI-tag" ELF section */
272 for (i = 0; i < MAX_BRANDS; i++) {
273 bi = elf_brand_list[i];
274 if (bi == NULL)
275 continue;
276 if (hdr->e_machine == bi->machine && (bi->flags &
277 (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) {
278 ret = __elfN(check_note)(imgp, bi->brand_note, osrel);
279 if (ret)
280 return (bi);
281 }
282 }
283
284 /* If the executable has a brand, search for it in the brand list. */
285 for (i = 0; i < MAX_BRANDS; i++) {
286 bi = elf_brand_list[i];
287 if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
288 continue;
289 if (hdr->e_machine == bi->machine &&
290 (hdr->e_ident[EI_OSABI] == bi->brand ||
291 strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
292 bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
293 return (bi);
294 }
295
296 /* Lacking a known brand, search for a recognized interpreter. */
297 if (interp != NULL) {
298 for (i = 0; i < MAX_BRANDS; i++) {
299 bi = elf_brand_list[i];
300 if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
301 continue;
302 if (hdr->e_machine == bi->machine &&
303 /* ELF image p_filesz includes terminating zero */
304 strlen(bi->interp_path) + 1 == interp_name_len &&
305 strncmp(interp, bi->interp_path, interp_name_len)
306 == 0)
307 return (bi);
308 }
309 }
310
311 /* Lacking a recognized interpreter, try the default brand */
312 for (i = 0; i < MAX_BRANDS; i++) {
313 bi = elf_brand_list[i];
314 if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
315 continue;
316 if (hdr->e_machine == bi->machine &&
317 __elfN(fallback_brand) == bi->brand)
318 return (bi);
319 }
320 return (NULL);
321 }
322
323 static int
324 __elfN(check_header)(const Elf_Ehdr *hdr)
325 {
326 Elf_Brandinfo *bi;
327 int i;
328
329 if (!IS_ELF(*hdr) ||
330 hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
331 hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
332 hdr->e_ident[EI_VERSION] != EV_CURRENT ||
333 hdr->e_phentsize != sizeof(Elf_Phdr) ||
334 hdr->e_version != ELF_TARG_VER)
335 return (ENOEXEC);
336
337 /*
338 * Make sure we have at least one brand for this machine.
339 */
340
341 for (i = 0; i < MAX_BRANDS; i++) {
342 bi = elf_brand_list[i];
343 if (bi != NULL && bi->machine == hdr->e_machine)
344 break;
345 }
346 if (i == MAX_BRANDS)
347 return (ENOEXEC);
348
349 return (0);
350 }
351
352 static int
353 __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
354 vm_offset_t start, vm_offset_t end, vm_prot_t prot)
355 {
356 struct sf_buf *sf;
357 int error;
358 vm_offset_t off;
359
360 /*
361 * Create the page if it doesn't exist yet. Ignore errors.
362 */
363 vm_map_lock(map);
364 vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
365 VM_PROT_ALL, VM_PROT_ALL, 0);
366 vm_map_unlock(map);
367
368 /*
369 * Find the page from the underlying object.
370 */
371 if (object) {
372 sf = vm_imgact_map_page(object, offset);
373 if (sf == NULL)
374 return (KERN_FAILURE);
375 off = offset - trunc_page(offset);
376 error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
377 end - start);
378 vm_imgact_unmap_page(sf);
379 if (error) {
380 return (KERN_FAILURE);
381 }
382 }
383
384 return (KERN_SUCCESS);
385 }
386
387 static int
388 __elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
389 vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
390 {
391 struct sf_buf *sf;
392 vm_offset_t off;
393 vm_size_t sz;
394 int error, rv;
395
396 if (start != trunc_page(start)) {
397 rv = __elfN(map_partial)(map, object, offset, start,
398 round_page(start), prot);
399 if (rv)
400 return (rv);
401 offset += round_page(start) - start;
402 start = round_page(start);
403 }
404 if (end != round_page(end)) {
405 rv = __elfN(map_partial)(map, object, offset +
406 trunc_page(end) - start, trunc_page(end), end, prot);
407 if (rv)
408 return (rv);
409 end = trunc_page(end);
410 }
411 if (end > start) {
412 if (offset & PAGE_MASK) {
413 /*
414 * The mapping is not page aligned. This means we have
415 * to copy the data. Sigh.
416 */
417 rv = vm_map_find(map, NULL, 0, &start, end - start,
418 FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
419 if (rv)
420 return (rv);
421 if (object == NULL)
422 return (KERN_SUCCESS);
423 for (; start < end; start += sz) {
424 sf = vm_imgact_map_page(object, offset);
425 if (sf == NULL)
426 return (KERN_FAILURE);
427 off = offset - trunc_page(offset);
428 sz = end - start;
429 if (sz > PAGE_SIZE - off)
430 sz = PAGE_SIZE - off;
431 error = copyout((caddr_t)sf_buf_kva(sf) + off,
432 (caddr_t)start, sz);
433 vm_imgact_unmap_page(sf);
434 if (error) {
435 return (KERN_FAILURE);
436 }
437 offset += sz;
438 }
439 rv = KERN_SUCCESS;
440 } else {
441 vm_object_reference(object);
442 vm_map_lock(map);
443 rv = vm_map_insert(map, object, offset, start, end,
444 prot, VM_PROT_ALL, cow);
445 vm_map_unlock(map);
446 if (rv != KERN_SUCCESS)
447 vm_object_deallocate(object);
448 }
449 return (rv);
450 } else {
451 return (KERN_SUCCESS);
452 }
453 }
454
455 static int
456 __elfN(load_section)(struct vmspace *vmspace,
457 vm_object_t object, vm_offset_t offset,
458 caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
459 size_t pagesize)
460 {
461 struct sf_buf *sf;
462 size_t map_len;
463 vm_offset_t map_addr;
464 int error, rv, cow;
465 size_t copy_len;
466 vm_offset_t file_addr;
467
468 /*
469 * It's necessary to fail if the filsz + offset taken from the
470 * header is greater than the actual file pager object's size.
471 * If we were to allow this, then the vm_map_find() below would
472 * walk right off the end of the file object and into the ether.
473 *
474 * While I'm here, might as well check for something else that
475 * is invalid: filsz cannot be greater than memsz.
476 */
477 if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
478 filsz > memsz) {
479 uprintf("elf_load_section: truncated ELF file\n");
480 return (ENOEXEC);
481 }
482
483 map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
484 file_addr = trunc_page_ps(offset, pagesize);
485
486 /*
487 * We have two choices. We can either clear the data in the last page
488 * of an oversized mapping, or we can start the anon mapping a page
489 * early and copy the initialized data into that first page. We
490 * choose the second..
491 */
492 if (memsz > filsz)
493 map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
494 else
495 map_len = round_page_ps(offset + filsz, pagesize) - file_addr;
496
497 if (map_len != 0) {
498 /* cow flags: don't dump readonly sections in core */
499 cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
500 (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);
501
502 rv = __elfN(map_insert)(&vmspace->vm_map,
503 object,
504 file_addr, /* file offset */
505 map_addr, /* virtual start */
506 map_addr + map_len,/* virtual end */
507 prot,
508 cow);
509 if (rv != KERN_SUCCESS)
510 return (EINVAL);
511
512 /* we can stop now if we've covered it all */
513 if (memsz == filsz) {
514 return (0);
515 }
516 }
517
518
519 /*
520 * We have to get the remaining bit of the file into the first part
521 * of the oversized map segment. This is normally because the .data
522 * segment in the file is extended to provide bss. It's a neat idea
523 * to try and save a page, but it's a pain in the behind to implement.
524 */
525 copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
526 map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
527 map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
528 map_addr;
529
530 /* This had damn well better be true! */
531 if (map_len != 0) {
532 rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0, map_addr,
533 map_addr + map_len, VM_PROT_ALL, 0);
534 if (rv != KERN_SUCCESS) {
535 return (EINVAL);
536 }
537 }
538
539 if (copy_len != 0) {
540 vm_offset_t off;
541
542 sf = vm_imgact_map_page(object, offset + filsz);
543 if (sf == NULL)
544 return (EIO);
545
546 /* send the page fragment to user space */
547 off = trunc_page_ps(offset + filsz, pagesize) -
548 trunc_page(offset + filsz);
549 error = copyout((caddr_t)sf_buf_kva(sf) + off,
550 (caddr_t)map_addr, copy_len);
551 vm_imgact_unmap_page(sf);
552 if (error) {
553 return (error);
554 }
555 }
556
557 /*
558 * set it to the specified protection.
559 * XXX had better undo the damage from pasting over the cracks here!
560 */
561 vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
562 round_page(map_addr + map_len), prot, FALSE);
563
564 return (0);
565 }
566
567 /*
568 * Load the file "file" into memory. It may be either a shared object
569 * or an executable.
570 *
571 * The "addr" reference parameter is in/out. On entry, it specifies
572 * the address where a shared object should be loaded. If the file is
573 * an executable, this value is ignored. On exit, "addr" specifies
574 * where the file was actually loaded.
575 *
576 * The "entry" reference parameter is out only. On exit, it specifies
577 * the entry point for the loaded file.
578 */
579 static int
580 __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
581 u_long *entry, size_t pagesize)
582 {
583 struct {
584 struct nameidata nd;
585 struct vattr attr;
586 struct image_params image_params;
587 } *tempdata;
588 const Elf_Ehdr *hdr = NULL;
589 const Elf_Phdr *phdr = NULL;
590 struct nameidata *nd;
591 struct vmspace *vmspace = p->p_vmspace;
592 struct vattr *attr;
593 struct image_params *imgp;
594 vm_prot_t prot;
595 u_long rbase;
596 u_long base_addr = 0;
597 int vfslocked, error, i, numsegs;
598
599 #ifdef CAPABILITY_MODE
600 /*
601 * XXXJA: This check can go away once we are sufficiently confident
602 * that the checks in namei() are correct.
603 */
604 if (IN_CAPABILITY_MODE(curthread))
605 return (ECAPMODE);
606 #endif
607
608 tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
609 nd = &tempdata->nd;
610 attr = &tempdata->attr;
611 imgp = &tempdata->image_params;
612
613 /*
614 * Initialize part of the common data
615 */
616 imgp->proc = p;
617 imgp->attr = attr;
618 imgp->firstpage = NULL;
619 imgp->image_header = NULL;
620 imgp->object = NULL;
621 imgp->execlabel = NULL;
622
623 NDINIT(nd, LOOKUP, MPSAFE|LOCKLEAF|FOLLOW, UIO_SYSSPACE, file,
624 curthread);
625 vfslocked = 0;
626 if ((error = namei(nd)) != 0) {
627 nd->ni_vp = NULL;
628 goto fail;
629 }
630 vfslocked = NDHASGIANT(nd);
631 NDFREE(nd, NDF_ONLY_PNBUF);
632 imgp->vp = nd->ni_vp;
633
634 /*
635 * Check permissions, modes, uid, etc on the file, and "open" it.
636 */
637 error = exec_check_permissions(imgp);
638 if (error)
639 goto fail;
640
641 error = exec_map_first_page(imgp);
642 if (error)
643 goto fail;
644
645 /*
646 * Also make certain that the interpreter stays the same, so set
647 * its VV_TEXT flag, too.
648 */
649 nd->ni_vp->v_vflag |= VV_TEXT;
650
651 imgp->object = nd->ni_vp->v_object;
652
653 hdr = (const Elf_Ehdr *)imgp->image_header;
654 if ((error = __elfN(check_header)(hdr)) != 0)
655 goto fail;
656 if (hdr->e_type == ET_DYN)
657 rbase = *addr;
658 else if (hdr->e_type == ET_EXEC)
659 rbase = 0;
660 else {
661 error = ENOEXEC;
662 goto fail;
663 }
664
665 /* Only support headers that fit within first page for now */
666 /* (multiplication of two Elf_Half fields will not overflow) */
667 if ((hdr->e_phoff > PAGE_SIZE) ||
668 (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) {
669 error = ENOEXEC;
670 goto fail;
671 }
672
673 phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
674 if (!aligned(phdr, Elf_Addr)) {
675 error = ENOEXEC;
676 goto fail;
677 }
678
679 for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
680 if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) {
681 /* Loadable segment */
682 prot = __elfN(trans_prot)(phdr[i].p_flags);
683 if ((error = __elfN(load_section)(vmspace,
684 imgp->object, phdr[i].p_offset,
685 (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
686 phdr[i].p_memsz, phdr[i].p_filesz, prot,
687 pagesize)) != 0)
688 goto fail;
689 /*
690 * Establish the base address if this is the
691 * first segment.
692 */
693 if (numsegs == 0)
694 base_addr = trunc_page(phdr[i].p_vaddr +
695 rbase);
696 numsegs++;
697 }
698 }
699 *addr = base_addr;
700 *entry = (unsigned long)hdr->e_entry + rbase;
701
702 fail:
703 if (imgp->firstpage)
704 exec_unmap_first_page(imgp);
705
706 if (nd->ni_vp)
707 vput(nd->ni_vp);
708
709 VFS_UNLOCK_GIANT(vfslocked);
710 free(tempdata, M_TEMP);
711
712 return (error);
713 }
714
715 static int
716 __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
717 {
718 const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
719 const Elf_Phdr *phdr;
720 Elf_Auxargs *elf_auxargs;
721 struct vmspace *vmspace;
722 vm_prot_t prot;
723 u_long text_size = 0, data_size = 0, total_size = 0;
724 u_long text_addr = 0, data_addr = 0;
725 u_long seg_size, seg_addr;
726 u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0;
727 int32_t osrel = 0;
728 int error = 0, i, n, interp_name_len = 0;
729 const char *interp = NULL, *newinterp = NULL;
730 Elf_Brandinfo *brand_info;
731 char *path;
732 struct sysentvec *sv;
733
734 /*
735 * Do we have a valid ELF header ?
736 *
737 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
738 * if particular brand doesn't support it.
739 */
740 if (__elfN(check_header)(hdr) != 0 ||
741 (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
742 return (-1);
743
744 /*
745 * From here on down, we return an errno, not -1, as we've
746 * detected an ELF file.
747 */
748
749 if ((hdr->e_phoff > PAGE_SIZE) ||
750 (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
751 /* Only support headers in first page for now */
752 return (ENOEXEC);
753 }
754 phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
755 if (!aligned(phdr, Elf_Addr))
756 return (ENOEXEC);
757 n = 0;
758 baddr = 0;
759 for (i = 0; i < hdr->e_phnum; i++) {
760 switch (phdr[i].p_type) {
761 case PT_LOAD:
762 if (n == 0)
763 baddr = phdr[i].p_vaddr;
764 n++;
765 break;
766 case PT_INTERP:
767 /* Path to interpreter */
768 if (phdr[i].p_filesz > MAXPATHLEN ||
769 phdr[i].p_offset >= PAGE_SIZE ||
770 phdr[i].p_offset + phdr[i].p_filesz >= PAGE_SIZE)
771 return (ENOEXEC);
772 interp = imgp->image_header + phdr[i].p_offset;
773 interp_name_len = phdr[i].p_filesz;
774 break;
775 case PT_GNU_STACK:
776 if (__elfN(nxstack))
777 imgp->stack_prot =
778 __elfN(trans_prot)(phdr[i].p_flags);
779 break;
780 }
781 }
782
783 brand_info = __elfN(get_brandinfo)(imgp, interp, interp_name_len,
784 &osrel);
785 if (brand_info == NULL) {
786 uprintf("ELF binary type \"%u\" not known.\n",
787 hdr->e_ident[EI_OSABI]);
788 return (ENOEXEC);
789 }
790 if (hdr->e_type == ET_DYN) {
791 if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0)
792 return (ENOEXEC);
793 /*
794 * Honour the base load address from the dso if it is
795 * non-zero for some reason.
796 */
797 if (baddr == 0)
798 et_dyn_addr = ET_DYN_LOAD_ADDR;
799 else
800 et_dyn_addr = 0;
801 } else
802 et_dyn_addr = 0;
803 sv = brand_info->sysvec;
804 if (interp != NULL && brand_info->interp_newpath != NULL)
805 newinterp = brand_info->interp_newpath;
806
807 /*
808 * Avoid a possible deadlock if the current address space is destroyed
809 * and that address space maps the locked vnode. In the common case,
810 * the locked vnode's v_usecount is decremented but remains greater
811 * than zero. Consequently, the vnode lock is not needed by vrele().
812 * However, in cases where the vnode lock is external, such as nullfs,
813 * v_usecount may become zero.
814 */
815 VOP_UNLOCK(imgp->vp, 0);
816
817 error = exec_new_vmspace(imgp, sv);
818 imgp->proc->p_sysent = sv;
819
820 vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
821 if (error)
822 return (error);
823
824 vmspace = imgp->proc->p_vmspace;
825
826 for (i = 0; i < hdr->e_phnum; i++) {
827 switch (phdr[i].p_type) {
828 case PT_LOAD: /* Loadable segment */
829 if (phdr[i].p_memsz == 0)
830 break;
831 prot = __elfN(trans_prot)(phdr[i].p_flags);
832
833 #if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER)
834 /*
835 * Some x86 binaries assume read == executable,
836 * notably the M3 runtime and therefore cvsup
837 */
838 if (prot & VM_PROT_READ)
839 prot |= VM_PROT_EXECUTE;
840 #endif
841
842 if ((error = __elfN(load_section)(vmspace,
843 imgp->object, phdr[i].p_offset,
844 (caddr_t)(uintptr_t)phdr[i].p_vaddr + et_dyn_addr,
845 phdr[i].p_memsz, phdr[i].p_filesz, prot,
846 sv->sv_pagesize)) != 0)
847 return (error);
848
849 /*
850 * If this segment contains the program headers,
851 * remember their virtual address for the AT_PHDR
852 * aux entry. Static binaries don't usually include
853 * a PT_PHDR entry.
854 */
855 if (phdr[i].p_offset == 0 &&
856 hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
857 <= phdr[i].p_filesz)
858 proghdr = phdr[i].p_vaddr + hdr->e_phoff +
859 et_dyn_addr;
860
861 seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr);
862 seg_size = round_page(phdr[i].p_memsz +
863 phdr[i].p_vaddr + et_dyn_addr - seg_addr);
864
865 /*
866 * Make the largest executable segment the official
867 * text segment and all others data.
868 *
869 * Note that obreak() assumes that data_addr +
870 * data_size == end of data load area, and the ELF
871 * file format expects segments to be sorted by
872 * address. If multiple data segments exist, the
873 * last one will be used.
874 */
875
876 if (phdr[i].p_flags & PF_X && text_size < seg_size) {
877 text_size = seg_size;
878 text_addr = seg_addr;
879 } else {
880 data_size = seg_size;
881 data_addr = seg_addr;
882 }
883 total_size += seg_size;
884 break;
885 case PT_PHDR: /* Program header table info */
886 proghdr = phdr[i].p_vaddr + et_dyn_addr;
887 break;
888 default:
889 break;
890 }
891 }
892
893 if (data_addr == 0 && data_size == 0) {
894 data_addr = text_addr;
895 data_size = text_size;
896 }
897
898 entry = (u_long)hdr->e_entry + et_dyn_addr;
899
900 /*
901 * Check limits. It should be safe to check the
902 * limits after loading the segments since we do
903 * not actually fault in all the segments pages.
904 */
905 PROC_LOCK(imgp->proc);
906 if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
907 text_size > maxtsiz ||
908 total_size > lim_cur(imgp->proc, RLIMIT_VMEM) ||
909 racct_set(imgp->proc, RACCT_DATA, data_size) != 0 ||
910 racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) {
911 PROC_UNLOCK(imgp->proc);
912 return (ENOMEM);
913 }
914
915 vmspace->vm_tsize = text_size >> PAGE_SHIFT;
916 vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
917 vmspace->vm_dsize = data_size >> PAGE_SHIFT;
918 vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
919
920 /*
921 * We load the dynamic linker where a userland call
922 * to mmap(0, ...) would put it. The rationale behind this
923 * calculation is that it leaves room for the heap to grow to
924 * its maximum allowed size.
925 */
926 addr = round_page((vm_offset_t)imgp->proc->p_vmspace->vm_daddr +
927 lim_max(imgp->proc, RLIMIT_DATA));
928 PROC_UNLOCK(imgp->proc);
929
930 imgp->entry_addr = entry;
931
932 if (interp != NULL) {
933 int have_interp = FALSE;
934 VOP_UNLOCK(imgp->vp, 0);
935 if (brand_info->emul_path != NULL &&
936 brand_info->emul_path[0] != '\0') {
937 path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
938 snprintf(path, MAXPATHLEN, "%s%s",
939 brand_info->emul_path, interp);
940 error = __elfN(load_file)(imgp->proc, path, &addr,
941 &imgp->entry_addr, sv->sv_pagesize);
942 free(path, M_TEMP);
943 if (error == 0)
944 have_interp = TRUE;
945 }
946 if (!have_interp && newinterp != NULL) {
947 error = __elfN(load_file)(imgp->proc, newinterp, &addr,
948 &imgp->entry_addr, sv->sv_pagesize);
949 if (error == 0)
950 have_interp = TRUE;
951 }
952 if (!have_interp) {
953 error = __elfN(load_file)(imgp->proc, interp, &addr,
954 &imgp->entry_addr, sv->sv_pagesize);
955 }
956 vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
957 if (error != 0) {
958 uprintf("ELF interpreter %s not found\n", interp);
959 return (error);
960 }
961 } else
962 addr = et_dyn_addr;
963
964 /*
965 * Construct auxargs table (used by the fixup routine)
966 */
967 elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
968 elf_auxargs->execfd = -1;
969 elf_auxargs->phdr = proghdr;
970 elf_auxargs->phent = hdr->e_phentsize;
971 elf_auxargs->phnum = hdr->e_phnum;
972 elf_auxargs->pagesz = PAGE_SIZE;
973 elf_auxargs->base = addr;
974 elf_auxargs->flags = 0;
975 elf_auxargs->entry = entry;
976
977 imgp->auxargs = elf_auxargs;
978 imgp->interpreted = 0;
979 imgp->reloc_base = addr;
980 imgp->proc->p_osrel = osrel;
981
982 return (error);
983 }
984
/*
 * From here on "suword" names the store-user-word routine matching the ELF
 * word size this file is compiled for (suword32 or suword64).
 */
985 #define suword __CONCAT(suword, __ELF_WORD_SIZE)
986
987 int
988 __elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
989 {
990 Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
991 Elf_Addr *base;
992 Elf_Addr *pos;
993
994 base = (Elf_Addr *)*stack_base;
995 pos = base + (imgp->args->argc + imgp->args->envc + 2);
996
997 if (args->execfd != -1)
998 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
999 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
1000 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
1001 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
1002 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
1003 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
1004 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
1005 AUXARGS_ENTRY(pos, AT_BASE, args->base);
1006 if (imgp->execpathp != 0)
1007 AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp);
1008 AUXARGS_ENTRY(pos, AT_OSRELDATE, osreldate);
1009 if (imgp->canary != 0) {
1010 AUXARGS_ENTRY(pos, AT_CANARY, imgp->canary);
1011 AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen);
1012 }
1013 AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus);
1014 if (imgp->pagesizes != 0) {
1015 AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes);
1016 AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen);
1017 }
1018 AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj
1019 != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
1020 imgp->sysent->sv_stackprot);
1021 AUXARGS_ENTRY(pos, AT_NULL, 0);
1022
1023 free(imgp->auxargs, M_TEMP);
1024 imgp->auxargs = NULL;
1025
1026 base--;
1027 suword(base, (long)imgp->args->argc);
1028 *stack_base = (register_t *)base;
1029 return (0);
1030 }
1031
1032 /*
1033 * Code for generating ELF core dumps.
1034 */
1035
/*
 * Callback type handed to each_writable_segment(); it receives a map entry
 * and an opaque closure pointer (one of the structures below).
 */
1036 typedef void (*segment_callback)(vm_map_entry_t, void *);
1037
1038 /* Closure for cb_put_phdr(). */
1039 struct phdr_closure {
1040 Elf_Phdr *phdr; /* Program header to fill in */
1041 Elf_Off offset; /* Offset of segment in core file */
1042 };
1043
1044 /* Closure for cb_size_segment(). */
1045 struct sseg_closure {
1046 int count; /* Count of writable segments. */
1047 size_t size; /* Total size of all writable segments. */
1048 };
1049
/* Forward declarations of the file-local core dump helpers. */
1050 static void cb_put_phdr(vm_map_entry_t, void *);
1051 static void cb_size_segment(vm_map_entry_t, void *);
1052 static void each_writable_segment(struct thread *, segment_callback, void *);
1053 static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
1054 int, void *, size_t, gzFile);
1055 static void __elfN(puthdr)(struct thread *, void *, size_t *, int);
1056 static void __elfN(putnote)(void *, size_t *, const char *, int,
1057 const void *, size_t);
1058
/*
 * Core compression settings defined elsewhere in the kernel.  The coredump
 * routine appends compress_user_cores_gzlevel to the gzip open mode when it
 * lies in the range 0..9.
 * NOTE(review): compress_user_cores itself is not referenced in the visible
 * part of the file -- confirm where it is consumed.
 */
1059 #ifdef COMPRESS_USER_CORES
1060 extern int compress_user_cores;
1061 extern int compress_user_cores_gzlevel;
1062 #endif
1063
1064 static int
1065 core_output(struct vnode *vp, void *base, size_t len, off_t offset,
1066 struct ucred *active_cred, struct ucred *file_cred,
1067 struct thread *td, char *core_buf, gzFile gzfile) {
1068
1069 int error;
1070 if (gzfile) {
1071 #ifdef COMPRESS_USER_CORES
1072 error = compress_core(gzfile, base, core_buf, len, td);
1073 #else
1074 panic("shouldn't be here");
1075 #endif
1076 } else {
1077 error = vn_rdwr_inchunks(UIO_WRITE, vp, base, len, offset,
1078 UIO_USERSPACE, IO_UNIT | IO_DIRECT, active_cred, file_cred,
1079 NULL, td);
1080 }
1081 return (error);
1082 }
1083
1084 int
1085 __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
1086 {
1087 struct ucred *cred = td->td_ucred;
1088 int error = 0;
1089 struct sseg_closure seginfo;
1090 void *hdr;
1091 size_t hdrsize;
1092
1093 gzFile gzfile = Z_NULL;
1094 char *core_buf = NULL;
1095 #ifdef COMPRESS_USER_CORES
1096 char gzopen_flags[8];
1097 char *p;
1098 int doing_compress = flags & IMGACT_CORE_COMPRESS;
1099 #endif
1100
1101 hdr = NULL;
1102
1103 #ifdef COMPRESS_USER_CORES
1104 if (doing_compress) {
1105 p = gzopen_flags;
1106 *p++ = 'w';
1107 if (compress_user_cores_gzlevel >= 0 &&
1108 compress_user_cores_gzlevel <= 9)
1109 *p++ = '' + compress_user_cores_gzlevel;
1110 *p = 0;
1111 gzfile = gz_open("", gzopen_flags, vp);
1112 if (gzfile == Z_NULL) {
1113 error = EFAULT;
1114 goto done;
1115 }
1116 core_buf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO);
1117 if (!core_buf) {
1118 error = ENOMEM;
1119 goto done;
1120 }
1121 }
1122 #endif
1123
1124 /* Size the program segments. */
1125 seginfo.count = 0;
1126 seginfo.size = 0;
1127 each_writable_segment(td, cb_size_segment, &seginfo);
1128
1129 /*
1130 * Calculate the size of the core file header area by making
1131 * a dry run of generating it. Nothing is written, but the
1132 * size is calculated.
1133 */
1134 hdrsize = 0;
1135 __elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count);
1136
1137 #ifdef RACCT
1138 PROC_LOCK(td->td_proc);
1139 error = racct_add(td->td_proc, RACCT_CORE, hdrsize + seginfo.size);
1140 PROC_UNLOCK(td->td_proc);
1141 if (error != 0) {
1142 error = EFAULT;
1143 goto done;
1144 }
1145 #endif
1146 if (hdrsize + seginfo.size >= limit) {
1147 error = EFAULT;
1148 goto done;
1149 }
1150
1151 /*
1152 * Allocate memory for building the header, fill it up,
1153 * and write it out.
1154 */
1155 hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
1156 if (hdr == NULL) {
1157 error = EINVAL;
1158 goto done;
1159 }
1160 error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize,
1161 gzfile);
1162
1163 /* Write the contents of all of the writable segments. */
1164 if (error == 0) {
1165 Elf_Phdr *php;
1166 off_t offset;
1167 int i;
1168
1169 php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
1170 offset = hdrsize;
1171 for (i = 0; i < seginfo.count; i++) {
1172 error = core_output(vp, (caddr_t)(uintptr_t)php->p_vaddr,
1173 php->p_filesz, offset, cred, NOCRED, curthread, core_buf, gzfile);
1174 if (error != 0)
1175 break;
1176 offset += php->p_filesz;
1177 php++;
1178 }
1179 }
1180 if (error) {
1181 log(LOG_WARNING,
1182 "Failed to write core file for process %s (error %d)\n",
1183 curproc->p_comm, error);
1184 }
1185
1186 done:
1187 #ifdef COMPRESS_USER_CORES
1188 if (core_buf)
1189 free(core_buf, M_TEMP);
1190 if (gzfile)
1191 gzclose(gzfile);
1192 #endif
1193
1194 free(hdr, M_TEMP);
1195
1196 return (error);
1197 }
1198
1199 /*
1200 * A callback for each_writable_segment() to write out the segment's
1201 * program header entry.
1202 */
1203 static void
1204 cb_put_phdr(entry, closure)
1205 vm_map_entry_t entry;
1206 void *closure;
1207 {
1208 struct phdr_closure *phc = (struct phdr_closure *)closure;
1209 Elf_Phdr *phdr = phc->phdr;
1210
1211 phc->offset = round_page(phc->offset);
1212
1213 phdr->p_type = PT_LOAD;
1214 phdr->p_offset = phc->offset;
1215 phdr->p_vaddr = entry->start;
1216 phdr->p_paddr = 0;
1217 phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
1218 phdr->p_align = PAGE_SIZE;
1219 phdr->p_flags = __elfN(untrans_prot)(entry->protection);
1220
1221 phc->offset += phdr->p_filesz;
1222 phc->phdr++;
1223 }
1224
1225 /*
1226 * A callback for each_writable_segment() to gather information about
1227 * the number of segments and their total size.
1228 */
1229 static void
1230 cb_size_segment(entry, closure)
1231 vm_map_entry_t entry;
1232 void *closure;
1233 {
1234 struct sseg_closure *ssc = (struct sseg_closure *)closure;
1235
1236 ssc->count++;
1237 ssc->size += entry->end - entry->start;
1238 }
1239
1240 /*
1241 * For each writable segment in the process's memory map, call the given
1242 * function with a pointer to the map entry and some arbitrary
1243 * caller-supplied data.
1244 */
1245 static void
1246 each_writable_segment(td, func, closure)
1247 struct thread *td;
1248 segment_callback func;
1249 void *closure;
1250 {
1251 struct proc *p = td->td_proc;
1252 vm_map_t map = &p->p_vmspace->vm_map;
1253 vm_map_entry_t entry;
1254 vm_object_t backing_object, object;
1255 boolean_t ignore_entry;
1256
1257 vm_map_lock_read(map);
1258 for (entry = map->header.next; entry != &map->header;
1259 entry = entry->next) {
1260 /*
1261 * Don't dump inaccessible mappings, deal with legacy
1262 * coredump mode.
1263 *
1264 * Note that read-only segments related to the elf binary
1265 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
1266 * need to arbitrarily ignore such segments.
1267 */
1268 if (elf_legacy_coredump) {
1269 if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
1270 continue;
1271 } else {
1272 if ((entry->protection & VM_PROT_ALL) == 0)
1273 continue;
1274 }
1275
1276 /*
1277 * Dont include memory segment in the coredump if
1278 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
1279 * madvise(2). Do not dump submaps (i.e. parts of the
1280 * kernel map).
1281 */
1282 if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
1283 continue;
1284
1285 if ((object = entry->object.vm_object) == NULL)
1286 continue;
1287
1288 /* Ignore memory-mapped devices and such things. */
1289 VM_OBJECT_LOCK(object);
1290 while ((backing_object = object->backing_object) != NULL) {
1291 VM_OBJECT_LOCK(backing_object);
1292 VM_OBJECT_UNLOCK(object);
1293 object = backing_object;
1294 }
1295 ignore_entry = object->type != OBJT_DEFAULT &&
1296 object->type != OBJT_SWAP && object->type != OBJT_VNODE;
1297 VM_OBJECT_UNLOCK(object);
1298 if (ignore_entry)
1299 continue;
1300
1301 (*func)(entry, closure);
1302 }
1303 vm_map_unlock_read(map);
1304 }
1305
1306 /*
1307 * Write the core file header to the file, including padding up to
1308 * the page boundary.
1309 */
1310 static int
1311 __elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize, gzfile)
1312 struct thread *td;
1313 struct vnode *vp;
1314 struct ucred *cred;
1315 int numsegs;
1316 size_t hdrsize;
1317 void *hdr;
1318 gzFile gzfile;
1319 {
1320 size_t off;
1321
1322 /* Fill in the header. */
1323 bzero(hdr, hdrsize);
1324 off = 0;
1325 __elfN(puthdr)(td, hdr, &off, numsegs);
1326
1327 if (!gzfile) {
1328 /* Write it to the core file. */
1329 return (vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
1330 UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
1331 td));
1332 } else {
1333 #ifdef COMPRESS_USER_CORES
1334 if (gzwrite(gzfile, hdr, hdrsize) != hdrsize) {
1335 log(LOG_WARNING,
1336 "Failed to compress core file header for process"
1337 " %s.\n", curproc->p_comm);
1338 return (EFAULT);
1339 }
1340 else {
1341 return (0);
1342 }
1343 #else
1344 panic("shouldn't be here");
1345 #endif
1346 }
1347 }
1348
/*
 * Types of the note payloads written by puthdr.  The 32-bit compat
 * layouts are selected when this file is built for 32-bit ELF images
 * with COMPAT_FREEBSD32; otherwise the native layouts are used.
 */
#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
#include <compat/freebsd32/freebsd32.h>

typedef struct prstatus32 elf_prstatus_t;
typedef struct prpsinfo32 elf_prpsinfo_t;
typedef struct fpreg32 elf_prfpregset_t;
typedef struct fpreg32 elf_fpregset_t;
typedef struct reg32 elf_gregset_t;
typedef struct thrmisc32 elf_thrmisc_t;
#else
typedef prstatus_t elf_prstatus_t;
typedef prpsinfo_t elf_prpsinfo_t;
typedef prfpregset_t elf_prfpregset_t;
typedef prfpregset_t elf_fpregset_t;
typedef gregset_t elf_gregset_t;
typedef thrmisc_t elf_thrmisc_t;
#endif
1366
1367 static void
1368 __elfN(puthdr)(struct thread *td, void *dst, size_t *off, int numsegs)
1369 {
1370 struct {
1371 elf_prstatus_t status;
1372 elf_prfpregset_t fpregset;
1373 elf_prpsinfo_t psinfo;
1374 elf_thrmisc_t thrmisc;
1375 } *tempdata;
1376 elf_prstatus_t *status;
1377 elf_prfpregset_t *fpregset;
1378 elf_prpsinfo_t *psinfo;
1379 elf_thrmisc_t *thrmisc;
1380 struct proc *p;
1381 struct thread *thr;
1382 size_t ehoff, noteoff, notesz, phoff;
1383
1384 p = td->td_proc;
1385
1386 ehoff = *off;
1387 *off += sizeof(Elf_Ehdr);
1388
1389 phoff = *off;
1390 *off += (numsegs + 1) * sizeof(Elf_Phdr);
1391
1392 noteoff = *off;
1393 /*
1394 * Don't allocate space for the notes if we're just calculating
1395 * the size of the header. We also don't collect the data.
1396 */
1397 if (dst != NULL) {
1398 tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO|M_WAITOK);
1399 status = &tempdata->status;
1400 fpregset = &tempdata->fpregset;
1401 psinfo = &tempdata->psinfo;
1402 thrmisc = &tempdata->thrmisc;
1403 } else {
1404 tempdata = NULL;
1405 status = NULL;
1406 fpregset = NULL;
1407 psinfo = NULL;
1408 thrmisc = NULL;
1409 }
1410
1411 if (dst != NULL) {
1412 psinfo->pr_version = PRPSINFO_VERSION;
1413 psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
1414 strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
1415 /*
1416 * XXX - We don't fill in the command line arguments properly
1417 * yet.
1418 */
1419 strlcpy(psinfo->pr_psargs, p->p_comm,
1420 sizeof(psinfo->pr_psargs));
1421 }
1422 __elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
1423 sizeof *psinfo);
1424
1425 /*
1426 * To have the debugger select the right thread (LWP) as the initial
1427 * thread, we dump the state of the thread passed to us in td first.
1428 * This is the thread that causes the core dump and thus likely to
1429 * be the right thread one wants to have selected in the debugger.
1430 */
1431 thr = td;
1432 while (thr != NULL) {
1433 if (dst != NULL) {
1434 status->pr_version = PRSTATUS_VERSION;
1435 status->pr_statussz = sizeof(elf_prstatus_t);
1436 status->pr_gregsetsz = sizeof(elf_gregset_t);
1437 status->pr_fpregsetsz = sizeof(elf_fpregset_t);
1438 status->pr_osreldate = osreldate;
1439 status->pr_cursig = p->p_sig;
1440 status->pr_pid = thr->td_tid;
1441 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
1442 fill_regs32(thr, &status->pr_reg);
1443 fill_fpregs32(thr, fpregset);
1444 #else
1445 fill_regs(thr, &status->pr_reg);
1446 fill_fpregs(thr, fpregset);
1447 #endif
1448 memset(&thrmisc->_pad, 0, sizeof (thrmisc->_pad));
1449 strcpy(thrmisc->pr_tname, thr->td_name);
1450 }
1451 __elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status,
1452 sizeof *status);
1453 __elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
1454 sizeof *fpregset);
1455 __elfN(putnote)(dst, off, "FreeBSD", NT_THRMISC, thrmisc,
1456 sizeof *thrmisc);
1457 /*
1458 * Allow for MD specific notes, as well as any MD
1459 * specific preparations for writing MI notes.
1460 */
1461 __elfN(dump_thread)(thr, dst, off);
1462
1463 thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
1464 TAILQ_NEXT(thr, td_plist);
1465 if (thr == td)
1466 thr = TAILQ_NEXT(thr, td_plist);
1467 }
1468
1469 notesz = *off - noteoff;
1470
1471 if (dst != NULL)
1472 free(tempdata, M_TEMP);
1473
1474 /* Align up to a page boundary for the program segments. */
1475 *off = round_page(*off);
1476
1477 if (dst != NULL) {
1478 Elf_Ehdr *ehdr;
1479 Elf_Phdr *phdr;
1480 struct phdr_closure phc;
1481
1482 /*
1483 * Fill in the ELF header.
1484 */
1485 ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
1486 ehdr->e_ident[EI_MAG0] = ELFMAG0;
1487 ehdr->e_ident[EI_MAG1] = ELFMAG1;
1488 ehdr->e_ident[EI_MAG2] = ELFMAG2;
1489 ehdr->e_ident[EI_MAG3] = ELFMAG3;
1490 ehdr->e_ident[EI_CLASS] = ELF_CLASS;
1491 ehdr->e_ident[EI_DATA] = ELF_DATA;
1492 ehdr->e_ident[EI_VERSION] = EV_CURRENT;
1493 ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
1494 ehdr->e_ident[EI_ABIVERSION] = 0;
1495 ehdr->e_ident[EI_PAD] = 0;
1496 ehdr->e_type = ET_CORE;
1497 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
1498 ehdr->e_machine = ELF_ARCH32;
1499 #else
1500 ehdr->e_machine = ELF_ARCH;
1501 #endif
1502 ehdr->e_version = EV_CURRENT;
1503 ehdr->e_entry = 0;
1504 ehdr->e_phoff = phoff;
1505 ehdr->e_flags = 0;
1506 ehdr->e_ehsize = sizeof(Elf_Ehdr);
1507 ehdr->e_phentsize = sizeof(Elf_Phdr);
1508 ehdr->e_phnum = numsegs + 1;
1509 ehdr->e_shentsize = sizeof(Elf_Shdr);
1510 ehdr->e_shnum = 0;
1511 ehdr->e_shstrndx = SHN_UNDEF;
1512
1513 /*
1514 * Fill in the program header entries.
1515 */
1516 phdr = (Elf_Phdr *)((char *)dst + phoff);
1517
1518 /* The note segement. */
1519 phdr->p_type = PT_NOTE;
1520 phdr->p_offset = noteoff;
1521 phdr->p_vaddr = 0;
1522 phdr->p_paddr = 0;
1523 phdr->p_filesz = notesz;
1524 phdr->p_memsz = 0;
1525 phdr->p_flags = 0;
1526 phdr->p_align = 0;
1527 phdr++;
1528
1529 /* All the writable segments from the program. */
1530 phc.phdr = phdr;
1531 phc.offset = *off;
1532 each_writable_segment(td, cb_put_phdr, &phc);
1533 }
1534 }
1535
1536 static void
1537 __elfN(putnote)(void *dst, size_t *off, const char *name, int type,
1538 const void *desc, size_t descsz)
1539 {
1540 Elf_Note note;
1541
1542 note.n_namesz = strlen(name) + 1;
1543 note.n_descsz = descsz;
1544 note.n_type = type;
1545 if (dst != NULL)
1546 bcopy(¬e, (char *)dst + *off, sizeof note);
1547 *off += sizeof note;
1548 if (dst != NULL)
1549 bcopy(name, (char *)dst + *off, note.n_namesz);
1550 *off += roundup2(note.n_namesz, sizeof(Elf_Size));
1551 if (dst != NULL)
1552 bcopy(desc, (char *)dst + *off, note.n_descsz);
1553 *off += roundup2(note.n_descsz, sizeof(Elf_Size));
1554 }
1555
1556 static boolean_t
1557 __elfN(parse_notes)(struct image_params *imgp, Elf_Brandnote *checknote,
1558 int32_t *osrel, const Elf_Phdr *pnote)
1559 {
1560 const Elf_Note *note, *note0, *note_end;
1561 const char *note_name;
1562 int i;
1563
1564 if (pnote == NULL || pnote->p_offset >= PAGE_SIZE ||
1565 pnote->p_filesz > PAGE_SIZE ||
1566 pnote->p_offset + pnote->p_filesz >= PAGE_SIZE)
1567 return (FALSE);
1568
1569 note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset);
1570 note_end = (const Elf_Note *)(imgp->image_header +
1571 pnote->p_offset + pnote->p_filesz);
1572 for (i = 0; i < 100 && note >= note0 && note < note_end; i++) {
1573 if (!aligned(note, Elf32_Addr) || (const char *)note_end -
1574 (const char *)note < sizeof(Elf_Note))
1575 return (FALSE);
1576 if (note->n_namesz != checknote->hdr.n_namesz ||
1577 note->n_descsz != checknote->hdr.n_descsz ||
1578 note->n_type != checknote->hdr.n_type)
1579 goto nextnote;
1580 note_name = (const char *)(note + 1);
1581 if (note_name + checknote->hdr.n_namesz >=
1582 (const char *)note_end || strncmp(checknote->vendor,
1583 note_name, checknote->hdr.n_namesz) != 0)
1584 goto nextnote;
1585
1586 /*
1587 * Fetch the osreldate for binary
1588 * from the ELF OSABI-note if necessary.
1589 */
1590 if ((checknote->flags & BN_TRANSLATE_OSREL) != 0 &&
1591 checknote->trans_osrel != NULL)
1592 return (checknote->trans_osrel(note, osrel));
1593 return (TRUE);
1594
1595 nextnote:
1596 note = (const Elf_Note *)((const char *)(note + 1) +
1597 roundup2(note->n_namesz, sizeof(Elf32_Addr)) +
1598 roundup2(note->n_descsz, sizeof(Elf32_Addr)));
1599 }
1600
1601 return (FALSE);
1602 }
1603
1604 /*
1605 * Try to find the appropriate ABI-note section for checknote,
1606 * fetch the osreldate for binary from the ELF OSABI-note. Only the
1607 * first page of the image is searched, the same as for headers.
1608 */
1609 static boolean_t
1610 __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote,
1611 int32_t *osrel)
1612 {
1613 const Elf_Phdr *phdr;
1614 const Elf_Ehdr *hdr;
1615 int i;
1616
1617 hdr = (const Elf_Ehdr *)imgp->image_header;
1618 phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
1619
1620 for (i = 0; i < hdr->e_phnum; i++) {
1621 if (phdr[i].p_type == PT_NOTE &&
1622 __elfN(parse_notes)(imgp, checknote, osrel, &phdr[i]))
1623 return (TRUE);
1624 }
1625 return (FALSE);
1626
1627 }
1628
/*
 * Tell kern_execve.c about it, with a little help from the linker.
 *
 * The execsw entry pairs this file's image activator with a name built
 * by stringifying ELF concatenated with __ELF_WORD_SIZE; EXEC_SET() then
 * registers the entry.
 */
static struct execsw __elfN(execsw) = {
	__CONCAT(exec_, __elfN(imgact)),	/* image activator function */
	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))	/* activator name */
};
EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
1637
1638 #ifdef COMPRESS_USER_CORES
1639 /*
1640 * Compress and write out a core segment for a user process.
1641 *
1642 * 'inbuf' is the starting address of a VM segment in the process' address
1643 * space that is to be compressed and written out to the core file. 'dest_buf'
1644 * is a buffer in the kernel's address space. The segment is copied from
1645 * 'inbuf' to 'dest_buf' first before being processed by the compression
1646 * routine gzwrite(). This copying is necessary because the content of the VM
1647 * segment may change between the compression pass and the crc-computation pass
1648 * in gzwrite(). This is because realtime threads may preempt the UNIX kernel.
1649 */
1650 static int
1651 compress_core (gzFile file, char *inbuf, char *dest_buf, unsigned int len,
1652 struct thread *td)
1653 {
1654 int len_compressed;
1655 int error = 0;
1656 unsigned int chunk_len;
1657
1658 while (len) {
1659 chunk_len = (len > CORE_BUF_SIZE) ? CORE_BUF_SIZE : len;
1660 copyin(inbuf, dest_buf, chunk_len);
1661 len_compressed = gzwrite(file, dest_buf, chunk_len);
1662
1663 EVENTHANDLER_INVOKE(app_coredump_progress, td, len_compressed);
1664
1665 if ((unsigned int)len_compressed != chunk_len) {
1666 log(LOG_WARNING,
1667 "compress_core: length mismatch (0x%x returned, "
1668 "0x%x expected)\n", len_compressed, chunk_len);
1669 EVENTHANDLER_INVOKE(app_coredump_error, td,
1670 "compress_core: length mismatch %x -> %x",
1671 chunk_len, len_compressed);
1672 error = EFAULT;
1673 break;
1674 }
1675 inbuf += chunk_len;
1676 len -= chunk_len;
1677 maybe_yield();
1678 }
1679
1680 return (error);
1681 }
1682 #endif /* COMPRESS_USER_CORES */
1683
1684 static vm_prot_t
1685 __elfN(trans_prot)(Elf_Word flags)
1686 {
1687 vm_prot_t prot;
1688
1689 prot = 0;
1690 if (flags & PF_X)
1691 prot |= VM_PROT_EXECUTE;
1692 if (flags & PF_W)
1693 prot |= VM_PROT_WRITE;
1694 if (flags & PF_R)
1695 prot |= VM_PROT_READ;
1696 #if __ELF_WORD_SIZE == 32
1697 #if defined(__amd64__) || defined(__ia64__)
1698 if (i386_read_exec && (flags & PF_R))
1699 prot |= VM_PROT_EXECUTE;
1700 #endif
1701 #endif
1702 return (prot);
1703 }
1704
1705 static Elf_Word
1706 __elfN(untrans_prot)(vm_prot_t prot)
1707 {
1708 Elf_Word flags;
1709
1710 flags = 0;
1711 if (prot & VM_PROT_EXECUTE)
1712 flags |= PF_X;
1713 if (prot & VM_PROT_READ)
1714 flags |= PF_R;
1715 if (prot & VM_PROT_WRITE)
1716 flags |= PF_W;
1717 return (flags);
1718 }
Cache object: aefd2ec82ffaf3ddfe2bb5675d4e61a6
|