1 /*-
2 * Copyright (c) 2005 Olivier Houchard. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
14 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
15 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
16 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
17 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
18 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
19 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
20 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
22 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 */
24
25 #include <sys/cdefs.h>
26 __FBSDID("$FreeBSD: releng/6.2/sys/arm/arm/elf_trampoline.c 159904 2006-06-23 23:29:01Z cognet $");
27 #include <machine/asm.h>
28 #include <sys/types.h>
29 #include <sys/elf32.h>
30 #include <sys/param.h>
31 #include <sys/inflate.h>
32 #include <machine/elf.h>
33 #include <machine/pte.h>
34 #include <machine/cpufunc.h>
35 #include <machine/armreg.h>
36
37 #include <stdlib.h>
38
39 #include "opt_global.h"
40 #include "opt_kernname.h"
41
/*
 * Symbols defined outside this file.  The address of kernel_start is used
 * by __start() as the location of the kernel image; the address of _end is
 * used as the base of the scratch area (stack, page tables, inflated
 * kernel).
 */
extern char kernel_start[];
extern char kernel_end[];
extern void *_end;

/* C entry point, called from _start() once the stack has been set up. */
void __start(void);

/*
 * Offset from the start of a gzip image at which inflate_kernel() begins
 * looking for the optional header fields / compressed data.
 */
#define GZ_HEAD	0xa
50
/*
 * Select the CPU-specific routine that cpu_idcache_wbinv_all expands to,
 * according to which CPU_* option is defined for this build.  When none of
 * the options listed here is defined, this chain defines nothing.
 */
#if defined(CPU_ARM7TDMI)
#define cpu_idcache_wbinv_all	arm7tdmi_cache_flushID
#elif defined(CPU_ARM8)
#define cpu_idcache_wbinv_all	arm8_cache_purgeID
#elif defined(CPU_ARM9)
#define cpu_idcache_wbinv_all	arm9_idcache_wbinv_all
#elif defined(CPU_ARM10)
#define cpu_idcache_wbinv_all	arm10_idcache_wbinv_all
#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \
    defined(CPU_IXP12X0)
#define cpu_idcache_wbinv_all	sa1_cache_purgeID
#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
    defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425)
#define cpu_idcache_wbinv_all	xscale_cache_purgeID
#endif
66
67
/* Instruction cache geometry, filled in by get_cachetype_cp15(). */
int arm_picache_size;
int arm_picache_line_size;
int arm_picache_ways;

/* Data (and unified) cache geometry, filled in by get_cachetype_cp15(). */
int arm_pdcache_size;
int arm_pdcache_line_size = 32;	/* value used until the cache type is read */
int arm_pdcache_ways;

/* Cache type field and "unified cache" flag from the cache type register. */
int arm_pcache_type;
int arm_pcache_unified;

/* Data cache line size and the matching mask (line size - 1). */
int arm_dcache_align;
int arm_dcache_align_mask;

/*
 * Additional cache information local to this file.  Log2 of some of the
 * above numbers.
 */
static int arm_dcache_l2_nsets;
static int arm_dcache_l2_assoc;
static int arm_dcache_l2_linesize;


int block_userspace_access = 0;

/* Defined outside this file; arm9_setup() stores the loop bounds here. */
extern int arm9_dcache_sets_inc;
extern int arm9_dcache_sets_max;
extern int arm9_dcache_index_max;
extern int arm9_dcache_index_inc;
94
95 static __inline void *
96 memcpy(void *dst, const void *src, int len)
97 {
98 const char *s = src;
99 char *d = dst;
100
101 while (len) {
102 if (len >= 4 && !((vm_offset_t)d & 3) &&
103 !((vm_offset_t)s & 3)) {
104 *(uint32_t *)d = *(uint32_t *)s;
105 s += 4;
106 d += 4;
107 len -= 4;
108 } else {
109 *d++ = *s++;
110 len--;
111 }
112 }
113 return (dst);
114 }
115
116 static __inline void
117 bzero(void *addr, int count)
118 {
119 char *tmp = (char *)addr;
120
121 while (count > 0) {
122 if (count >= 4 && !((vm_offset_t)tmp & 3)) {
123 *(uint32_t *)tmp = 0;
124 tmp += 4;
125 count -= 4;
126 } else {
127 *tmp = 0;
128 tmp++;
129 count--;
130 }
131 }
132 }
133
134 static void arm9_setup(void);
135
136 void
137 _start(void)
138 {
139 int physaddr = KERNPHYSADDR;
140 int tmp1;
141 unsigned int sp = ((unsigned int)&_end & ~3) + 4;
142 #ifdef KZIP
143 sp += KERNSIZE + 0x100;
144 sp &= ~(L1_TABLE_SIZE - 1);
145 sp += 2 * L1_TABLE_SIZE;
146 #endif
147 sp += 1024 * 1024; /* Should be enough for a stack */
148
149 __asm __volatile("adr %0, 2f\n"
150 "bic %0, %0, #0xff000000\n"
151 "and %1, %1, #0xff000000\n"
152 "orr %0, %0, %1\n"
153 "mrc p15, 0, %1, c1, c0, 0\n"
154 "bic %1, %1, #1\n" /* Disable MMU */
155 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
156 WBUF enable */
157 "orr %1, %1, #0x1000\n" /* Add IC enable */
158 "orr %1, %1, #(0x800)\n" /* BPRD enable */
159
160 "mcr p15, 0, %1, c1, c0, 0\n"
161 "nop\n"
162 "nop\n"
163 "nop\n"
164 "mov pc, %0\n"
165 "2: nop\n"
166 "mov sp, %2\n"
167 : "=r" (tmp1), "+r" (physaddr), "+r" (sp));
168 #ifndef KZIP
169 #ifdef CPU_ARM9
170 /* So that idcache_wbinv works; */
171 if ((cpufunc_id() & 0x0000f000) == 0x00009000)
172 arm9_setup();
173 #endif
174 cpu_idcache_wbinv_all();
175 #endif
176 __start();
177 }
178
179 static void
180 get_cachetype_cp15()
181 {
182 u_int ctype, isize, dsize;
183 u_int multiplier;
184
185 __asm __volatile("mrc p15, 0, %0, c0, c0, 1"
186 : "=r" (ctype));
187
188 /*
189 * ...and thus spake the ARM ARM:
190 *
191 * If an <opcode2> value corresponding to an unimplemented or
192 * reserved ID register is encountered, the System Control
193 * processor returns the value of the main ID register.
194 */
195 if (ctype == cpufunc_id())
196 goto out;
197
198 if ((ctype & CPU_CT_S) == 0)
199 arm_pcache_unified = 1;
200
201 /*
202 * If you want to know how this code works, go read the ARM ARM.
203 */
204
205 arm_pcache_type = CPU_CT_CTYPE(ctype);
206 if (arm_pcache_unified == 0) {
207 isize = CPU_CT_ISIZE(ctype);
208 multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
209 arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
210 if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
211 if (isize & CPU_CT_xSIZE_M)
212 arm_picache_line_size = 0; /* not present */
213 else
214 arm_picache_ways = 1;
215 } else {
216 arm_picache_ways = multiplier <<
217 (CPU_CT_xSIZE_ASSOC(isize) - 1);
218 }
219 arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
220 }
221
222 dsize = CPU_CT_DSIZE(ctype);
223 multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
224 arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
225 if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
226 if (dsize & CPU_CT_xSIZE_M)
227 arm_pdcache_line_size = 0; /* not present */
228 else
229 arm_pdcache_ways = 1;
230 } else {
231 arm_pdcache_ways = multiplier <<
232 (CPU_CT_xSIZE_ASSOC(dsize) - 1);
233 }
234 arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);
235
236 arm_dcache_align = arm_pdcache_line_size;
237
238 arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
239 arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
240 arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
241 CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);
242 out:
243 arm_dcache_align_mask = arm_dcache_align - 1;
244 }
245
246 static void
247 arm9_setup(void)
248 {
249
250 get_cachetype_cp15();
251 arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
252 arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
253 arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
254 arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
255 arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
256 }
257
258
259 #ifdef KZIP
static unsigned char *orig_input;	/* start of the compressed image */
static unsigned char *i_input;		/* next byte handed out by input() */
static unsigned char *i_output;		/* next byte written by output() */
261
262
static unsigned int memcnt;	/* Memory allocated: blocks */
static size_t memtot;		/* Memory allocated: bytes */
/*
 * Library functions required by inflate().
 */

#define MEMSIZ 0x8000

/*
 * Allocate memory block.
 *
 * Blocks are carved sequentially out of a fixed MEMSIZ-byte static arena.
 * Returns a pointer to size bytes, or NULL when size is negative or the
 * arena does not have size bytes left.
 *
 * The remaining space is computed as MEMSIZ - memtot (memtot never exceeds
 * MEMSIZ), so the check cannot wrap around and a negative size can no
 * longer move memtot backwards.
 */
unsigned char *
kzipmalloc(int size)
{
	static unsigned char mem[MEMSIZ];
	unsigned char *ptr;

	if (size < 0 || (size_t)size > MEMSIZ - memtot)
		return (NULL);
	ptr = mem + memtot;
	memtot += (size_t)size;
	memcnt++;
	return (ptr);
}

/*
 * Free allocated memory block.
 *
 * Individual blocks are not reclaimed; the whole arena becomes available
 * again once the count of live blocks drops to zero.  A NULL pointer (never
 * counted by kzipmalloc()) or a call with no live blocks is ignored so the
 * block counter cannot underflow.
 */
void
kzipfree(void *ptr)
{
	if (ptr == NULL || memcnt == 0)
		return;
	if (--memcnt == 0)
		memtot = 0;
}
298
/*
 * Message hook with an empty body: the string is ignored and nothing is
 * printed.
 */
void
putstr(char *dummy)
{
	(void)dummy;
}
303
304 static int
305 input(void *dummy)
306 {
307 if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
308 return (GZ_EOF);
309 }
310 return *i_input++;
311 }
312
313 static int
314 output(void *dummy, unsigned char *ptr, unsigned long len)
315 {
316
317
318 memcpy(i_output, ptr, len);
319 i_output += len;
320 return (0);
321 }
322
323 static void *
324 inflate_kernel(void *kernel, void *startaddr)
325 {
326 struct inflate infl;
327 char slide[GZ_WSIZE];
328
329 orig_input = kernel;
330 memcnt = memtot = 0;
331 i_input = (char *)kernel + GZ_HEAD;
332 if (((char *)kernel)[3] & 0x18) {
333 while (*i_input)
334 i_input++;
335 i_input++;
336 }
337 i_output = startaddr;
338 bzero(&infl, sizeof(infl));
339 infl.gz_input = input;
340 infl.gz_output = output;
341 infl.gz_slide = slide;
342 inflate(&infl);
343 return ((char *)(((vm_offset_t)i_output & ~3) + 4));
344 }
345
346 #endif
347
348 void *
349 load_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end,
350 int d)
351 {
352 Elf32_Ehdr *eh;
353 Elf32_Phdr phdr[64] /* XXX */, *php;
354 Elf32_Shdr shdr[64] /* XXX */;
355 int i,j;
356 void *entry_point;
357 int symtabindex = -1;
358 int symstrindex = -1;
359 vm_offset_t lastaddr = 0;
360 Elf_Addr ssym = 0, esym = 0;
361 Elf_Dyn *dp;
362
363 eh = (Elf32_Ehdr *)kstart;
364 ssym = esym = 0;
365 entry_point = (void*)eh->e_entry;
366 memcpy(phdr, (void *)(kstart + eh->e_phoff ),
367 eh->e_phnum * sizeof(phdr[0]));
368
369 /* Determine lastaddr. */
370 for (i = 0; i < eh->e_phnum; i++) {
371 if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
372 + phdr[i].p_memsz))
373 lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
374 curaddr + phdr[i].p_memsz;
375 }
376
377 /* Save the symbol tables, as there're about to be scratched. */
378 memcpy(shdr, (void *)(kstart + eh->e_shoff),
379 sizeof(*shdr) * eh->e_shnum);
380 if (eh->e_shnum * eh->e_shentsize != 0 &&
381 eh->e_shoff != 0) {
382 for (i = 0; i < eh->e_shnum; i++) {
383 if (shdr[i].sh_type == SHT_SYMTAB) {
384 for (j = 0; j < eh->e_phnum; j++) {
385 if (phdr[j].p_type == PT_LOAD &&
386 shdr[i].sh_offset >=
387 phdr[j].p_offset &&
388 (shdr[i].sh_offset +
389 shdr[i].sh_size <=
390 phdr[j].p_offset +
391 phdr[j].p_filesz)) {
392 shdr[i].sh_offset = 0;
393 shdr[i].sh_size = 0;
394 j = eh->e_phnum;
395 }
396 }
397 if (shdr[i].sh_offset != 0 &&
398 shdr[i].sh_size != 0) {
399 symtabindex = i;
400 symstrindex = shdr[i].sh_link;
401 }
402 }
403 }
404 func_end = roundup(func_end, sizeof(long));
405 if (symtabindex >= 0 && symstrindex >= 0) {
406 ssym = lastaddr;
407 if (d) {
408 memcpy((void *)func_end, (void *)(
409 shdr[symtabindex].sh_offset + kstart),
410 shdr[symtabindex].sh_size);
411 memcpy((void *)(func_end +
412 shdr[symtabindex].sh_size),
413 (void *)(shdr[symstrindex].sh_offset +
414 kstart), shdr[symstrindex].sh_size);
415 } else {
416 lastaddr += shdr[symtabindex].sh_size;
417 lastaddr = roundup(lastaddr,
418 sizeof(shdr[symtabindex].sh_size));
419 lastaddr += sizeof(shdr[symstrindex].sh_size);
420 lastaddr += shdr[symstrindex].sh_size;
421 lastaddr = roundup(lastaddr,
422 sizeof(shdr[symstrindex].sh_size));
423 }
424
425 }
426 }
427 if (!d)
428 return ((void *)lastaddr);
429
430 j = eh->e_phnum;
431 for (i = 0; i < j; i++) {
432 volatile char c;
433
434 if (phdr[i].p_type != PT_LOAD)
435 continue;
436 memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
437 (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
438 /* Clean space from oversized segments, eg: bss. */
439 if (phdr[i].p_filesz < phdr[i].p_memsz)
440 bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
441 curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
442 phdr[i].p_filesz);
443 }
444 /* Now grab the symbol tables. */
445 if (symtabindex >= 0 && symstrindex >= 0) {
446 *(Elf_Size *)lastaddr =
447 shdr[symtabindex].sh_size;
448 lastaddr += sizeof(shdr[symtabindex].sh_size);
449 memcpy((void*)lastaddr,
450 (void *)func_end,
451 shdr[symtabindex].sh_size);
452 lastaddr += shdr[symtabindex].sh_size;
453 lastaddr = roundup(lastaddr,
454 sizeof(shdr[symtabindex].sh_size));
455 *(Elf_Size *)lastaddr =
456 shdr[symstrindex].sh_size;
457 lastaddr += sizeof(shdr[symstrindex].sh_size);
458 memcpy((void*)lastaddr,
459 (void*)(func_end +
460 shdr[symtabindex].sh_size),
461 shdr[symstrindex].sh_size);
462 lastaddr += shdr[symstrindex].sh_size;
463 lastaddr = roundup(lastaddr,
464 sizeof(shdr[symstrindex].sh_size));
465 *(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
466 *((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
467 *((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
468 } else
469 *(Elf_Addr *)curaddr = 0;
470 /* Invalidate the instruction cache. */
471 __asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
472 "mcr p15, 0, %0, c7, c10, 4\n"
473 : : "r" (curaddr));
474 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
475 "bic %0, %0, #1\n" /* MMU_ENABLE */
476 "mcr p15, 0, %0, c1, c0, 0\n"
477 : "=r" (ssym));
478 /* Jump to the entry point. */
479 ((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))();
480 __asm __volatile(".globl func_end\n"
481 "func_end:");
482
483 }
484
/* Label emitted by the asm statement at the end of load_kernel(). */
extern char func_end[];


/*
 * Just define it instead of including the
 * whole VM headers set.
 */
#define PMAP_DOMAIN_KERNEL	15

/* Receives the dummy read performed at the end of setup_pagetables(). */
int __hack;
493 static __inline void
494 setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
495 int write_back)
496 {
497 unsigned int *pd = (unsigned int *)pt_addr;
498 vm_paddr_t addr;
499 int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
500 int tmp;
501
502 bzero(pd, L1_TABLE_SIZE);
503 for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
504 pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
505 L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
506 if (write_back)
507 pd[addr >> L1_S_SHIFT] |= L1_S_B;
508 }
509 /* XXX: See below */
510 if (0xfff00000 < physstart || 0xfff00000 > physend)
511 pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
512 L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
513 __asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */
514 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TTB */
515 "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */
516 "mrc p15, 0, %0, c1, c0, 0\n"
517 "orr %0, %0, #1\n" /* MMU_ENABLE */
518 "mcr p15, 0, %0, c1, c0, 0\n"
519 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
520 "mov r0, r0\n"
521 "sub pc, pc, #4\n" :
522 "=r" (tmp) : "r" (pd), "r" (domain));
523
524 /*
525 * XXX: This is the most stupid workaround I've ever wrote.
526 * For some reason, the KB9202 won't boot the kernel unless
527 * we access an address which is not in the
528 * 0x20000000 - 0x20ffffff range. I hope I'll understand
529 * what's going on later.
530 */
531 __hack = *(volatile int *)0xfffff21c;
532 }
533
534 void
535 __start(void)
536 {
537 void *curaddr;
538 void *dst, *altdst;
539 char *kernel = (char *)&kernel_start;
540 int sp;
541 int pt_addr;
542
543 __asm __volatile("mov %0, pc" :
544 "=r" (curaddr));
545 curaddr = (void*)((unsigned int)curaddr & 0xfff00000);
546 #ifdef KZIP
547 if (*kernel == 0x1f && kernel[1] == 0x8b) {
548 pt_addr = (((int)&_end + KERNSIZE + 0x100) &
549 ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
550
551 #ifdef CPU_ARM9
552 /* So that idcache_wbinv works; */
553 if ((cpufunc_id() & 0x0000f000) == 0x00009000)
554 arm9_setup();
555 #endif
556 setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
557 (vm_paddr_t)curaddr + 0x10000000, 1);
558 /* Gzipped kernel */
559 dst = inflate_kernel(kernel, &_end);
560 kernel = (char *)&_end;
561 altdst = 4 + load_kernel((unsigned int)kernel,
562 (unsigned int)curaddr,
563 (unsigned int)&func_end , 0);
564 if (altdst > dst)
565 dst = altdst;
566 cpu_idcache_wbinv_all();
567 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
568 "bic %0, %0, #1\n" /* MMU_ENABLE */
569 "mcr p15, 0, %0, c1, c0, 0\n"
570 : "=r" (pt_addr));
571 } else
572 #endif
573 dst = 4 + load_kernel((unsigned int)&kernel_start,
574 (unsigned int)curaddr,
575 (unsigned int)&func_end, 0);
576 dst = (void *)(((vm_offset_t)dst & ~3));
577 pt_addr = ((unsigned int)dst &~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
578 setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
579 (vm_paddr_t)curaddr + 0x10000000, 0);
580 sp = pt_addr + L1_TABLE_SIZE + 8192;
581 sp = sp &~3;
582 dst = (void *)(sp + 4);
583 memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
584 (unsigned int)&load_kernel);
585 do_call(dst, kernel, dst + (unsigned int)(&func_end) -
586 (unsigned int)(&load_kernel), sp);
587 }
Cache object: 7701d1263d385c3e1273c1254cc9e21d
|