1 /*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 * derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: releng/5.0/sys/i386/i386/mp_machdep.c 105216 2002-10-16 08:57:14Z phk $
26 */
27
28 #include "opt_cpu.h"
29 #include "opt_kstack_pages.h"
30
31 #ifdef SMP
32 #include <machine/smptests.h>
33 #else
34 #error
35 #endif
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/bus.h>
40 #include <sys/cons.h> /* cngetc() */
41 #include <sys/dkstat.h>
42 #ifdef GPROF
43 #include <sys/gmon.h>
44 #endif
45 #include <sys/kernel.h>
46 #include <sys/ktr.h>
47 #include <sys/lock.h>
48 #include <sys/malloc.h>
49 #include <sys/memrange.h>
50 #include <sys/mutex.h>
51 #include <sys/pcpu.h>
52 #include <sys/proc.h>
53 #include <sys/smp.h>
54 #include <sys/sysctl.h>
55 #include <sys/user.h>
56
57 #include <vm/vm.h>
58 #include <vm/vm_param.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_kern.h>
61 #include <vm/vm_extern.h>
62 #include <vm/vm_map.h>
63
64 #include <machine/apic.h>
65 #include <machine/atomic.h>
66 #include <machine/cpu.h>
67 #include <machine/cpufunc.h>
68 #include <machine/mpapic.h>
69 #include <machine/psl.h>
70 #include <machine/segments.h>
71 #include <machine/smp.h>
72 #include <machine/smptests.h> /** TEST_DEFAULT_CONFIG, TEST_TEST1 */
73 #include <machine/tss.h>
74 #include <machine/specialreg.h>
75 #include <machine/privatespace.h>
76
77 #if defined(APIC_IO)
78 #include <machine/md_var.h> /* setidt() */
79 #include <i386/isa/icu.h> /* IPIs */
80 #include <i386/isa/intr_machdep.h> /* IPIs */
81 #endif /* APIC_IO */
82
/*
 * MPFPS_MPFB1 is the "default configuration" selector consulted by the MP
 * table passes: the compile-time TEST_DEFAULT_CONFIG override when that is
 * defined, otherwise the mpfb1 byte of the floating pointer structure.
 */
#if defined(TEST_DEFAULT_CONFIG)
#define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
#else
#define MPFPS_MPFB1	mpfps->mpfb1
#endif  /* TEST_DEFAULT_CONFIG */

/*
 * NOTE(review): presumably the BIOS warm-boot vector (offset at 0x467,
 * segment at 0x469) reached through the KERNBASE mapping; the users are
 * not in the visible part of the file -- confirm.
 */
#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

/* Range handed to search_for_sig() when scanning the BIOS for the signature. */
#ifdef PC98
#define BIOS_BASE		(0xe8000)
#define BIOS_SIZE		(0x18000)
#else
#define BIOS_BASE		(0xf0000)
#define BIOS_SIZE		(0x10000)
#endif
/* BIOS_SIZE in 32-bit words, the unit search_for_sig() counts in. */
#define BIOS_COUNT		(BIOS_SIZE/4)

/* I/O ports used by the CHECK_READ/CHECK_WRITE checkpoint macros. */
#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
/* NOTE(review): not referenced in the visible code; meaning to be confirmed. */
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)

/* Flag bits of the MP table entries. */
#define PROCENTRY_FLAG_EN	0x01	/* cpu_flags: CPU is counted by mptable_pass1() */
#define PROCENTRY_FLAG_BP	0x02
#define IOAPICENTRY_FLAG_EN	0x01	/* apic_flags: IO APIC is recorded by mptable_pass1() */
111
/*
 * MP Floating Pointer Structure.
 * i386_mp_probe() locates it by its signature; this file then reads
 * pap, mpfb1 and mpfb2 from it.
 */
typedef struct MPFPS {
	char	signature[4];
	void	*pap;		/* MP configuration table header; checked against 0 before use */
	u_char	length;
	u_char	spec_rev;
	u_char	checksum;
	u_char	mpfb1;		/* non-zero: 'default' configuration type */
	u_char	mpfb2;		/* bit 0x80 set: PIC mode, clear: virtual wire mode */
	u_char	mpfb3;
	u_char	mpfb4;
	u_char	mpfb5;
}      *mpfps_t;
125
/*
 * MP Configuration Table Header.
 * The base table entries walked by mptable_pass1()/mptable_pass2() start
 * immediately after this header (at offset sizeof(struct MPCTH)).
 */
typedef struct MPCTH {
	char	signature[4];
	u_short	base_table_length;
	u_char	spec_rev;
	u_char	checksum;
	u_char	oem_id[8];
	u_char	product_id[12];
	void	*oem_table_pointer;
	u_short	oem_table_size;
	u_short	entry_count;		/* number of base table entries to walk */
	void	*apic_address;		/* copied into cpu_apic_address by mptable_pass1() */
	u_short	extended_table_length;
	u_char	extended_table_checksum;
	u_char	reserved;
}      *mpcth_t;
142
143
/* Base table entry of type 0 (processor). */
typedef struct PROCENTRY {
	u_char	type;		/* first byte of every entry; selects the entry layout */
	u_char	apic_id;
	u_char	apic_version;
	u_char	cpu_flags;	/* tested against PROCENTRY_FLAG_EN */
	u_long	cpu_signature;
	u_long	feature_flags;
	u_long	reserved1;
	u_long	reserved2;
}      *proc_entry_ptr;
154
/* Base table entry of type 1 (bus). */
typedef struct BUSENTRY {
	u_char	type;
	u_char	bus_id;
	char	bus_type[6];	/* NOTE(review): presumably a bus name as in bus_type_table[] -- confirm in bus_entry() */
}      *bus_entry_ptr;
160
/* Base table entry of type 2 (IO APIC). */
typedef struct IOAPICENTRY {
	u_char	type;
	u_char	apic_id;
	u_char	apic_version;
	u_char	apic_flags;	/* tested against IOAPICENTRY_FLAG_EN */
	void	*apic_address;	/* recorded in io_apic_address[] by mptable_pass1() */
}      *io_apic_entry_ptr;
168
/*
 * Base table entry of types 3 and 4 (interrupt entries); only type 3
 * entries are counted and parsed by the MP table passes in this file.
 */
typedef struct INTENTRY {
	u_char	type;
	u_char	int_type;
	u_short	int_flags;
	u_char	src_bus_id;
	u_char	src_bus_irq;
	u_char	dst_apic_id;
	u_char	dst_apic_int;
}      *int_entry_ptr;
178
/* descriptions of MP basetable entries */
typedef struct BASETABLE_ENTRY {
	u_char	type;		/* entry type code */
	u_char	length;		/* entry size in bytes; used to step to the next entry */
	char	name[16];	/* printable name of the entry type */
}       basetable_entry;
185
186 /*
187 * this code MUST be enabled here and in mpboot.s.
188 * it follows the very early stages of AP boot by placing values in CMOS ram.
189 * it NORMALLY will never be needed and thus the primitive method for enabling.
190 *
191 #define CHECK_POINTS
192 */
193
#if defined(CHECK_POINTS) && !defined(PC98)
/* Select CMOS register A through the index port, then access the data port. */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * Store D in each of the six checkpoint bytes (CMOS registers 0x34-0x39).
 * The do/while(0) wrapper makes the expansion a single statement, so
 * "CHECK_INIT(x);" is safe as the body of an if/else.
 */
#define CHECK_INIT(D)							\
	do {								\
		CHECK_WRITE(0x34, (D));					\
		CHECK_WRITE(0x35, (D));					\
		CHECK_WRITE(0x36, (D));					\
		CHECK_WRITE(0x37, (D));					\
		CHECK_WRITE(0x38, (D));					\
		CHECK_WRITE(0x39, (D));					\
	} while (0)

/*
 * Print the six checkpoint bytes, prefixed by the string S.
 * The bytes are read into locals first: each read is an index-port write
 * followed by a data-port read, and the evaluation order of function
 * arguments is unspecified, so the reads must not be left to printf()'s
 * argument list.
 */
#define CHECK_PRINT(S)							\
	do {								\
		int check_val_[6];					\
		int check_idx_;						\
									\
		for (check_idx_ = 0; check_idx_ < 6; check_idx_++)	\
			check_val_[check_idx_] =			\
			    CHECK_READ(0x34 + check_idx_);		\
		printf("%s: %d, %d, %d, %d, %d, %d\n",			\
		    (S),						\
		    check_val_[0],					\
		    check_val_[1],					\
		    check_val_[2],					\
		    check_val_[3],					\
		    check_val_[4],					\
		    check_val_[5]);					\
	} while (0)

#else				/* CHECK_POINTS */

/* Checkpoints disabled: both macros expand to nothing. */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif				/* CHECK_POINTS */
222
/*
 * Values to send to the POST hardware.
 * Each value is passed to POSTCODE() at the start of the function it is
 * named after (e.g. MP_PROBE_POST in i386_mp_probe()).
 */
#define MP_BOOTADDRESS_POST	0x10
#define MP_PROBE_POST		0x11
#define MPTABLE_PASS1_POST	0x12

#define MP_START_POST		0x13
#define MP_ENABLE_POST		0x14
#define MPTABLE_PASS2_POST	0x15

/* NOTE(review): the users of the next three are outside the visible part of the file. */
#define START_ALL_APS_POST	0x16
#define INSTALL_AP_TRAMP_POST	0x17
#define START_AP_POST		0x18

#define MP_ANNOUNCE_POST	0x19
239
/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;

/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int	current_postcode;

/** XXX FIXME: what system files declare these??? */
extern struct region_descriptor r_gdt, r_idt;

int	bsp_apic_ready = 0;	/* flags useability of BSP apic */
int	mp_naps;		/* # of Applications processors (BSP excluded once mptable_pass1() is done) */
int	mp_nbusses;		/* # of busses */
int	mp_napics;		/* # of IO APICs */
int	boot_cpu_id;		/* designated BSP */
vm_offset_t cpu_apic_address;	/* local APIC address: from the MP table, or DEFAULT_APIC_BASE */
vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
extern int nkpt;

u_int32_t cpu_apic_versions[MAXCPU];	/* indexed by logical CPU number */
u_int32_t *io_apic_versions;		/* indexed by logical IO APIC number; allocated in mptable_pass2() */

#ifdef APIC_INTR_REORDER
struct {
	volatile int *location;
	int bit;
} apic_isrbit_location[32];
#endif

/* per-IRQ record of the IO APIC and pin it is routed through; see assign_apic_irq() */
struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];

/*
 * APIC ID logical/physical mapping structures.
 * We oversize these to simplify boot-time config.
 */
int     cpu_num_to_apic_id[NAPICID];
int     io_num_to_apic_id[NAPICID];
int     apic_id_to_logical[NAPICID];


/* AP uses this during bootstrap.  Do not staticize.  */
char *bootSTK;
/* index used by init_secondary() to pick the starting AP's private space and GDT slice */
static int bootAP;

/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;

/* SMP page table page */
extern pt_entry_t *SMPpt;

struct pcb stoppcbs[MAXCPU];

#ifdef APIC_IO
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
static struct mtx smp_tlb_mtx;
#endif
298
/*
 * Local data and functions.
 */

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;

static int	mp_capable;	/* set by i386_mp_probe(): 1 if an MP floating pointer was found */
static u_int	boot_address;	/* AP trampoline address computed by mp_bootaddress() */
static u_int	base_memory;	/* size of base memory in bytes, recorded by mp_bootaddress() */

static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
static mpfps_t	mpfps;			/* MP floating pointer structure, 0 if none was found */
static int	search_for_sig(u_int32_t target, int count);
static void	mp_enable(u_int boot_addr);

static void	mptable_pass1(void);
static int	mptable_pass2(void);
static void	default_mp_table(int type);
static void	fix_mp_table(void);
static void	setup_apic_irq_mapping(void);
static void	init_locks(void);
static int	start_all_aps(u_int boot_addr);
static void	install_ap_tramp(u_int boot_addr);
static int	start_ap(int logicalCpu, u_int boot_addr);
void		ap_init(void);
static int	apic_int_is_bus_type(int intr, int bus_type);
static void	release_aps(void *dummy);

/*
 * initialize all the SMP locks
 */

/* lock region used by kernel profiling */
int	mcount_lock;

#ifdef USE_COMLOCK
/* locks com (tty) data/hardware accesses: a FASTINTR() */
struct mtx		com_mtx;
#endif /* USE_COMLOCK */
339
/*
 * Initialize the spin mutexes this file is responsible for.  Each mutex
 * exists only when the corresponding kernel option is compiled in.
 */
static void
init_locks(void)
{
#if defined(USE_COMLOCK)
	/* com (tty) data/hardware access lock */
	mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
#endif
#if defined(APIC_IO)
	/* lock declared alongside the SMP tlb shootdown variables */
	mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
#endif
}
351
352 /*
353 * Calculate usable address in base memory for AP trampoline code.
354 */
355 u_int
356 mp_bootaddress(u_int basemem)
357 {
358 POSTCODE(MP_BOOTADDRESS_POST);
359
360 base_memory = basemem * 1024; /* convert to bytes */
361
362 boot_address = base_memory & ~0xfff; /* round down to 4k boundary */
363 if ((base_memory - boot_address) < bootMP_size)
364 boot_address -= 4096; /* not enough, lower by 4k */
365
366 return boot_address;
367 }
368
369
370 /*
371 * Look for an Intel MP spec table (ie, SMP capable hardware).
372 */
373 void
374 i386_mp_probe(void)
375 {
376 int x;
377 u_long segment;
378 u_int32_t target;
379
380 POSTCODE(MP_PROBE_POST);
381
382 /* see if EBDA exists */
383 if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
384 /* search first 1K of EBDA */
385 target = (u_int32_t) (segment << 4);
386 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
387 goto found;
388 } else {
389 /* last 1K of base memory, effective 'top of base' passed in */
390 target = (u_int32_t) (base_memory - 0x400);
391 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
392 goto found;
393 }
394
395 /* search the BIOS */
396 target = (u_int32_t) BIOS_BASE;
397 if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
398 goto found;
399
400 /* nothing found */
401 mpfps = (mpfps_t)0;
402 mp_capable = 0;
403 return;
404
405 found:
406 /* calculate needed resources */
407 mpfps = (mpfps_t)x;
408 mptable_pass1();
409
410 /* flag fact that we are running multiple processors */
411 mp_capable = 1;
412 }
413
414 int
415 cpu_mp_probe(void)
416 {
417 /*
418 * Record BSP in CPU map
419 * This is done here so that MBUF init code works correctly.
420 */
421 all_cpus = 1;
422
423 return (mp_capable);
424 }
425
426 /*
427 * Initialize the SMP hardware and the APIC and start up the AP's.
428 */
429 void
430 cpu_mp_start(void)
431 {
432 POSTCODE(MP_START_POST);
433
434 /* look for MP capable motherboard */
435 if (mp_capable)
436 mp_enable(boot_address);
437 else
438 panic("MP hardware not found!");
439
440 cpu_setregs();
441 }
442
443
444 /*
445 * Print various information about the SMP system hardware and setup.
446 */
447 void
448 cpu_mp_announce(void)
449 {
450 int x;
451
452 POSTCODE(MP_ANNOUNCE_POST);
453
454 printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
455 printf(", version: 0x%08x", cpu_apic_versions[0]);
456 printf(", at 0x%08x\n", cpu_apic_address);
457 for (x = 1; x <= mp_naps; ++x) {
458 printf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x));
459 printf(", version: 0x%08x", cpu_apic_versions[x]);
460 printf(", at 0x%08x\n", cpu_apic_address);
461 }
462
463 #if defined(APIC_IO)
464 for (x = 0; x < mp_napics; ++x) {
465 printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
466 printf(", version: 0x%08x", io_apic_versions[x]);
467 printf(", at 0x%08x\n", io_apic_address[x]);
468 }
469 #else
470 printf(" Warning: APIC I/O disabled\n");
471 #endif /* APIC_IO */
472 }
473
/*
 * AP cpu's call this to sync up protected mode.
 *
 * The calling AP is identified by bootAP.  The function builds that CPU's
 * slice of the GDT (gdt[myid * NGDT ...]) with the private-space and TSS
 * descriptors pointing into SMP_prvspace[myid], loads the GDT, IDT and
 * default LDT, fills in and loads the TSS, puts CR0 into a known state
 * and finally calls pmap_set_opt().  The order of the steps matters.
 */
void
init_secondary(void)
{
	int	gsel_tss;
	int	x, myid = bootAP;
	u_int	cr0;

	/* point the per-CPU descriptors at this CPU's private space */
	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
	gdt_segs[GPROC0_SEL].ssd_base =
		(int) &SMP_prvspace[myid].pcpu.pc_common_tss;
	SMP_prvspace[myid].pcpu.pc_prvspace =
		&SMP_prvspace[myid].pcpu;

	/* convert the soft descriptors into this CPU's GDT slice */
	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt);			/* does magic intra-segment return */

	lidt(&r_idt);

	lldt(_default_ldt);
	PCPU_SET(currentldt, _default_ldt);

	/* set up the per-CPU TSS and load the task register with it */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
	ltr(gsel_tss);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);	/* clear CD, NW and EM */
	load_cr0(cr0);

	pmap_set_opt();
}
523
524
#if defined(APIC_IO)
/*
 * Final configuration of the BSP's local APIC:
 *  - disable 'pic mode'.
 *  - disable 'virtual wire mode'.
 *  - enable NMI.
 *
 * Each local APIC register is updated read-modify-write so that only the
 * mask bit (APIC_LVT_M) changes.
 */
void
bsp_apic_configure(void)
{
	u_char		byte;
	u_int32_t	temp;

	/* leave 'pic mode' if necessary */
	if (picmode) {
		outb(0x22, 0x70);	/* select IMCR */
		byte = inb(0x23);	/* current contents */
		byte |= 0x01;		/* mask external INTR */
		outb(0x23, byte);	/* disconnect 8259s/NMI */
	}

	/* mask lint0 (the 8259 'virtual wire' connection) */
	temp = lapic.lvt_lint0;
	temp |= APIC_LVT_M;		/* set the mask */
	lapic.lvt_lint0 = temp;

	/* setup lint1 to handle NMI */
	temp = lapic.lvt_lint1;
	temp &= ~APIC_LVT_M;		/* clear the mask */
	lapic.lvt_lint1 = temp;

	/* dump the resulting APIC state on verbose boots */
	if (bootverbose)
		apic_dump("bsp_apic_configure()");
}
#endif  /* APIC_IO */
560
561
562 /*******************************************************************
563 * local functions and data
564 */
565
/*
 * start the SMP system
 *
 * boot_addr is passed through to start_all_aps().
 *
 * Steps, in order: temporarily map the first 4MB V == P so that
 * mptable_pass2() can read the MP table through physical addresses,
 * process a 'default' configuration if pass 2 reported one, fix up the
 * parsed table and the APIC IRQ mapping, program the IO APICs and install
 * the IPI vectors (APIC_IO only), initialize the SMP locks and finally
 * start the application processors.
 */
static void
mp_enable(u_int boot_addr)
{
	int     x;
#if defined(APIC_IO)
	int     apic;
	u_int   ux;
#endif	/* APIC_IO */

	POSTCODE(MP_ENABLE_POST);

	/* turn on 4MB of V == P addressing so we can get to MP table */
	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
	invltlb();

	/* examine the MP table for needed info, uses physical addresses */
	x = mptable_pass2();

	/* remove the temporary V == P mapping again */
	*(int *)PTD = 0;
	invltlb();

	/* can't process default configs till the CPU APIC is pmapped */
	if (x)
		default_mp_table(x);

	/* post scan cleanup */
	fix_mp_table();
	setup_apic_irq_mapping();

#if defined(APIC_IO)

	/* fill the LOGICAL io_apic_versions table */
	for (apic = 0; apic < mp_napics; ++apic) {
		ux = io_apic_read(apic, IOAPIC_VER);
		io_apic_versions[apic] = ux;
		io_apic_set_id(apic, IO_TO_ID(apic));
	}

	/* program each IO APIC in the system */
	for (apic = 0; apic < mp_napics; ++apic)
		if (io_apic_setup(apic) < 0)
			panic("IO APIC setup failure");

	/* install a 'Spurious INTerrupt' vector */
	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for TLB invalidation */
	setidt(XINVLTLB_OFFSET, Xinvltlb,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(XINVLPG_OFFSET, Xinvlpg,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(XINVLRNG_OFFSET, Xinvlrng,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forwarding hardclock() */
	setidt(XHARDCLOCK_OFFSET, Xhardclock,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forwarding statclock() */
	setidt(XSTATCLOCK_OFFSET, Xstatclock,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for all-CPU rendezvous */
	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forcing an additional software trap */
	setidt(XCPUAST_OFFSET, Xcpuast,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for CPU stop/restart */
	setidt(XCPUSTOP_OFFSET, Xcpustop,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

#if defined(TEST_TEST1)
	/* install a "fake hardware INTerrupt" vector */
	setidt(XTEST1_OFFSET, Xtest1,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif  /** TEST_TEST1 */

#endif	/* APIC_IO */

	/* initialize all SMP locks */
	init_locks();

	/* start each Application Processor */
	start_all_aps(boot_addr);
}
658
659
660 /*
661 * look for the MP spec signature
662 */
663
664 /* string defined by the Intel MP Spec as identifying the MP table */
665 #define MP_SIG 0x5f504d5f /* _MP_ */
666 #define NEXT(X) ((X) += 4)
667 static int
668 search_for_sig(u_int32_t target, int count)
669 {
670 int x;
671 u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
672
673 for (x = 0; x < count; NEXT(x))
674 if (addr[x] == MP_SIG)
675 /* make array index a byte index */
676 return (target + (x * sizeof(u_int32_t)));
677
678 return -1;
679 }
680
681
/*
 * Description of each base table entry type, indexed by the type code.
 * The MP table passes use the length column to step from one entry to
 * the next.
 */
static basetable_entry basetable_entry_types[] =
{
	/* type, length (bytes), name */
	{0, 20, "Processor"},
	{1, 8, "Bus"},
	{2, 8, "I/O APIC"},
	{3, 8, "I/O INT"},
	{4, 8, "Local INT"}
};
690
/* Parsed form of a bus entry; stored in bus_data[]. */
typedef struct BUSDATA {
	u_char  bus_id;		/* 0xff marks a slot not filled in yet */
	enum busTypes bus_type;
}       bus_datum;

/* Parsed form of an interrupt entry; stored in io_apic_ints[]. */
typedef struct INTDATA {
	u_char  int_type;	/* 0xff marks a slot not filled in yet */
	u_short int_flags;
	u_char  src_bus_id;
	u_char  src_bus_irq;
	u_char  dst_apic_id;
	u_char  dst_apic_int;
	u_char	int_vector;	/* IRQ assigned by assign_apic_irq(), 0xff if none */
}       io_int, local_int;

/* Pairs a bus type code with its printable name. */
typedef struct BUSTYPENAME {
	u_char  type;
	char    name[7];	/* room for 6 characters plus the terminating NUL */
}       bus_type_name;
710
/*
 * Table of bus type codes and their names.
 * NOTE(review): MCA appears twice; presumably the table is searched by
 * name in lookup_bus_type() (not visible here), so the duplicate would be
 * harmless -- confirm.
 */
static bus_type_name bus_type_table[] =
{
	{CBUS, "CBUS"},
	{CBUSII, "CBUSII"},
	{EISA, "EISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{ISA, "ISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{PCI, "PCI"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{XPRESS, "XPRESS"},
	{UNKNOWN_BUSTYPE, "---"}
};
/*
 * from MP spec v1.4, table 5-1
 *
 * One row per 'default' configuration type; the row index is the
 * configuration type minus one (see mptable_pass1()).
 */
static int default_data[7][5] =
{
/*   nbus, id0, type0, id1, type1 */
	{1, 0, ISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, MCA, 255, 255},
	{2, 0, ISA, 1, PCI},
	{2, 0, EISA, 1, PCI},
	{2, 0, MCA, 1, PCI}
};
745
746
/* the bus data; mp_nbusses elements, allocated in mptable_pass2() */
static bus_datum *bus_data;

/* the IO INT data, one entry per possible APIC INTerrupt */
static io_int *io_apic_ints;

/* number of IO interrupt entries counted by mptable_pass1() */
static int nintrs;

/* per-entry parsers called by mptable_pass2() */
static int processor_entry(proc_entry_ptr entry, int cpu);
static int bus_entry(bus_entry_ptr entry, int bus);
static int io_apic_entry(io_apic_entry_ptr entry, int apic);
static int int_entry(int_entry_ptr entry, int intr);
static int lookup_bus_type(char *name);
760
761
762 /*
763 * 1st pass on motherboard's Intel MP specification table.
764 *
765 * initializes:
766 * mp_ncpus = 1
767 *
768 * determines:
769 * cpu_apic_address (common to all CPUs)
770 * io_apic_address[N]
771 * mp_naps
772 * mp_nbusses
773 * mp_napics
774 * nintrs
775 */
776 static void
777 mptable_pass1(void)
778 {
779 int x;
780 mpcth_t cth;
781 int totalSize;
782 void* position;
783 int count;
784 int type;
785
786 POSTCODE(MPTABLE_PASS1_POST);
787
788 /* clear various tables */
789 for (x = 0; x < NAPICID; ++x) {
790 io_apic_address[x] = ~0; /* IO APIC address table */
791 }
792
793 /* init everything to empty */
794 mp_naps = 0;
795 mp_nbusses = 0;
796 mp_napics = 0;
797 nintrs = 0;
798
799 /* check for use of 'default' configuration */
800 if (MPFPS_MPFB1 != 0) {
801 /* use default addresses */
802 cpu_apic_address = DEFAULT_APIC_BASE;
803 io_apic_address[0] = DEFAULT_IO_APIC_BASE;
804
805 /* fill in with defaults */
806 mp_naps = 2; /* includes BSP */
807 mp_maxid = 1;
808 mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
809 #if defined(APIC_IO)
810 mp_napics = 1;
811 nintrs = 16;
812 #endif /* APIC_IO */
813 }
814 else {
815 if ((cth = mpfps->pap) == 0)
816 panic("MP Configuration Table Header MISSING!");
817
818 cpu_apic_address = (vm_offset_t) cth->apic_address;
819
820 /* walk the table, recording info of interest */
821 totalSize = cth->base_table_length - sizeof(struct MPCTH);
822 position = (u_char *) cth + sizeof(struct MPCTH);
823 count = cth->entry_count;
824
825 while (count--) {
826 switch (type = *(u_char *) position) {
827 case 0: /* processor_entry */
828 if (((proc_entry_ptr)position)->cpu_flags
829 & PROCENTRY_FLAG_EN) {
830 ++mp_naps;
831 mp_maxid++;
832 }
833 break;
834 case 1: /* bus_entry */
835 ++mp_nbusses;
836 break;
837 case 2: /* io_apic_entry */
838 if (((io_apic_entry_ptr)position)->apic_flags
839 & IOAPICENTRY_FLAG_EN)
840 io_apic_address[mp_napics++] =
841 (vm_offset_t)((io_apic_entry_ptr)
842 position)->apic_address;
843 break;
844 case 3: /* int_entry */
845 ++nintrs;
846 break;
847 case 4: /* int_entry */
848 break;
849 default:
850 panic("mpfps Base Table HOSED!");
851 /* NOTREACHED */
852 }
853
854 totalSize -= basetable_entry_types[type].length;
855 (u_char*)position += basetable_entry_types[type].length;
856 }
857 }
858
859 /* qualify the numbers */
860 if (mp_naps > MAXCPU) {
861 printf("Warning: only using %d of %d available CPUs!\n",
862 MAXCPU, mp_naps);
863 mp_naps = MAXCPU;
864 }
865
866 /*
867 * Count the BSP.
868 * This is also used as a counter while starting the APs.
869 */
870 mp_ncpus = 1;
871
872 --mp_naps; /* subtract the BSP */
873 }
874
875
876 /*
877 * 2nd pass on motherboard's Intel MP specification table.
878 *
879 * sets:
880 * boot_cpu_id
881 * ID_TO_IO(N), phy APIC ID to log CPU/IO table
882 * CPU_TO_ID(N), logical CPU to APIC ID table
883 * IO_TO_ID(N), logical IO to APIC ID table
884 * bus_data[N]
885 * io_apic_ints[N]
886 */
887 static int
888 mptable_pass2(void)
889 {
890 int x;
891 mpcth_t cth;
892 int totalSize;
893 void* position;
894 int count;
895 int type;
896 int apic, bus, cpu, intr;
897 int i, j;
898 int pgeflag;
899
900 POSTCODE(MPTABLE_PASS2_POST);
901
902 pgeflag = 0; /* XXX - Not used under SMP yet. */
903
904 MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
905 M_DEVBUF, M_WAITOK);
906 MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
907 M_DEVBUF, M_WAITOK);
908 MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
909 M_DEVBUF, M_WAITOK);
910 MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
911 M_DEVBUF, M_WAITOK);
912
913 bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
914
915 for (i = 0; i < mp_napics; i++) {
916 for (j = 0; j < mp_napics; j++) {
917 /* same page frame as a previous IO apic? */
918 if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
919 (io_apic_address[i] & PG_FRAME)) {
920 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
921 + (NPTEPG-2-j) * PAGE_SIZE
922 + (io_apic_address[i] & PAGE_MASK));
923 break;
924 }
925 /* use this slot if available */
926 if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
927 SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
928 pgeflag | (io_apic_address[i] & PG_FRAME));
929 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
930 + (NPTEPG-2-j) * PAGE_SIZE
931 + (io_apic_address[i] & PAGE_MASK));
932 break;
933 }
934 }
935 }
936
937 /* clear various tables */
938 for (x = 0; x < NAPICID; ++x) {
939 ID_TO_IO(x) = -1; /* phy APIC ID to log CPU/IO table */
940 CPU_TO_ID(x) = -1; /* logical CPU to APIC ID table */
941 IO_TO_ID(x) = -1; /* logical IO to APIC ID table */
942 }
943
944 /* clear bus data table */
945 for (x = 0; x < mp_nbusses; ++x)
946 bus_data[x].bus_id = 0xff;
947
948 /* clear IO APIC INT table */
949 for (x = 0; x < (nintrs + 1); ++x) {
950 io_apic_ints[x].int_type = 0xff;
951 io_apic_ints[x].int_vector = 0xff;
952 }
953
954 /* setup the cpu/apic mapping arrays */
955 boot_cpu_id = -1;
956
957 /* record whether PIC or virtual-wire mode */
958 picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
959
960 /* check for use of 'default' configuration */
961 if (MPFPS_MPFB1 != 0)
962 return MPFPS_MPFB1; /* return default configuration type */
963
964 if ((cth = mpfps->pap) == 0)
965 panic("MP Configuration Table Header MISSING!");
966
967 /* walk the table, recording info of interest */
968 totalSize = cth->base_table_length - sizeof(struct MPCTH);
969 position = (u_char *) cth + sizeof(struct MPCTH);
970 count = cth->entry_count;
971 apic = bus = intr = 0;
972 cpu = 1; /* pre-count the BSP */
973
974 while (count--) {
975 switch (type = *(u_char *) position) {
976 case 0:
977 if (processor_entry(position, cpu))
978 ++cpu;
979 break;
980 case 1:
981 if (bus_entry(position, bus))
982 ++bus;
983 break;
984 case 2:
985 if (io_apic_entry(position, apic))
986 ++apic;
987 break;
988 case 3:
989 if (int_entry(position, intr))
990 ++intr;
991 break;
992 case 4:
993 /* int_entry(position); */
994 break;
995 default:
996 panic("mpfps Base Table HOSED!");
997 /* NOTREACHED */
998 }
999
1000 totalSize -= basetable_entry_types[type].length;
1001 (u_char *) position += basetable_entry_types[type].length;
1002 }
1003
1004 if (boot_cpu_id == -1)
1005 panic("NO BSP found!");
1006
1007 /* report fact that its NOT a default configuration */
1008 return 0;
1009 }
1010
1011
1012 void
1013 assign_apic_irq(int apic, int intpin, int irq)
1014 {
1015 int x;
1016
1017 if (int_to_apicintpin[irq].ioapic != -1)
1018 panic("assign_apic_irq: inconsistent table");
1019
1020 int_to_apicintpin[irq].ioapic = apic;
1021 int_to_apicintpin[irq].int_pin = intpin;
1022 int_to_apicintpin[irq].apic_address = ioapic[apic];
1023 int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1024
1025 for (x = 0; x < nintrs; x++) {
1026 if ((io_apic_ints[x].int_type == 0 ||
1027 io_apic_ints[x].int_type == 3) &&
1028 io_apic_ints[x].int_vector == 0xff &&
1029 io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1030 io_apic_ints[x].dst_apic_int == intpin)
1031 io_apic_ints[x].int_vector = irq;
1032 }
1033 }
1034
1035 void
1036 revoke_apic_irq(int irq)
1037 {
1038 int x;
1039 int oldapic;
1040 int oldintpin;
1041
1042 if (int_to_apicintpin[irq].ioapic == -1)
1043 panic("revoke_apic_irq: inconsistent table");
1044
1045 oldapic = int_to_apicintpin[irq].ioapic;
1046 oldintpin = int_to_apicintpin[irq].int_pin;
1047
1048 int_to_apicintpin[irq].ioapic = -1;
1049 int_to_apicintpin[irq].int_pin = 0;
1050 int_to_apicintpin[irq].apic_address = NULL;
1051 int_to_apicintpin[irq].redirindex = 0;
1052
1053 for (x = 0; x < nintrs; x++) {
1054 if ((io_apic_ints[x].int_type == 0 ||
1055 io_apic_ints[x].int_type == 3) &&
1056 io_apic_ints[x].int_vector != 0xff &&
1057 io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1058 io_apic_ints[x].dst_apic_int == oldintpin)
1059 io_apic_ints[x].int_vector = 0xff;
1060 }
1061 }
1062
1063
1064 static void
1065 allocate_apic_irq(int intr)
1066 {
1067 int apic;
1068 int intpin;
1069 int irq;
1070
1071 if (io_apic_ints[intr].int_vector != 0xff)
1072 return; /* Interrupt handler already assigned */
1073
1074 if (io_apic_ints[intr].int_type != 0 &&
1075 (io_apic_ints[intr].int_type != 3 ||
1076 (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1077 io_apic_ints[intr].dst_apic_int == 0)))
1078 return; /* Not INT or ExtInt on != (0, 0) */
1079
1080 irq = 0;
1081 while (irq < APIC_INTMAPSIZE &&
1082 int_to_apicintpin[irq].ioapic != -1)
1083 irq++;
1084
1085 if (irq >= APIC_INTMAPSIZE)
1086 return; /* No free interrupt handlers */
1087
1088 apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1089 intpin = io_apic_ints[intr].dst_apic_int;
1090
1091 assign_apic_irq(apic, intpin, irq);
1092 io_apic_setup_intpin(apic, intpin);
1093 }
1094
1095
1096 static void
1097 swap_apic_id(int apic, int oldid, int newid)
1098 {
1099 int x;
1100 int oapic;
1101
1102
1103 if (oldid == newid)
1104 return; /* Nothing to do */
1105
1106 printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
1107 apic, oldid, newid);
1108
1109 /* Swap physical APIC IDs in interrupt entries */
1110 for (x = 0; x < nintrs; x++) {
1111 if (io_apic_ints[x].dst_apic_id == oldid)
1112 io_apic_ints[x].dst_apic_id = newid;
1113 else if (io_apic_ints[x].dst_apic_id == newid)
1114 io_apic_ints[x].dst_apic_id = oldid;
1115 }
1116
1117 /* Swap physical APIC IDs in IO_TO_ID mappings */
1118 for (oapic = 0; oapic < mp_napics; oapic++)
1119 if (IO_TO_ID(oapic) == newid)
1120 break;
1121
1122 if (oapic < mp_napics) {
1123 printf("Changing APIC ID for IO APIC #%d from "
1124 "%d to %d in MP table\n",
1125 oapic, newid, oldid);
1126 IO_TO_ID(oapic) = oldid;
1127 }
1128 IO_TO_ID(apic) = newid;
1129 }
1130
1131
1132 static void
1133 fix_id_to_io_mapping(void)
1134 {
1135 int x;
1136
1137 for (x = 0; x < NAPICID; x++)
1138 ID_TO_IO(x) = -1;
1139
1140 for (x = 0; x <= mp_naps; x++)
1141 if (CPU_TO_ID(x) < NAPICID)
1142 ID_TO_IO(CPU_TO_ID(x)) = x;
1143
1144 for (x = 0; x < mp_napics; x++)
1145 if (IO_TO_ID(x) < NAPICID)
1146 ID_TO_IO(IO_TO_ID(x)) = x;
1147 }
1148
1149
1150 static int
1151 first_free_apic_id(void)
1152 {
1153 int freeid, x;
1154
1155 for (freeid = 0; freeid < NAPICID; freeid++) {
1156 for (x = 0; x <= mp_naps; x++)
1157 if (CPU_TO_ID(x) == freeid)
1158 break;
1159 if (x <= mp_naps)
1160 continue;
1161 for (x = 0; x < mp_napics; x++)
1162 if (IO_TO_ID(x) == freeid)
1163 break;
1164 if (x < mp_napics)
1165 continue;
1166 return freeid;
1167 }
1168 return freeid;
1169 }
1170
1171
1172 static int
1173 io_apic_id_acceptable(int apic, int id)
1174 {
1175 int cpu; /* Logical CPU number */
1176 int oapic; /* Logical IO APIC number for other IO APIC */
1177
1178 if (id >= NAPICID)
1179 return 0; /* Out of range */
1180
1181 for (cpu = 0; cpu <= mp_naps; cpu++)
1182 if (CPU_TO_ID(cpu) == id)
1183 return 0; /* Conflict with CPU */
1184
1185 for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1186 if (IO_TO_ID(oapic) == id)
1187 return 0; /* Conflict with other APIC */
1188
1189 return 1; /* ID is acceptable for IO APIC */
1190 }
1191
1192
/*
 * Repair known BIOS mistakes in the MP table data collected so far:
 *  - a single PCI bus that was not given bus ID 0,
 *  - IO APIC IDs that are out of range or collide with a CPU or with an
 *    earlier IO APIC,
 *  - a missing ExtInt (8259) entry on pin 0 of logical IO APIC 0.
 *
 * Panics if no usable physical APIC ID can be found for an IO APIC.
 */
static void
fix_mp_table(void)
{
	int x;
	int id;
	int bus_0 = 0;		/* Stop GCC warning */
	int bus_pci = 0;	/* Stop GCC warning */
	int num_pci_bus;
	int apic;		/* IO APIC unit number */
	int freeid;		/* Free physical APIC ID */
	int physid;		/* Current physical IO APIC ID */

	/*
	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
	 * did it wrong.  The MP spec says that when more than 1 PCI bus
	 * exists the BIOS must begin with bus entries for the PCI bus and use
	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
	 * exists the BIOS can choose to ignore this ordering, and indeed many
	 * MP motherboards do ignore it.  This causes a problem when the PCI
	 * sub-system makes requests of the MP sub-system based on PCI bus
	 * numbers.  So here we look for the situation and renumber the
	 * busses and associated INTs in an effort to "make it right".
	 */

	/* find bus 0, PCI bus, count the number of PCI busses */
	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
		if (bus_data[x].bus_id == 0) {
			bus_0 = x;
		}
		if (bus_data[x].bus_type == PCI) {
			++num_pci_bus;
			bus_pci = x;
		}
	}
	/*
	 * bus_0 == slot of bus with ID of 0
	 * bus_pci == slot of last PCI bus encountered
	 */

	/* check the 1 PCI bus case for sanity */
	/* if it is number 0 all is well */
	if (num_pci_bus == 1 &&
	    bus_data[bus_pci].bus_id != 0) {

		/* mis-numbered, swap with whichever bus uses slot 0 */

		/*
		 * swap the bus entry types: the bus_id fields stay where
		 * they are, only the types trade places
		 */
		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
		bus_data[bus_0].bus_type = PCI;

		/*
		 * swap each relevant INTerrupt entry: sources on the old
		 * PCI bus ID move to bus 0 and sources on bus 0 move to
		 * the old PCI bus ID
		 */
		id = bus_data[bus_pci].bus_id;
		for (x = 0; x < nintrs; ++x) {
			if (io_apic_ints[x].src_bus_id == id) {
				io_apic_ints[x].src_bus_id = 0;
			}
			else if (io_apic_ints[x].src_bus_id == 0) {
				io_apic_ints[x].src_bus_id = id;
			}
		}
	}

	/*
	 * Assign IO APIC IDs.
	 *
	 * First try the existing ID.  If a conflict is detected, try
	 * the ID in the MP table.  If a conflict is still detected, find
	 * a free id.
	 *
	 * We cannot use the ID_TO_IO table before all conflicts have been
	 * resolved and the table has been corrected.
	 */
	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */

		/* First try to use the value set by the BIOS */
		physid = io_apic_get_id(apic);
		if (io_apic_id_acceptable(apic, physid)) {
			/* make the MP table value agree with the hardware */
			if (IO_TO_ID(apic) != physid)
				swap_apic_id(apic, IO_TO_ID(apic), physid);
			continue;
		}

		/* Then check if the value in the MP table is acceptable */
		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
			continue;

		/* Last resort, find a free APIC ID and use it */
		freeid = first_free_apic_id();
		if (freeid >= NAPICID)
			panic("No free physical APIC IDs found");

		if (io_apic_id_acceptable(apic, freeid)) {
			swap_apic_id(apic, IO_TO_ID(apic), freeid);
			continue;
		}
		panic("Free physical APIC ID not usable");
	}
	/* all conflicts resolved: ID_TO_IO can now be rebuilt */
	fix_id_to_io_mapping();

	/* detect and fix broken Compaq MP table */
	if (apic_int_type(0, 0) == -1) {
		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
		/*
		 * Append a new record at index nintrs.
		 * NOTE(review): this assumes io_apic_ints[] has room for one
		 * extra entry; the allocation is not visible here -- confirm.
		 */
		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
		nintrs++;
	}
}
1305
1306
1307 /* Assign low level interrupt handlers */
1308 static void
1309 setup_apic_irq_mapping(void)
1310 {
1311 int x;
1312 int int_vector;
1313
1314 /* Clear array */
1315 for (x = 0; x < APIC_INTMAPSIZE; x++) {
1316 int_to_apicintpin[x].ioapic = -1;
1317 int_to_apicintpin[x].int_pin = 0;
1318 int_to_apicintpin[x].apic_address = NULL;
1319 int_to_apicintpin[x].redirindex = 0;
1320 }
1321
1322 /* First assign ISA/EISA interrupts */
1323 for (x = 0; x < nintrs; x++) {
1324 int_vector = io_apic_ints[x].src_bus_irq;
1325 if (int_vector < APIC_INTMAPSIZE &&
1326 io_apic_ints[x].int_vector == 0xff &&
1327 int_to_apicintpin[int_vector].ioapic == -1 &&
1328 (apic_int_is_bus_type(x, ISA) ||
1329 apic_int_is_bus_type(x, EISA)) &&
1330 io_apic_ints[x].int_type == 0) {
1331 assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1332 io_apic_ints[x].dst_apic_int,
1333 int_vector);
1334 }
1335 }
1336
1337 /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1338 for (x = 0; x < nintrs; x++) {
1339 if (io_apic_ints[x].dst_apic_int == 0 &&
1340 io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1341 io_apic_ints[x].int_vector == 0xff &&
1342 int_to_apicintpin[0].ioapic == -1 &&
1343 io_apic_ints[x].int_type == 3) {
1344 assign_apic_irq(0, 0, 0);
1345 break;
1346 }
1347 }
1348 /* PCI interrupt assignment is deferred */
1349 }
1350
1351
1352 static int
1353 processor_entry(proc_entry_ptr entry, int cpu)
1354 {
1355 /* check for usability */
1356 if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1357 return 0;
1358
1359 if(entry->apic_id >= NAPICID)
1360 panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
1361 /* check for BSP flag */
1362 if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1363 boot_cpu_id = entry->apic_id;
1364 CPU_TO_ID(0) = entry->apic_id;
1365 ID_TO_CPU(entry->apic_id) = 0;
1366 return 0; /* its already been counted */
1367 }
1368
1369 /* add another AP to list, if less than max number of CPUs */
1370 else if (cpu < MAXCPU) {
1371 CPU_TO_ID(cpu) = entry->apic_id;
1372 ID_TO_CPU(entry->apic_id) = cpu;
1373 return 1;
1374 }
1375
1376 return 0;
1377 }
1378
1379
1380 static int
1381 bus_entry(bus_entry_ptr entry, int bus)
1382 {
1383 int x;
1384 char c, name[8];
1385
1386 /* encode the name into an index */
1387 for (x = 0; x < 6; ++x) {
1388 if ((c = entry->bus_type[x]) == ' ')
1389 break;
1390 name[x] = c;
1391 }
1392 name[x] = '\0';
1393
1394 if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1395 panic("unknown bus type: '%s'", name);
1396
1397 bus_data[bus].bus_id = entry->bus_id;
1398 bus_data[bus].bus_type = x;
1399
1400 return 1;
1401 }
1402
1403
1404 static int
1405 io_apic_entry(io_apic_entry_ptr entry, int apic)
1406 {
1407 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1408 return 0;
1409
1410 IO_TO_ID(apic) = entry->apic_id;
1411 if (entry->apic_id < NAPICID)
1412 ID_TO_IO(entry->apic_id) = apic;
1413
1414 return 1;
1415 }
1416
1417
1418 static int
1419 lookup_bus_type(char *name)
1420 {
1421 int x;
1422
1423 for (x = 0; x < MAX_BUSTYPE; ++x)
1424 if (strcmp(bus_type_table[x].name, name) == 0)
1425 return bus_type_table[x].type;
1426
1427 return UNKNOWN_BUSTYPE;
1428 }
1429
1430
1431 static int
1432 int_entry(int_entry_ptr entry, int intr)
1433 {
1434 int apic;
1435
1436 io_apic_ints[intr].int_type = entry->int_type;
1437 io_apic_ints[intr].int_flags = entry->int_flags;
1438 io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1439 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1440 if (entry->dst_apic_id == 255) {
1441 /* This signal goes to all IO APICS. Select an IO APIC
1442 with sufficient number of interrupt pins */
1443 for (apic = 0; apic < mp_napics; apic++)
1444 if (((io_apic_read(apic, IOAPIC_VER) &
1445 IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1446 entry->dst_apic_int)
1447 break;
1448 if (apic < mp_napics)
1449 io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1450 else
1451 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1452 } else
1453 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1454 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1455
1456 return 1;
1457 }
1458
1459
1460 static int
1461 apic_int_is_bus_type(int intr, int bus_type)
1462 {
1463 int bus;
1464
1465 for (bus = 0; bus < mp_nbusses; ++bus)
1466 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1467 && ((int) bus_data[bus].bus_type == bus_type))
1468 return 1;
1469
1470 return 0;
1471 }
1472
1473
1474 /*
1475 * Given a traditional ISA INT mask, return an APIC mask.
1476 */
1477 u_int
1478 isa_apic_mask(u_int isa_mask)
1479 {
1480 int isa_irq;
1481 int apic_pin;
1482
1483 #if defined(SKIP_IRQ15_REDIRECT)
1484 if (isa_mask == (1 << 15)) {
1485 printf("skipping ISA IRQ15 redirect\n");
1486 return isa_mask;
1487 }
1488 #endif /* SKIP_IRQ15_REDIRECT */
1489
1490 isa_irq = ffs(isa_mask); /* find its bit position */
1491 if (isa_irq == 0) /* doesn't exist */
1492 return 0;
1493 --isa_irq; /* make it zero based */
1494
1495 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */
1496 if (apic_pin == -1)
1497 return 0;
1498
1499 return (1 << apic_pin); /* convert pin# to a mask */
1500 }
1501
1502
1503 /*
1504 * Determine which APIC pin an ISA/EISA INT is attached to.
1505 */
1506 #define INTTYPE(I) (io_apic_ints[(I)].int_type)
1507 #define INTPIN(I) (io_apic_ints[(I)].dst_apic_int)
1508 #define INTIRQ(I) (io_apic_ints[(I)].int_vector)
1509 #define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1510
1511 #define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq)
1512 int
1513 isa_apic_irq(int isa_irq)
1514 {
1515 int intr;
1516
1517 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
1518 if (INTTYPE(intr) == 0) { /* standard INT */
1519 if (SRCBUSIRQ(intr) == isa_irq) {
1520 if (apic_int_is_bus_type(intr, ISA) ||
1521 apic_int_is_bus_type(intr, EISA)) {
1522 if (INTIRQ(intr) == 0xff)
1523 return -1; /* unassigned */
1524 return INTIRQ(intr); /* found */
1525 }
1526 }
1527 }
1528 }
1529 return -1; /* NOT found */
1530 }
1531
1532
1533 /*
1534 * Determine which APIC pin a PCI INT is attached to.
1535 */
1536 #define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id)
1537 #define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1538 #define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03)
1539 int
1540 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1541 {
1542 int intr;
1543
1544 --pciInt; /* zero based */
1545
1546 for (intr = 0; intr < nintrs; ++intr) /* check each record */
1547 if ((INTTYPE(intr) == 0) /* standard INT */
1548 && (SRCBUSID(intr) == pciBus)
1549 && (SRCBUSDEVICE(intr) == pciDevice)
1550 && (SRCBUSLINE(intr) == pciInt)) /* a candidate IRQ */
1551 if (apic_int_is_bus_type(intr, PCI)) {
1552 if (INTIRQ(intr) == 0xff)
1553 allocate_apic_irq(intr);
1554 if (INTIRQ(intr) == 0xff)
1555 return -1; /* unassigned */
1556 return INTIRQ(intr); /* exact match */
1557 }
1558
1559 return -1; /* NOT found */
1560 }
1561
1562 int
1563 next_apic_irq(int irq)
1564 {
1565 int intr, ointr;
1566 int bus, bustype;
1567
1568 bus = 0;
1569 bustype = 0;
1570 for (intr = 0; intr < nintrs; intr++) {
1571 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1572 continue;
1573 bus = SRCBUSID(intr);
1574 bustype = apic_bus_type(bus);
1575 if (bustype != ISA &&
1576 bustype != EISA &&
1577 bustype != PCI)
1578 continue;
1579 break;
1580 }
1581 if (intr >= nintrs) {
1582 return -1;
1583 }
1584 for (ointr = intr + 1; ointr < nintrs; ointr++) {
1585 if (INTTYPE(ointr) != 0)
1586 continue;
1587 if (bus != SRCBUSID(ointr))
1588 continue;
1589 if (bustype == PCI) {
1590 if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1591 continue;
1592 if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1593 continue;
1594 }
1595 if (bustype == ISA || bustype == EISA) {
1596 if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1597 continue;
1598 }
1599 if (INTPIN(intr) == INTPIN(ointr))
1600 continue;
1601 break;
1602 }
1603 if (ointr >= nintrs) {
1604 return -1;
1605 }
1606 return INTIRQ(ointr);
1607 }
1608 #undef SRCBUSLINE
1609 #undef SRCBUSDEVICE
1610 #undef SRCBUSID
1611 #undef SRCBUSIRQ
1612
1613 #undef INTPIN
1614 #undef INTIRQ
1615 #undef INTAPIC
1616 #undef INTTYPE
1617
1618
1619 /*
1620 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1621 *
1622 * XXX FIXME:
1623 * Exactly what this means is unclear at this point. It is a solution
1624 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard
1625 * could route any of the ISA INTs to upper (>15) IRQ values. But most would
1626 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1627 * option.
1628 */
1629 int
1630 undirect_isa_irq(int rirq)
1631 {
1632 #if defined(READY)
1633 if (bootverbose)
1634 printf("Freeing redirected ISA irq %d.\n", rirq);
1635 /** FIXME: tickle the MB redirector chip */
1636 return -1;
1637 #else
1638 if (bootverbose)
1639 printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1640 return 0;
1641 #endif /* READY */
1642 }
1643
1644
1645 /*
1646 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1647 */
1648 int
1649 undirect_pci_irq(int rirq)
1650 {
1651 #if defined(READY)
1652 if (bootverbose)
1653 printf("Freeing redirected PCI irq %d.\n", rirq);
1654
1655 /** FIXME: tickle the MB redirector chip */
1656 return -1;
1657 #else
1658 if (bootverbose)
1659 printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1660 rirq);
1661 return 0;
1662 #endif /* READY */
1663 }
1664
1665
1666 /*
1667 * given a bus ID, return:
1668 * the bus type if found
1669 * -1 if NOT found
1670 */
1671 int
1672 apic_bus_type(int id)
1673 {
1674 int x;
1675
1676 for (x = 0; x < mp_nbusses; ++x)
1677 if (bus_data[x].bus_id == id)
1678 return bus_data[x].bus_type;
1679
1680 return -1;
1681 }
1682
1683
1684 /*
1685 * given a LOGICAL APIC# and pin#, return:
1686 * the associated src bus ID if found
1687 * -1 if NOT found
1688 */
1689 int
1690 apic_src_bus_id(int apic, int pin)
1691 {
1692 int x;
1693
1694 /* search each of the possible INTerrupt sources */
1695 for (x = 0; x < nintrs; ++x)
1696 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1697 (pin == io_apic_ints[x].dst_apic_int))
1698 return (io_apic_ints[x].src_bus_id);
1699
1700 return -1; /* NOT found */
1701 }
1702
1703
1704 /*
1705 * given a LOGICAL APIC# and pin#, return:
1706 * the associated src bus IRQ if found
1707 * -1 if NOT found
1708 */
1709 int
1710 apic_src_bus_irq(int apic, int pin)
1711 {
1712 int x;
1713
1714 for (x = 0; x < nintrs; x++)
1715 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1716 (pin == io_apic_ints[x].dst_apic_int))
1717 return (io_apic_ints[x].src_bus_irq);
1718
1719 return -1; /* NOT found */
1720 }
1721
1722
1723 /*
1724 * given a LOGICAL APIC# and pin#, return:
1725 * the associated INTerrupt type if found
1726 * -1 if NOT found
1727 */
1728 int
1729 apic_int_type(int apic, int pin)
1730 {
1731 int x;
1732
1733 /* search each of the possible INTerrupt sources */
1734 for (x = 0; x < nintrs; ++x)
1735 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1736 (pin == io_apic_ints[x].dst_apic_int))
1737 return (io_apic_ints[x].int_type);
1738
1739 return -1; /* NOT found */
1740 }
1741
1742 int
1743 apic_irq(int apic, int pin)
1744 {
1745 int x;
1746 int res;
1747
1748 for (x = 0; x < nintrs; ++x)
1749 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1750 (pin == io_apic_ints[x].dst_apic_int)) {
1751 res = io_apic_ints[x].int_vector;
1752 if (res == 0xff)
1753 return -1;
1754 if (apic != int_to_apicintpin[res].ioapic)
1755 panic("apic_irq: inconsistent table");
1756 if (pin != int_to_apicintpin[res].int_pin)
1757 panic("apic_irq inconsistent table (2)");
1758 return res;
1759 }
1760 return -1;
1761 }
1762
1763
1764 /*
1765 * given a LOGICAL APIC# and pin#, return:
1766 * the associated trigger mode if found
1767 * -1 if NOT found
1768 */
1769 int
1770 apic_trigger(int apic, int pin)
1771 {
1772 int x;
1773
1774 /* search each of the possible INTerrupt sources */
1775 for (x = 0; x < nintrs; ++x)
1776 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1777 (pin == io_apic_ints[x].dst_apic_int))
1778 return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1779
1780 return -1; /* NOT found */
1781 }
1782
1783
1784 /*
1785 * given a LOGICAL APIC# and pin#, return:
1786 * the associated 'active' level if found
1787 * -1 if NOT found
1788 */
1789 int
1790 apic_polarity(int apic, int pin)
1791 {
1792 int x;
1793
1794 /* search each of the possible INTerrupt sources */
1795 for (x = 0; x < nintrs; ++x)
1796 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1797 (pin == io_apic_ints[x].dst_apic_int))
1798 return (io_apic_ints[x].int_flags & 0x03);
1799
1800 return -1; /* NOT found */
1801 }
1802
1803
/*
 * set data according to MP defaults
 * FIXME: probably not complete yet...
 *
 * 'type' is the MP default configuration type (1..7); any other value
 * panics.  Fills in the CPU ID tables for one BSP and one AP, the two
 * bus_data[] entries from default_data[], and (with APIC_IO) the IO APIC
 * ID tables plus 16 interrupt records for the single IO APIC.
 */
static void
default_mp_table(int type)
{
	int ap_cpu_id;
#if defined(APIC_IO)
	int io_apic_id;
	int pin;
#endif	/* APIC_IO */

#if 0
	printf("  MP default config type: %d\n", type);
	switch (type) {
	case 1:
		printf("   bus: ISA, APIC: 82489DX\n");
		break;
	case 2:
		printf("   bus: EISA, APIC: 82489DX\n");
		break;
	case 3:
		printf("   bus: EISA, APIC: 82489DX\n");
		break;
	case 4:
		printf("   bus: MCA, APIC: 82489DX\n");
		break;
	case 5:
		printf("   bus: ISA+PCI, APIC: Integrated\n");
		break;
	case 6:
		printf("   bus: EISA+PCI, APIC: Integrated\n");
		break;
	case 7:
		printf("   bus: MCA+PCI, APIC: Integrated\n");
		break;
	default:
		printf("   future type\n");
		break;
		/* NOTREACHED */
	}
#endif	/* 0 */

	/*
	 * The BSP's ID is read from its local APIC; the AP is given ID 1
	 * when the BSP is 0 and ID 0 otherwise.
	 */
	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;

	/* BSP */
	CPU_TO_ID(0) = boot_cpu_id;
	ID_TO_CPU(boot_cpu_id) = 0;

	/* one and only AP */
	CPU_TO_ID(1) = ap_cpu_id;
	ID_TO_CPU(ap_cpu_id) = 1;

#if defined(APIC_IO)
	/* one and only IO APIC */
	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;

	/*
	 * sanity check, refer to MP spec section 3.6.6, last paragraph
	 * necessary as some hardware isn't properly setting up the IO APIC
	 *
	 * Whichever condition is compiled in, a failing ID is replaced by 2.
	 */
#if defined(REALLY_ANAL_IOAPICID_VALUE)
	if (io_apic_id != 2) {
#else
	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
#endif	/* REALLY_ANAL_IOAPICID_VALUE */
		io_apic_set_id(0, 2);
		io_apic_id = 2;
	}
	IO_TO_ID(0) = io_apic_id;
	ID_TO_IO(io_apic_id) = 0;
#endif	/* APIC_IO */

	/* fill out bus entries */
	switch (type) {
	case 1:
	case 2:
	case 3:
	case 4:
	case 5:
	case 6:
	case 7:
		/* columns 1..4 of the row hold ID/type of the two busses */
		bus_data[0].bus_id = default_data[type - 1][1];
		bus_data[0].bus_type = default_data[type - 1][2];
		bus_data[1].bus_id = default_data[type - 1][3];
		bus_data[1].bus_type = default_data[type - 1][4];
		break;

	/*
	 * NOTE(review): an older remark here read "case 4: case 7: MCA NOT
	 * supported", yet both types are accepted by the case list above --
	 * confirm which is intended.
	 */
	default:		/* illegal/reserved */
		panic("BAD default MP config: %d", type);
		/* NOTREACHED */
	}

#if defined(APIC_IO)
	/* general cases from MP v1.4, table 5-2 */
	for (pin = 0; pin < 16; ++pin) {
		io_apic_ints[pin].int_type = 0;
		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
		io_apic_ints[pin].src_bus_id = 0;
		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
		io_apic_ints[pin].dst_apic_id = io_apic_id;
		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
	}

	/* special cases from MP v1.4, table 5-2 */
	if (type == 2) {
		io_apic_ints[2].int_type = 0xff;	/* N/C */
		io_apic_ints[13].int_type = 0xff;	/* N/C */
#if !defined(APIC_MIXED_MODE)
		/** FIXME: ??? */
		panic("sorry, can't support type 2 default yet");
#endif	/* APIC_MIXED_MODE */
	}
	else
		io_apic_ints[2].src_bus_irq = 0; /* ISA IRQ0 is on APIC INT 2 */

	/* pin 0 is either unconnected (type 7) or the 8259 ExtInt input */
	if (type == 7)
		io_apic_ints[0].int_type = 0xff;	/* N/C */
	else
		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
#endif	/* APIC_IO */
}
1929
1930
/*
 * start each AP in our list
 *
 * 'boot_addr' is the address in base memory that receives the AP
 * trampoline code (see install_ap_tramp()).  For every logical CPU
 * 1..mp_naps this allocates its private data page and idle stack, wires
 * them into SMPpt[], points the warm-start vector at the trampoline and
 * calls start_ap().  Returns mp_ncpus - 1.
 */
static int
start_all_aps(u_int boot_addr)
{
	int x, i, pg;
	u_char mpbiosreason;
	u_long mpbioswarmvec;
	struct pcpu *pc;
	char *stack;
	uintptr_t kptbase;

	POSTCODE(START_ALL_APS_POST);

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	/* initialize BSP's local APIC */
	apic_initialize();
	bsp_apic_ready = 1;

	/* install the AP 1st level boot code */
	install_ap_tramp(boot_addr);


	/* save the current value of the warm-start vector */
	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
#ifndef PC98
	/* also save the CMOS reset code so it can be restored afterwards */
	outb(CMOS_REG, BIOS_RESET);
	mpbiosreason = inb(CMOS_DATA);
#endif

	/* set up temporary P==V mapping for AP boot */
	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
	kptbase = (uintptr_t)(void *)KPTphys;
	for (x = 0; x < NKPT; x++)
		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
	invltlb();

	/* start each AP */
	for (x = 1; x <= mp_naps; ++x) {

		/* This is a bit verbose, it will go away soon.  */

		/* first page of AP's private space */
		pg = x * i386_btop(sizeof(struct privatespace));

		/* allocate a new private data page */
		pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);

		/* wire it into the private page table page */
		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));

		/* allocate and set up an idle stack data page */
		stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
		for (i = 0; i < KSTACK_PAGES; i++)
			SMPpt[pg + 1 + i] = (pt_entry_t)
			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

		/* prime data page for it to use */
		pcpu_init(pc, x, sizeof(struct pcpu));

		/* setup a vector to our boot code */
		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
#ifndef PC98
		outb(CMOS_REG, BIOS_RESET);
		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
#endif

		/* initial stack pointer (top of the idle stack) and CPU number for the AP */
		bootSTK = &SMP_prvspace[x].idlekstack[KSTACK_PAGES * PAGE_SIZE];
		bootAP = x;

		/* attempt to start the Application Processor */
		CHECK_INIT(99);	/* setup checkpoints */
		if (!start_ap(x, boot_addr)) {
			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
			CHECK_PRINT("trace");	/* show checkpoints */
			/* better panic as the AP may be running loose */
			printf("panic y/n? [y] ");
			if (cngetc() != 'n')
				panic("bye-bye");
		}
		CHECK_PRINT("trace");		/* show checkpoints */

		/*
		 * record its version info
		 * NOTE(review): entry 0 is copied here but only filled in
		 * from lapic.version after this loop; presumably it was set
		 * earlier elsewhere -- confirm.
		 */
		cpu_apic_versions[x] = cpu_apic_versions[0];

		all_cpus |= (1 << x);		/* record AP in CPU map */
	}

	/* build our map of 'other' CPUs */
	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

	/* fill in our (BSP) APIC version */
	cpu_apic_versions[0] = lapic.version;

	/* restore the warmstart vector */
	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
#ifndef PC98
	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, mpbiosreason);
#endif

	/*
	 * Set up the idle context for the BSP.  Similar to above except
	 * that some was done by locore, some by pmap.c and some is implicit
	 * because the BSP is cpu#0 and the page is initially zero, and also
	 * because we can refer to variables by name on the BSP..
	 */

	/* Allocate and setup BSP idle stack */
	stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
	for (i = 0; i < KSTACK_PAGES; i++)
		SMPpt[1 + i] = (pt_entry_t)
		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

	/* remove the temporary P==V mapping installed above */
	for (x = 0; x < NKPT; x++)
		PTD[x] = 0;
	pmap_set_opt();

	/*
	 * number of APs actually started
	 * (mp_ncpus is not modified in this function; start_ap() watches
	 * it to detect that an AP has come up)
	 */
	return mp_ncpus - 1;
}
2056
2057
/*
 * load the 1st level AP boot code into base memory.
 *
 * The code is copied byte by byte from bootMP to boot_addr (accessed
 * through its KERNBASE alias), then four locations inside the copy are
 * patched so that they refer to the copy's final location.
 */

/* targets for relocation */
extern void bigJump(void);
extern void bootCodeSeg(void);
extern void bootDataSeg(void);
extern void MPentry(void);
extern u_int MP_GDT;
extern u_int mp_gdtbase;

static void
install_ap_tramp(u_int boot_addr)
{
	int x;
	int size = *(int *) ((u_long) & bootMP_size);
	u_char *src = (u_char *) ((u_long) bootMP);
	u_char *dst = (u_char *) boot_addr + KERNBASE;
	u_int boot_base = (u_int) bootMP;
	u_int8_t *dst8;
	u_int16_t *dst16;
	u_int32_t *dst32;

	POSTCODE(INSTALL_AP_TRAMP_POST);

	/* copy 'size' bytes of boot code */
	for (x = 0; x < size; ++x)
		*dst++ = *src++;

	/*
	 * modify addresses in code we just moved to basemem. unfortunately we
	 * need fairly detailed info about mpboot.s for this to work.  changes
	 * to mpboot.s might require changes here.
	 *
	 * Each patch location is found as: start of the copy plus the
	 * symbol's offset from bootMP (boot_base).
	 */

	/* boot code is located in KERNEL space */
	dst = (u_char *) boot_addr + KERNBASE;

	/* modify the lgdt arg */
	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);

	/* modify the ljmp target for MPentry() */
	/* (the 32-bit operand starts one byte past the bigJump label) */
	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
	*dst32 = ((u_int) MPentry - KERNBASE);

	/* modify the target for boot code segment */
	/* low 16 bits of boot_addr, followed by bits 16..23 in the next byte */
	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_addr & 0xffff;
	*dst8 = ((u_int) boot_addr >> 16) & 0xff;

	/* modify the target for boot data segment */
	/* same 16 + 8 bit layout as the code segment patch above */
	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_addr & 0xffff;
	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
}
2116
2117
/*
 * this function starts the AP (application processor) identified
 * by the logical CPU number 'logical_cpu', which is translated to the
 * physical APIC ID with CPU_TO_ID().  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It ain't pretty,
 * but it seems to work.
 *
 * 'boot_addr' is the address of the AP boot code; bits 12..19 of it are
 * sent as the STARTUP IPI vector.
 *
 * Returns 1 if mp_ncpus grew before the APIC timer (set to 5000000)
 * ran out, 0 otherwise.
 */
static int
start_ap(int logical_cpu, u_int boot_addr)
{
	int physical_cpu;
	int vector;
	int cpus;
	u_long icr_lo, icr_hi;

	POSTCODE(START_AP_POST);

	/* get the PHYSICAL APIC ID# */
	physical_cpu = CPU_TO_ID(logical_cpu);

	/* calculate the vector */
	vector = (boot_addr >> 12) & 0xff;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_ncpus;

	/*
	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
	 * and running the target CPU. OR this INIT IPI might be latched (P5
	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
	 * ignored.
	 */

	/* setup the address for the target AP */
	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
	icr_hi |= (physical_cpu << 24);
	lapic.icr_hi = icr_hi;

	/* do an INIT IPI: assert RESET */
	/* (only the upper 12 bits of the current icr_lo value are kept) */
	icr_lo = lapic.icr_lo & 0xfff00000;
	lapic.icr_lo = icr_lo | 0x0000c500;

	/* wait for pending status end */
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;

	/* do an INIT IPI: deassert RESET */
	lapic.icr_lo = icr_lo | 0x00008500;

	/* wait for pending status end */
	u_sleep(10000);		/* wait ~10mS */
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;

	/*
	 * next we do a STARTUP IPI: the previous INIT IPI might still be
	 * latched, (P5 bug) this 1st STARTUP would then terminate
	 * immediately, and the previously started INIT IPI would continue. OR
	 * the previous INIT IPI has already run. and this STARTUP IPI will
	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
	 * will run.
	 */

	/* do a STARTUP IPI */
	lapic.icr_lo = icr_lo | 0x00000600 | vector;
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;
	u_sleep(200);		/* wait ~200uS */

	/*
	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
	 * recognized after hardware RESET or INIT IPI.
	 */

	lapic.icr_lo = icr_lo | 0x00000600 | vector;
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;
	u_sleep(200);		/* wait ~200uS */

	/* wait for it to start */
	/* success is detected by mp_ncpus rising above the saved value */
	set_apic_timer(5000000);/* == 5 seconds */
	while (read_apic_timer())
		if (mp_ncpus > cpus)
			return 1;	/* return SUCCESS */

	return 0;		/* return FAILURE */
}
2207
2208 #if defined(APIC_IO)
2209
#ifdef COUNT_XINVLTLB_HITS
/*
 * Optional TLB shootdown statistics, exported read/write below the
 * debug.xhits sysctl node.
 */

/*
 * Per-CPU arrays (MAXCPU entries each), exported as opaque blobs.
 * They are not updated anywhere in this part of the file.
 */
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
    sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
    sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
    sizeof(xhits_rng), "IU", "");

/*
 * Counters bumped by smp_invltlb(), smp_invlpg() and smp_invlpg_range();
 * ipi_range_size accumulates the size of the ranges in pages.
 */
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
    0, "");

/*
 * Same set of counters for the smp_masked_*() entry points
 * (presumably; their bodies are not visible in this part of the file).
 */
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
    &ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
    &ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
    &ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
    &ipi_masked_range_size, 0, "");
#endif
2245
2246 /*
2247 * Flush the TLB on all other CPU's
2248 */
2249 static void
2250 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
2251 {
2252 u_int ncpu;
2253 register_t eflags;
2254
2255 ncpu = mp_ncpus - 1; /* does not shootdown self */
2256 if (ncpu < 1)
2257 return; /* no other cpus */
2258 eflags = read_eflags();
2259 if ((eflags & PSL_I) == 0)
2260 panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
2261 mtx_lock_spin(&smp_tlb_mtx);
2262 smp_tlb_addr1 = addr1;
2263 smp_tlb_addr2 = addr2;
2264 atomic_store_rel_int(&smp_tlb_wait, 0);
2265 ipi_all_but_self(vector);
2266 while (smp_tlb_wait < ncpu)
2267 ia32_pause();
2268 mtx_unlock_spin(&smp_tlb_mtx);
2269 }
2270
2271 /*
2272 * This is about as magic as it gets. fortune(1) has got similar code
2273 * for reversing bits in a word. Who thinks up this stuff??
2274 *
2275 * Yes, it does appear to be consistently faster than:
2276 * while (i = ffs(m)) {
2277 * m >>= i;
2278 * bits++;
2279 * }
2280 * and
2281 * while (lsb = (m & -m)) { // This is magic too
2282 * m &= ~lsb; // or: m ^= lsb
2283 * bits++;
2284 * }
2285 * Both of these latter forms do some very strange things on gcc-3.1 with
2286 * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
2287 * There is probably an SSE or MMX popcnt instruction.
2288 *
2289 * I wonder if this should be in libkern?
2290 *
2291 * XXX Stop the presses! Another one:
2292 * static __inline u_int32_t
2293 * popcnt1(u_int32_t v)
2294 * {
2295 * v -= ((v >> 1) & 0x55555555);
2296 * v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
2297 * v = (v + (v >> 4)) & 0x0F0F0F0F;
2298 * return (v * 0x01010101) >> 24;
2299 * }
2300 * The downside is that it has a multiply. With a pentium3 with
2301 * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
2302 * an imull, and in that case it is faster. In most other cases
2303 * it appears slightly slower.
2304 */
static __inline u_int32_t
popcnt(u_int32_t m)
{

	/*
	 * Count the bits set in 'm': add neighbouring fields pairwise,
	 * doubling the field width (1, 2, 4, 8, 16 bits) at every step.
	 */
	m = ((m >> 1) & 0x55555555) + (m & 0x55555555);
	m = ((m >> 2) & 0x33333333) + (m & 0x33333333);
	m = ((m >> 4) & 0x0f0f0f0f) + (m & 0x0f0f0f0f);
	m = ((m >> 8) & 0x00ff00ff) + (m & 0x00ff00ff);
	m = (m >> 16) + (m & 0x0000ffff);
	return (m);
}
2316
2317 static void
2318 smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
2319 {
2320 int ncpu, othercpus;
2321 register_t eflags;
2322
2323 othercpus = mp_ncpus - 1;
2324 if (mask == (u_int)-1) {
2325 ncpu = othercpus;
2326 if (ncpu < 1)
2327 return;
2328 } else {
2329 /* XXX there should be a pcpu self mask */
2330 mask &= ~(1 << PCPU_GET(cpuid));
2331 if (mask == 0)
2332 return;
2333 ncpu = popcnt(mask);
2334 if (ncpu > othercpus) {
2335 /* XXX this should be a panic offence */
2336 printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
2337 ncpu, othercpus);
2338 ncpu = othercpus;
2339 }
2340 /* XXX should be a panic, implied by mask == 0 above */
2341 if (ncpu < 1)
2342 return;
2343 }
2344 eflags = read_eflags();
2345 if ((eflags & PSL_I) == 0)
2346 panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
2347 mtx_lock_spin(&smp_tlb_mtx);
2348 smp_tlb_addr1 = addr1;
2349 smp_tlb_addr2 = addr2;
2350 atomic_store_rel_int(&smp_tlb_wait, 0);
2351 if (mask == (u_int)-1)
2352 ipi_all_but_self(vector);
2353 else
2354 ipi_selected(mask, vector);
2355 while (smp_tlb_wait < ncpu)
2356 ia32_pause();
2357 mtx_unlock_spin(&smp_tlb_mtx);
2358 }
2359 #endif
2360
/*
 * Issue an IPI_INVLTLB shootdown through smp_tlb_shootdown().
 * Does nothing unless APIC_IO is configured and smp_started is set.
 */
void
smp_invltlb(void)
{
#ifdef APIC_IO
	if (!smp_started)
		return;
	smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
	/* Statistics: count of global shootdowns issued. */
	ipi_global++;
#endif
#endif /* APIC_IO */
}
2373
2374 void
2375 smp_invlpg(vm_offset_t addr)
2376 {
2377 #if defined(APIC_IO)
2378 if (smp_started) {
2379 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
2380 #ifdef COUNT_XINVLTLB_HITS
2381 ipi_page++;
2382 #endif
2383 }
2384 #endif /* APIC_IO */
2385 }
2386
2387 void
2388 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
2389 {
2390 #if defined(APIC_IO)
2391 if (smp_started) {
2392 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
2393 #ifdef COUNT_XINVLTLB_HITS
2394 ipi_range++;
2395 ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
2396 #endif
2397 }
2398 #endif /* APIC_IO */
2399 }
2400
2401 void
2402 smp_masked_invltlb(u_int mask)
2403 {
2404 #if defined(APIC_IO)
2405 if (smp_started) {
2406 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
2407 #ifdef COUNT_XINVLTLB_HITS
2408 ipi_masked_global++;
2409 #endif
2410 }
2411 #endif /* APIC_IO */
2412 }
2413
2414 void
2415 smp_masked_invlpg(u_int mask, vm_offset_t addr)
2416 {
2417 #if defined(APIC_IO)
2418 if (smp_started) {
2419 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
2420 #ifdef COUNT_XINVLTLB_HITS
2421 ipi_masked_page++;
2422 #endif
2423 }
2424 #endif /* APIC_IO */
2425 }
2426
2427 void
2428 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
2429 {
2430 #if defined(APIC_IO)
2431 if (smp_started) {
2432 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
2433 #ifdef COUNT_XINVLTLB_HITS
2434 ipi_masked_range++;
2435 ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
2436 #endif
2437 }
2438 #endif /* APIC_IO */
2439 }
2440
2441
2442 /*
2443 * This is called once the rest of the system is up and running and we're
2444 * ready to let the AP's out of the pen.
2445 */
2446 extern void enable_sse(void);
2447
2448 void
2449 ap_init(void)
2450 {
2451 u_int apic_id;
2452
2453 /* spin until all the AP's are ready */
2454 while (!aps_ready)
2455 ia32_pause();
2456
2457 /* BSP may have changed PTD while we were waiting */
2458 invltlb();
2459
2460 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
2461 lidt(&r_idt);
2462 #endif
2463
2464 /* set up CPU registers and state */
2465 cpu_setregs();
2466
2467 /* set up FPU state on the AP */
2468 npxinit(__INITIAL_NPXCW__);
2469
2470 /* set up SSE registers */
2471 enable_sse();
2472
2473 /* A quick check from sanity claus */
2474 apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
2475 if (PCPU_GET(cpuid) != apic_id) {
2476 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
2477 printf("SMP: apic_id = %d\n", apic_id);
2478 printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
2479 panic("cpuid mismatch! boom!!");
2480 }
2481
2482 /* Init local apic for irq's */
2483 apic_initialize();
2484
2485 /* Set memory range attributes for this CPU to match the BSP */
2486 mem_range_AP_init();
2487
2488 mtx_lock_spin(&ap_boot_mtx);
2489
2490 smp_cpus++;
2491
2492 CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
2493 printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
2494
2495 /* Build our map of 'other' CPUs. */
2496 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
2497
2498 if (bootverbose)
2499 apic_dump("ap_init()");
2500
2501 if (smp_cpus == mp_ncpus) {
2502 /* enable IPI's, tlb shootdown, freezes etc */
2503 atomic_store_rel_int(&smp_started, 1);
2504 smp_active = 1; /* historic */
2505 }
2506
2507 mtx_unlock_spin(&ap_boot_mtx);
2508
2509 /* wait until all the AP's are up */
2510 while (smp_started == 0)
2511 ia32_pause();
2512
2513 /* ok, now grab sched_lock and enter the scheduler */
2514 mtx_lock_spin(&sched_lock);
2515
2516 binuptime(PCPU_PTR(switchtime));
2517 PCPU_SET(switchticks, ticks);
2518
2519 cpu_throw(); /* doesn't return */
2520
2521 panic("scheduler returned us to %s", __func__);
2522 }
2523
2524 /*
2525 * For statclock, we send an IPI to all CPU's to have them call this
2526 * function.
2527 *
2528 * WARNING! unpend() will call statclock_process() directly and skip this
2529 * routine.
2530 */
2531 void
2532 forwarded_statclock(struct trapframe frame)
2533 {
2534
2535 mtx_lock_spin(&sched_lock);
2536 statclock_process(curthread->td_kse, TRAPF_PC(&frame),
2537 TRAPF_USERMODE(&frame));
2538 mtx_unlock_spin(&sched_lock);
2539 }
2540
2541 void
2542 forward_statclock(void)
2543 {
2544 int map;
2545
2546 CTR0(KTR_SMP, "forward_statclock");
2547
2548 if (!smp_started || cold || panicstr)
2549 return;
2550
2551 map = PCPU_GET(other_cpus) & ~stopped_cpus ;
2552 if (map != 0)
2553 ipi_selected(map, IPI_STATCLOCK);
2554 }
2555
2556 /*
2557 * For each hardclock(), we send an IPI to all other CPU's to have them
2558 * execute this function. It would be nice to reduce contention on
2559 * sched_lock if we could simply peek at the CPU to determine the user/kernel
2560 * state and call hardclock_process() on the CPU receiving the clock interrupt
2561 * and then just use a simple IPI to handle any ast's if needed.
2562 *
2563 * WARNING! unpend() will call hardclock_process() directly and skip this
2564 * routine.
2565 */
2566 void
2567 forwarded_hardclock(struct trapframe frame)
2568 {
2569
2570 mtx_lock_spin(&sched_lock);
2571 hardclock_process(curthread, TRAPF_USERMODE(&frame));
2572 mtx_unlock_spin(&sched_lock);
2573 }
2574
2575 void
2576 forward_hardclock(void)
2577 {
2578 u_int map;
2579
2580 CTR0(KTR_SMP, "forward_hardclock");
2581
2582 if (!smp_started || cold || panicstr)
2583 return;
2584
2585 map = PCPU_GET(other_cpus) & ~stopped_cpus ;
2586 if (map != 0)
2587 ipi_selected(map, IPI_HARDCLOCK);
2588 }
2589
2590 #ifdef APIC_INTR_REORDER
2591 /*
2592 * Maintain mapping from softintr vector to isr bit in local apic.
2593 */
2594 void
2595 set_lapic_isrloc(int intr, int vector)
2596 {
2597 if (intr < 0 || intr > 32)
2598 panic("set_apic_isrloc: bad intr argument: %d",intr);
2599 if (vector < ICU_OFFSET || vector > 255)
2600 panic("set_apic_isrloc: bad vector argument: %d",vector);
2601 apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
2602 apic_isrbit_location[intr].bit = (1<<(vector & 31));
2603 }
2604 #endif
2605
2606 /*
2607 * send an IPI to a set of cpus.
2608 */
2609 void
2610 ipi_selected(u_int32_t cpus, u_int ipi)
2611 {
2612
2613 CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
2614 selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
2615 }
2616
2617 /*
2618 * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
2619 */
2620 void
2621 ipi_all(u_int ipi)
2622 {
2623
2624 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2625 apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED);
2626 }
2627
2628 /*
2629 * send an IPI to all CPUs EXCEPT myself
2630 */
2631 void
2632 ipi_all_but_self(u_int ipi)
2633 {
2634
2635 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2636 apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED);
2637 }
2638
2639 /*
2640 * send an IPI to myself
2641 */
2642 void
2643 ipi_self(u_int ipi)
2644 {
2645
2646 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2647 apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED);
2648 }
2649
2650 static void
2651 release_aps(void *dummy __unused)
2652 {
2653
2654 mtx_lock_spin(&sched_lock);
2655 atomic_store_rel_int(&aps_ready, 1);
2656 while (smp_started == 0)
2657 ia32_pause();
2658 mtx_unlock_spin(&sched_lock);
2659 }
2660
2661 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
Cache object: cd644563bcc5fe4818be3fbf1aa55e2e
|