1 /*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 * derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD$
26 */
27
28 #include "opt_smp.h"
29 #include "opt_vm86.h"
30 #include "opt_cpu.h"
31 #include "opt_user_ldt.h"
32
33 #ifdef SMP
34 #include <machine/smptests.h>
35 #else
36 #error
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/malloc.h>
43 #include <sys/memrange.h>
44 #include <sys/proc.h>
45 #include <sys/sysctl.h>
46 #ifdef BETTER_CLOCK
47 #include <sys/dkstat.h>
48 #endif
49
50 #include <vm/vm.h>
51 #include <vm/vm_param.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_extern.h>
55 #ifdef BETTER_CLOCK
56 #include <sys/lock.h>
57 #include <vm/vm_map.h>
58 #include <sys/user.h>
59 #ifdef GPROF
60 #include <sys/gmon.h>
61 #endif
62 #endif
63
64 #include <machine/smp.h>
65 #include <machine/apic.h>
66 #include <machine/atomic.h>
67 #include <machine/cpufunc.h>
68 #include <machine/mpapic.h>
69 #include <machine/psl.h>
70 #include <machine/segments.h>
71 #include <machine/smptests.h> /** TEST_DEFAULT_CONFIG, TEST_TEST1 */
72 #include <machine/tss.h>
73 #include <machine/specialreg.h>
74 #include <machine/cputypes.h>
75 #include <machine/globaldata.h>
76
77 #include <i386/i386/cons.h> /* cngetc() */
78
79 #if defined(APIC_IO)
80 #include <machine/md_var.h> /* setidt() */
81 #include <i386/isa/icu.h> /* IPIs */
82 #include <i386/isa/intr_machdep.h> /* IPIs */
83 #endif /* APIC_IO */
84
/*
 * MPFPS_MPFB1 is the "default configuration" number from the MP floating
 * pointer structure; TEST_DEFAULT_CONFIG forces a specific value so the
 * default-config code paths can be exercised on full-table hardware.
 */
#if defined(TEST_DEFAULT_CONFIG)
#define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
#else
#define MPFPS_MPFB1	mpfps->mpfb1
#endif	/* TEST_DEFAULT_CONFIG */

/* BIOS warm-boot vector in the BIOS data area (offset 0x467/0x469) */
#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

/* region of the BIOS ROM scanned for the MP signature */
#ifdef PC98
#define BIOS_BASE		(0xe8000)
#define BIOS_SIZE		(0x18000)
#else
#define BIOS_BASE		(0xf0000)
#define BIOS_SIZE		(0x10000)
#endif
#define BIOS_COUNT		(BIOS_SIZE/4)	/* scan length in 32-bit words */

/* CMOS RTC index/data I/O ports and shutdown-status values */
#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)

/* flag bits in MP table processor and I/O APIC entries */
#define PROCENTRY_FLAG_EN	0x01
#define PROCENTRY_FLAG_BP	0x02
#define IOAPICENTRY_FLAG_EN	0x01
112
113
/* MP Floating Pointer Structure (located by signature scan, see mp_probe()) */
typedef struct MPFPS {
	char	signature[4];		/* "_MP_" */
	void	*pap;			/* physical address of MP config table */
	u_char	length;			/* structure length */
	u_char	spec_rev;		/* MP spec revision */
	u_char	checksum;
	u_char	mpfb1;			/* default config #, 0 == full table present */
	u_char	mpfb2;			/* bit 7 set: IMCR present, 'PIC mode' */
	u_char	mpfb3;
	u_char	mpfb4;
	u_char	mpfb5;
}      *mpfps_t;

/* MP Configuration Table Header (pointed at by mpfps->pap) */
typedef struct MPCTH {
	char	signature[4];
	u_short	base_table_length;	/* total bytes, including this header */
	u_char	spec_rev;
	u_char	checksum;
	u_char	oem_id[8];
	u_char	product_id[12];
	void	*oem_table_pointer;
	u_short	oem_table_size;
	u_short	entry_count;		/* # of variable entries following header */
	void	*apic_address;		/* physical address of the local APIC */
	u_short	extended_table_length;
	u_char	extended_table_checksum;
	u_char	reserved;
}      *mpcth_t;


/* base table entry, type 0: processor */
typedef struct PROCENTRY {
	u_char	type;
	u_char	apic_id;
	u_char	apic_version;
	u_char	cpu_flags;		/* PROCENTRY_FLAG_{EN,BP} */
	u_long	cpu_signature;
	u_long	feature_flags;
	u_long	reserved1;
	u_long	reserved2;
}      *proc_entry_ptr;

/* base table entry, type 1: bus */
typedef struct BUSENTRY {
	u_char	type;
	u_char	bus_id;
	char	bus_type[6];		/* blank-padded name, parsed by bus_entry() */
}      *bus_entry_ptr;

/* base table entry, type 2: I/O APIC */
typedef struct IOAPICENTRY {
	u_char	type;
	u_char	apic_id;
	u_char	apic_version;
	u_char	apic_flags;		/* IOAPICENTRY_FLAG_EN */
	void	*apic_address;		/* physical address of this I/O APIC */
}      *io_apic_entry_ptr;

/* base table entry, types 3 (I/O INT) and 4 (local INT) */
typedef struct INTENTRY {
	u_char	type;
	u_char	int_type;
	u_short	int_flags;
	u_char	src_bus_id;
	u_char	src_bus_irq;
	u_char	dst_apic_id;		/* 255 == wildcard: all I/O APICs */
	u_char	dst_apic_int;
}      *int_entry_ptr;

/* descriptions of MP basetable entries */
typedef struct BASETABLE_ENTRY {
	u_char	type;
	u_char	length;			/* entry size; used to walk the table */
	char	name[16];
}       basetable_entry;
187
188 /*
189 * this code MUST be enabled here and in mpboot.s.
190 * it follows the very early stages of AP boot by placing values in CMOS ram.
191 * it NORMALLY will never be needed and thus the primitive method for enabling.
192 *
193 #define CHECK_POINTS
194 */
195
#if defined(CHECK_POINTS) && !defined(PC98)
/* read/write one CMOS check-point byte via the RTC index/data ports */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D)  (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * Initialize the six CMOS check-point bytes (0x34-0x39) to a known value.
 * Wrapped in do { } while (0) so the macro acts as a single statement;
 * the previous form (a stray ';' in the definition plus bare statements)
 * mis-parses inside an unbraced if/else.
 */
#define CHECK_INIT(D)				\
do {						\
	CHECK_WRITE(0x34, (D));			\
	CHECK_WRITE(0x35, (D));			\
	CHECK_WRITE(0x36, (D));			\
	CHECK_WRITE(0x37, (D));			\
	CHECK_WRITE(0x38, (D));			\
	CHECK_WRITE(0x39, (D));			\
} while (0)

/* print the current values of the six check-point bytes */
#define CHECK_PRINT(S)				\
	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
	   (S),					\
	   CHECK_READ(0x34),			\
	   CHECK_READ(0x35),			\
	   CHECK_READ(0x36),			\
	   CHECK_READ(0x37),			\
	   CHECK_READ(0x38),			\
	   CHECK_READ(0x39))

#else				/* CHECK_POINTS */

/* check points disabled: both macros compile away */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif				/* CHECK_POINTS */
224
225 /*
226 * Values to send to the POST hardware.
227 */
228 #define MP_BOOTADDRESS_POST 0x10
229 #define MP_PROBE_POST 0x11
230 #define MPTABLE_PASS1_POST 0x12
231
232 #define MP_START_POST 0x13
233 #define MP_ENABLE_POST 0x14
234 #define MPTABLE_PASS2_POST 0x15
235
236 #define START_ALL_APS_POST 0x16
237 #define INSTALL_AP_TRAMP_POST 0x17
238 #define START_AP_POST 0x18
239
240 #define MP_ANNOUNCE_POST 0x19
241
242
243 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
244 int current_postcode;
245
246 /** XXX FIXME: what system files declare these??? */
247 extern struct region_descriptor r_gdt, r_idt;
248
249 int bsp_apic_ready = 0; /* flags useability of BSP apic */
250 int mp_ncpus; /* # of CPUs, including BSP */
251 int mp_naps; /* # of Applications processors */
252 int mp_nbusses; /* # of busses */
253 int mp_napics; /* # of IO APICs */
254 int boot_cpu_id; /* designated BSP */
255 vm_offset_t cpu_apic_address;
256 vm_offset_t io_apic_address[NAPICID]; /* NAPICID is more than enough */
257 extern int nkpt;
258
259 u_int32_t cpu_apic_versions[NCPU];
260 u_int32_t io_apic_versions[NAPIC];
261
262 #ifdef APIC_INTR_DIAGNOSTIC
263 int apic_itrace_enter[32];
264 int apic_itrace_tryisrlock[32];
265 int apic_itrace_gotisrlock[32];
266 int apic_itrace_active[32];
267 int apic_itrace_masked[32];
268 int apic_itrace_noisrlock[32];
269 int apic_itrace_masked2[32];
270 int apic_itrace_unmask[32];
271 int apic_itrace_noforward[32];
272 int apic_itrace_leave[32];
273 int apic_itrace_enter2[32];
274 int apic_itrace_doreti[32];
275 int apic_itrace_splz[32];
276 int apic_itrace_eoi[32];
277 #ifdef APIC_INTR_DIAGNOSTIC_IRQ
278 unsigned short apic_itrace_debugbuffer[32768];
279 int apic_itrace_debugbuffer_idx;
280 struct simplelock apic_itrace_debuglock;
281 #endif
282 #endif
283
284 #ifdef APIC_INTR_REORDER
285 struct {
286 volatile int *location;
287 int bit;
288 } apic_isrbit_location[32];
289 #endif
290
291 struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE];
292
293 /*
294 * APIC ID logical/physical mapping structures.
295 * We oversize these to simplify boot-time config.
296 */
297 int cpu_num_to_apic_id[NAPICID];
298 int io_num_to_apic_id[NAPICID];
299 int apic_id_to_logical[NAPICID];
300
301
302 /* Bitmap of all available CPUs */
303 u_int all_cpus;
304
305 /* AP uses this PTD during bootstrap. Do not staticize. */
306 pd_entry_t *bootPTD;
307
308 /* Hotwire a 0->4MB V==P mapping */
309 extern pt_entry_t *KPTphys;
310
311 /* Virtual address of per-cpu common_tss */
312 extern struct i386tss common_tss;
313 #ifdef VM86
314 extern struct segment_descriptor common_tssd;
315 extern u_int private_tss; /* flag indicating private tss */
316 extern u_int my_tr;
317 #endif /* VM86 */
318
319 /* IdlePTD per cpu */
320 pd_entry_t *IdlePTDS[NCPU];
321
322 /* "my" private page table page, for BSP init */
323 extern pt_entry_t SMP_prvpt[];
324
325 /* Private page pointer to curcpu's PTD, used during BSP init */
326 extern pd_entry_t *my_idlePTD;
327
328 struct pcb stoppcbs[NCPU];
329
330 int smp_started; /* has the system started? */
331
332 /*
333 * Local data and functions.
334 */
335
336 static int mp_capable;
337 static u_int boot_address;
338 static u_int base_memory;
339
340 static int picmode; /* 0: virtual wire mode, 1: PIC mode */
341 static mpfps_t mpfps;
342 static int search_for_sig(u_int32_t target, int count);
343 static void mp_enable(u_int boot_addr);
344
345 static int mptable_pass1(void);
346 static int mptable_pass2(void);
347 static void default_mp_table(int type);
348 static void fix_mp_table(void);
349 static void setup_apic_irq_mapping(void);
350 static void init_locks(void);
351 static int start_all_aps(u_int boot_addr);
352 static void install_ap_tramp(u_int boot_addr);
353 static int start_ap(int logicalCpu, u_int boot_addr);
354
355 /*
356 * Calculate usable address in base memory for AP trampoline code.
357 */
358 u_int
359 mp_bootaddress(u_int basemem)
360 {
361 POSTCODE(MP_BOOTADDRESS_POST);
362
363 base_memory = basemem * 1024; /* convert to bytes */
364
365 boot_address = base_memory & ~0xfff; /* round down to 4k boundary */
366 if ((base_memory - boot_address) < bootMP_size)
367 boot_address -= 4096; /* not enough, lower by 4k */
368
369 return boot_address;
370 }
371
372
373 /*
374 * Look for an Intel MP spec table (ie, SMP capable hardware).
375 */
376 int
377 mp_probe(void)
378 {
379 int x;
380 u_long segment;
381 u_int32_t target;
382
383 POSTCODE(MP_PROBE_POST);
384
385 /* see if EBDA exists */
386 if (segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) {
387 /* search first 1K of EBDA */
388 target = (u_int32_t) (segment << 4);
389 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
390 goto found;
391 } else {
392 /* last 1K of base memory, effective 'top of base' passed in */
393 target = (u_int32_t) (base_memory - 0x400);
394 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
395 goto found;
396 }
397
398 /* search the BIOS */
399 target = (u_int32_t) BIOS_BASE;
400 if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
401 goto found;
402
403 /* nothing found */
404 mpfps = (mpfps_t)0;
405 mp_capable = 0;
406 return 0;
407
408 found:
409 /* calculate needed resources */
410 mpfps = (mpfps_t)x;
411 if (mptable_pass1())
412 panic("you must reconfigure your kernel");
413
414 /* flag fact that we are running multiple processors */
415 mp_capable = 1;
416 return 1;
417 }
418
419
420 /*
421 * Startup the SMP processors.
422 */
423 void
424 mp_start(void)
425 {
426 POSTCODE(MP_START_POST);
427
428 /* look for MP capable motherboard */
429 if (mp_capable)
430 mp_enable(boot_address);
431 else
432 panic("MP hardware not found!");
433 }
434
435
436 /*
437 * Print various information about the SMP system hardware and setup.
438 */
439 void
440 mp_announce(void)
441 {
442 int x;
443
444 POSTCODE(MP_ANNOUNCE_POST);
445
446 printf("FreeBSD/SMP: Multiprocessor motherboard\n");
447 printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
448 printf(", version: 0x%08x", cpu_apic_versions[0]);
449 printf(", at 0x%08x\n", cpu_apic_address);
450 for (x = 1; x <= mp_naps; ++x) {
451 printf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x));
452 printf(", version: 0x%08x", cpu_apic_versions[x]);
453 printf(", at 0x%08x\n", cpu_apic_address);
454 }
455
456 #if defined(APIC_IO)
457 for (x = 0; x < mp_napics; ++x) {
458 printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
459 printf(", version: 0x%08x", io_apic_versions[x]);
460 printf(", at 0x%08x\n", io_apic_address[x]);
461 }
462 #else
463 printf(" Warning: APIC I/O disabled\n");
464 #endif /* APIC_IO */
465 }
466
467 /*
468 * AP cpu's call this to sync up protected mode.
469 */
void
init_secondary(void)
{
	int	gsel_tss;
#ifndef VM86
	u_int	my_tr;	/* with VM86 this is a shared extern instead */
#endif

	/* load this AP's descriptor tables (GDT covers NGDT + per-CPU slots) */
	r_gdt.rd_limit = sizeof(gdt[0]) * (NGDT + NCPU) - 1;
	r_gdt.rd_base = (int) gdt;
	lgdt(&r_gdt);		/* does magic intra-segment return */
	lidt(&r_idt);
	lldt(_default_ldt);
#ifdef USER_LDT
	currentldt = _default_ldt;
#endif

	/* each CPU gets its own TSS slot, located after the NGDT entries */
	my_tr = NGDT + cpuid;
	gsel_tss = GSEL(my_tr, SEL_KPL);
	/* re-arm the descriptor: loading a TSS marks it busy */
	gdt[my_tr].sd.sd_type = SDT_SYS386TSS;
	common_tss.tss_esp0 = 0;	/* not used until after switch */
	common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	/* I/O permission bitmap offset placed past the end of the TSS */
	common_tss.tss_ioopt = (sizeof common_tss) << 16;
#ifdef VM86
	common_tssd = gdt[my_tr].sd;
	private_tss = 0;
#endif /* VM86 */
	ltr(gsel_tss);

	/* hardwired CR0: PG|AM|WP|NE|ET|TS|MP|PE */
	load_cr0(0x8005003b);		/* XXX! */

	/* drop the boot-time 0->4MB identity mapping and set PTD options */
	PTD[0] = 0;
	pmap_set_opt((unsigned *)PTD);

	invltlb();
}
506
507
#if defined(APIC_IO)
/*
 * Final configuration of the BSP's local APIC:
 *  - disable 'pic mode'.
 *  - disable 'virtual wire mode'.
 *  - enable NMI.
 */
void
bsp_apic_configure(void)
{
	u_char		byte;
	u_int32_t	temp;

	/*
	 * Leave 'pic mode' if necessary: program the IMCR (accessed via
	 * ports 0x22/0x23) to route interrupts through the APIC rather
	 * than directly from the 8259s.
	 */
	if (picmode) {
		outb(0x22, 0x70);	/* select IMCR */
		byte = inb(0x23);	/* current contents */
		byte |= 0x01;		/* mask external INTR */
		outb(0x23, byte);	/* disconnect 8259s/NMI */
	}

	/* mask lint0 (the 8259 'virtual wire' connection) */
	temp = lapic.lvt_lint0;
	temp |= APIC_LVT_M;	/* set the mask */
	lapic.lvt_lint0 = temp;

	/* setup lint1 to handle NMI */
	temp = lapic.lvt_lint1;
	temp &= ~APIC_LVT_M;	/* clear the mask */
	lapic.lvt_lint1 = temp;

	if (bootverbose)
		apic_dump("bsp_apic_configure()");
}
#endif  /* APIC_IO */
543
544
545 /*******************************************************************
546 * local functions and data
547 */
548
549 /*
550 * start the SMP system
551 */
static void
mp_enable(u_int boot_addr)
{
	int     x;
#if defined(APIC_IO)
	int     apic;
	u_int   ux;
#endif	/* APIC_IO */

	POSTCODE(MP_ENABLE_POST);

	/* turn on 4MB of V == P addressing so we can get to MP table */
	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
	invltlb();

	/* examine the MP table for needed info, uses physical addresses */
	x = mptable_pass2();

	/* tear the temporary identity mapping back down */
	*(int *)PTD = 0;
	invltlb();

	/* can't process default configs till the CPU APIC is pmapped */
	if (x)
		default_mp_table(x);

	/* post scan cleanup */
	fix_mp_table();
	setup_apic_irq_mapping();

#if defined(APIC_IO)

	/* fill the LOGICAL io_apic_versions table */
	for (apic = 0; apic < mp_napics; ++apic) {
		ux = io_apic_read(apic, IOAPIC_VER);
		io_apic_versions[apic] = ux;
	}

	/* program each IO APIC in the system */
	for (apic = 0; apic < mp_napics; ++apic)
		if (io_apic_setup(apic) < 0)
			panic("IO APIC setup failure");

	/* install a 'Spurious INTerrupt' vector */
	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for TLB invalidation */
	setidt(XINVLTLB_OFFSET, Xinvltlb,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

#ifdef BETTER_CLOCK
	/* install an inter-CPU IPI for reading processor state */
	setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif

	/* install an inter-CPU IPI for all-CPU rendezvous */
	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forcing an additional software trap */
	setidt(XCPUAST_OFFSET, Xcpuast,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for interrupt forwarding */
	setidt(XFORWARD_IRQ_OFFSET, Xforward_irq,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for CPU stop/restart */
	setidt(XCPUSTOP_OFFSET, Xcpustop,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

#if defined(TEST_TEST1)
	/* install a "fake hardware INTerrupt" vector */
	setidt(XTEST1_OFFSET, Xtest1,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif  /** TEST_TEST1 */

#endif	/* APIC_IO */

	/* initialize all SMP locks */
	init_locks();

	/* start each Application Processor */
	start_all_aps(boot_addr);

	/*
	 * The init process might be started on a different CPU now,
	 * and the boot CPU might not call prepare_usermode to get
	 * cr0 correctly configured. Thus we initialize cr0 here.
	 */
	load_cr0(rcr0() | CR0_WP | CR0_AM);
}
645
646
647 /*
648 * look for the MP spec signature
649 */
650
651 /* string defined by the Intel MP Spec as identifying the MP table */
652 #define MP_SIG 0x5f504d5f /* _MP_ */
653 #define NEXT(X) ((X) += 4)
654 static int
655 search_for_sig(u_int32_t target, int count)
656 {
657 int x;
658 u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
659
660 for (x = 0; x < count; NEXT(x))
661 if (addr[x] == MP_SIG)
662 /* make array index a byte index */
663 return (target + (x * sizeof(u_int32_t)));
664
665 return -1;
666 }
667
668
/* size and name of each base table entry type, indexed by type code */
static basetable_entry basetable_entry_types[] =
{
	{0, 20, "Processor"},
	{1, 8, "Bus"},
	{2, 8, "I/O APIC"},
	{3, 8, "I/O INT"},
	{4, 8, "Local INT"}
};

/* in-core record built from each MP table bus entry */
typedef struct BUSDATA {
	u_char  bus_id;
	enum busTypes bus_type;
}       bus_datum;

/* in-core record of an interrupt entry, plus the IRQ assigned to it */
typedef struct INTDATA {
	u_char  int_type;
	u_short int_flags;
	u_char  src_bus_id;
	u_char  src_bus_irq;
	u_char  dst_apic_id;
	u_char  dst_apic_int;
	u_char	int_vector;	/* 0xff until assign_apic_irq() fills it in */
}       io_int, local_int;

typedef struct BUSTYPENAME {
	u_char  type;
	char    name[7];
}       bus_type_name;

/* bus name -> type code table, searched by lookup_bus_type() */
static bus_type_name bus_type_table[] =
{
	{CBUS, "CBUS"},
	{CBUSII, "CBUSII"},
	{EISA, "EISA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{ISA, "ISA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{PCI, "PCI"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{XPRESS, "XPRESS"},
	{UNKNOWN_BUSTYPE, "---"}
};
/* from MP spec v1.4, table 5-1 */
static int default_data[7][5] =
{
/*  nbus, id0, type0, id1, type1 */
	{1, 0, ISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{0, 255, 255, 255, 255},/* MCA not supported */
	{2, 0, ISA, 1, PCI},
	{2, 0, EISA, 1, PCI},
	{0, 255, 255, 255, 255}	/* MCA not supported */
};


/* the bus data */
static bus_datum bus_data[NBUS];

/* the IO INT data, one entry per possible APIC INTerrupt */
static io_int  io_apic_ints[NINTR];

static int nintrs;	/* # of valid entries in io_apic_ints[] */

static int processor_entry	__P((proc_entry_ptr entry, int cpu));
static int bus_entry		__P((bus_entry_ptr entry, int bus));
static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
static int int_entry		__P((int_entry_ptr entry, int intr));
static int lookup_bus_type	__P((char *name));
747
748
749 /*
750 * 1st pass on motherboard's Intel MP specification table.
751 *
752 * initializes:
753 * mp_ncpus = 1
754 *
755 * determines:
756 * cpu_apic_address (common to all CPUs)
757 * io_apic_address[N]
758 * mp_naps
759 * mp_nbusses
760 * mp_napics
761 * nintrs
762 */
763 static int
764 mptable_pass1(void)
765 {
766 int x;
767 mpcth_t cth;
768 int totalSize;
769 void* position;
770 int count;
771 int type;
772 int mustpanic;
773
774 POSTCODE(MPTABLE_PASS1_POST);
775
776 mustpanic = 0;
777
778 /* clear various tables */
779 for (x = 0; x < NAPICID; ++x) {
780 io_apic_address[x] = ~0; /* IO APIC address table */
781 }
782
783 /* init everything to empty */
784 mp_naps = 0;
785 mp_nbusses = 0;
786 mp_napics = 0;
787 nintrs = 0;
788
789 /* check for use of 'default' configuration */
790 if (MPFPS_MPFB1 != 0) {
791 /* use default addresses */
792 cpu_apic_address = DEFAULT_APIC_BASE;
793 io_apic_address[0] = DEFAULT_IO_APIC_BASE;
794
795 /* fill in with defaults */
796 mp_naps = 2; /* includes BSP */
797 mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
798 #if defined(APIC_IO)
799 mp_napics = 1;
800 nintrs = 16;
801 #endif /* APIC_IO */
802 }
803 else {
804 if ((cth = mpfps->pap) == 0)
805 panic("MP Configuration Table Header MISSING!");
806
807 cpu_apic_address = (vm_offset_t) cth->apic_address;
808
809 /* walk the table, recording info of interest */
810 totalSize = cth->base_table_length - sizeof(struct MPCTH);
811 position = (u_char *) cth + sizeof(struct MPCTH);
812 count = cth->entry_count;
813
814 while (count--) {
815 switch (type = *(u_char *) position) {
816 case 0: /* processor_entry */
817 if (((proc_entry_ptr)position)->cpu_flags
818 & PROCENTRY_FLAG_EN)
819 ++mp_naps;
820 break;
821 case 1: /* bus_entry */
822 ++mp_nbusses;
823 break;
824 case 2: /* io_apic_entry */
825 if (((io_apic_entry_ptr)position)->apic_flags
826 & IOAPICENTRY_FLAG_EN)
827 io_apic_address[mp_napics++] =
828 (vm_offset_t)((io_apic_entry_ptr)
829 position)->apic_address;
830 break;
831 case 3: /* int_entry */
832 ++nintrs;
833 break;
834 case 4: /* int_entry */
835 break;
836 default:
837 panic("mpfps Base Table HOSED!");
838 /* NOTREACHED */
839 }
840
841 totalSize -= basetable_entry_types[type].length;
842 (u_char*)position += basetable_entry_types[type].length;
843 }
844 }
845
846 /* qualify the numbers */
847 if (mp_naps > NCPU)
848 #if 0 /* XXX FIXME: kern/4255 */
849 printf("Warning: only using %d of %d available CPUs!\n",
850 NCPU, mp_naps);
851 #else
852 {
853 printf("NCPU cannot be different than actual CPU count.\n");
854 printf(" add 'options NCPU=%d' to your kernel config file,\n",
855 mp_naps);
856 printf(" then rerun config & rebuild your SMP kernel\n");
857 mustpanic = 1;
858 }
859 #endif /* XXX FIXME: kern/4255 */
860 if (mp_nbusses > NBUS) {
861 printf("found %d busses, increase NBUS\n", mp_nbusses);
862 mustpanic = 1;
863 }
864 if (mp_napics > NAPIC) {
865 printf("found %d apics, increase NAPIC\n", mp_napics);
866 mustpanic = 1;
867 }
868 if (nintrs > NINTR) {
869 printf("found %d intrs, increase NINTR\n", nintrs);
870 mustpanic = 1;
871 }
872
873 /*
874 * Count the BSP.
875 * This is also used as a counter while starting the APs.
876 */
877 mp_ncpus = 1;
878
879 --mp_naps; /* subtract the BSP */
880
881 return mustpanic;
882 }
883
884
885 /*
886 * 2nd pass on motherboard's Intel MP specification table.
887 *
888 * sets:
889 * boot_cpu_id
890 * ID_TO_IO(N), phy APIC ID to log CPU/IO table
891 * CPU_TO_ID(N), logical CPU to APIC ID table
892 * IO_TO_ID(N), logical IO to APIC ID table
893 * bus_data[N]
894 * io_apic_ints[N]
895 */
896 static int
897 mptable_pass2(void)
898 {
899 int x;
900 mpcth_t cth;
901 int totalSize;
902 void* position;
903 int count;
904 int type;
905 int apic, bus, cpu, intr;
906
907 POSTCODE(MPTABLE_PASS2_POST);
908
909 /* clear various tables */
910 for (x = 0; x < NAPICID; ++x) {
911 ID_TO_IO(x) = -1; /* phy APIC ID to log CPU/IO table */
912 CPU_TO_ID(x) = -1; /* logical CPU to APIC ID table */
913 IO_TO_ID(x) = -1; /* logical IO to APIC ID table */
914 }
915
916 /* clear bus data table */
917 for (x = 0; x < NBUS; ++x)
918 bus_data[x].bus_id = 0xff;
919
920 /* clear IO APIC INT table */
921 for (x = 0; x < NINTR; ++x) {
922 io_apic_ints[x].int_type = 0xff;
923 io_apic_ints[x].int_vector = 0xff;
924 }
925
926 /* setup the cpu/apic mapping arrays */
927 boot_cpu_id = -1;
928
929 /* record whether PIC or virtual-wire mode */
930 picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
931
932 /* check for use of 'default' configuration */
933 if (MPFPS_MPFB1 != 0)
934 return MPFPS_MPFB1; /* return default configuration type */
935
936 if ((cth = mpfps->pap) == 0)
937 panic("MP Configuration Table Header MISSING!");
938
939 /* walk the table, recording info of interest */
940 totalSize = cth->base_table_length - sizeof(struct MPCTH);
941 position = (u_char *) cth + sizeof(struct MPCTH);
942 count = cth->entry_count;
943 apic = bus = intr = 0;
944 cpu = 1; /* pre-count the BSP */
945
946 while (count--) {
947 switch (type = *(u_char *) position) {
948 case 0:
949 if (processor_entry(position, cpu))
950 ++cpu;
951 break;
952 case 1:
953 if (bus_entry(position, bus))
954 ++bus;
955 break;
956 case 2:
957 if (io_apic_entry(position, apic))
958 ++apic;
959 break;
960 case 3:
961 if (int_entry(position, intr))
962 ++intr;
963 break;
964 case 4:
965 /* int_entry(position); */
966 break;
967 default:
968 panic("mpfps Base Table HOSED!");
969 /* NOTREACHED */
970 }
971
972 totalSize -= basetable_entry_types[type].length;
973 (u_char *) position += basetable_entry_types[type].length;
974 }
975
976 if (boot_cpu_id == -1)
977 panic("NO BSP found!");
978
979 /* report fact that its NOT a default configuration */
980 return 0;
981 }
982
983
984 static void
985 assign_apic_irq(int apic, int intpin, int irq)
986 {
987 int x;
988
989 if (int_to_apicintpin[irq].ioapic != -1)
990 panic("assign_apic_irq: inconsistent table");
991
992 int_to_apicintpin[irq].ioapic = apic;
993 int_to_apicintpin[irq].int_pin = intpin;
994 int_to_apicintpin[irq].apic_address = ioapic[apic];
995 int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
996
997 for (x = 0; x < nintrs; x++) {
998 if ((io_apic_ints[x].int_type == 0 ||
999 io_apic_ints[x].int_type == 3) &&
1000 io_apic_ints[x].int_vector == 0xff &&
1001 io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1002 io_apic_ints[x].dst_apic_int == intpin)
1003 io_apic_ints[x].int_vector = irq;
1004 }
1005 }
1006
1007 /*
1008 * parse an Intel MP specification table
1009 */
1010 static void
1011 fix_mp_table(void)
1012 {
1013 int x;
1014 int id;
1015 int bus_0 = 0; /* Stop GCC warning */
1016 int bus_pci = 0; /* Stop GCC warning */
1017 int num_pci_bus;
1018
1019 /*
1020 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1021 * did it wrong. The MP spec says that when more than 1 PCI bus
1022 * exists the BIOS must begin with bus entries for the PCI bus and use
1023 * actual PCI bus numbering. This implies that when only 1 PCI bus
1024 * exists the BIOS can choose to ignore this ordering, and indeed many
1025 * MP motherboards do ignore it. This causes a problem when the PCI
1026 * sub-system makes requests of the MP sub-system based on PCI bus
1027 * numbers. So here we look for the situation and renumber the
1028 * busses and associated INTs in an effort to "make it right".
1029 */
1030
1031 /* find bus 0, PCI bus, count the number of PCI busses */
1032 for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1033 if (bus_data[x].bus_id == 0) {
1034 bus_0 = x;
1035 }
1036 if (bus_data[x].bus_type == PCI) {
1037 ++num_pci_bus;
1038 bus_pci = x;
1039 }
1040 }
1041 /*
1042 * bus_0 == slot of bus with ID of 0
1043 * bus_pci == slot of last PCI bus encountered
1044 */
1045
1046 /* check the 1 PCI bus case for sanity */
1047 if (num_pci_bus == 1) {
1048
1049 /* if it is number 0 all is well */
1050 if (bus_data[bus_pci].bus_id == 0)
1051 return;
1052
1053 /* mis-numbered, swap with whichever bus uses slot 0 */
1054
1055 /* swap the bus entry types */
1056 bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1057 bus_data[bus_0].bus_type = PCI;
1058
1059 /* swap each relavant INTerrupt entry */
1060 id = bus_data[bus_pci].bus_id;
1061 for (x = 0; x < nintrs; ++x) {
1062 if (io_apic_ints[x].src_bus_id == id) {
1063 io_apic_ints[x].src_bus_id = 0;
1064 }
1065 else if (io_apic_ints[x].src_bus_id == 0) {
1066 io_apic_ints[x].src_bus_id = id;
1067 }
1068 }
1069 }
1070 }
1071
1072
/*
 * Build int_to_apicintpin[] (the IRQ -> I/O APIC pin map) and assign an
 * IRQ to every usable I/O interrupt entry.  Pins on the first I/O APIC
 * keep their pin number as the IRQ (presumably to preserve traditional
 * ISA IRQ numbering — confirm against callers); remaining entries then
 * get the lowest IRQ numbers still free.
 */
static void
setup_apic_irq_mapping(void)
{
	int	x;
	int	int_vector;

	/* Assign low level interrupt handlers */
	for (x = 0; x < APIC_INTMAPSIZE; x++) {
		int_to_apicintpin[x].ioapic = -1;
		int_to_apicintpin[x].int_pin = 0;
		int_to_apicintpin[x].apic_address = NULL;
		int_to_apicintpin[x].redirindex = 0;
	}
	/* pass 1: identity-map pins of the first I/O APIC (types 0 and 3) */
	for (x = 0; x < nintrs; x++) {
		if (io_apic_ints[x].dst_apic_int < APIC_INTMAPSIZE &&
		    io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
		    io_apic_ints[x].int_vector == 0xff &&
		    (io_apic_ints[x].int_type == 0 ||
		     io_apic_ints[x].int_type == 3)) {
			assign_apic_irq(0,
					io_apic_ints[x].dst_apic_int,
					io_apic_ints[x].dst_apic_int);
		}
	}
	/* pass 2: hand out the remaining free IRQs, lowest first */
	int_vector = 0;
	while (int_vector < APIC_INTMAPSIZE &&
	       int_to_apicintpin[int_vector].ioapic != -1)
		int_vector++;
	for (x = 0; x < nintrs && int_vector < APIC_INTMAPSIZE; x++) {
		if ((io_apic_ints[x].int_type == 0 ||
		     io_apic_ints[x].int_type == 3) &&
		    io_apic_ints[x].int_vector == 0xff) {
			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
					io_apic_ints[x].dst_apic_int,
					int_vector);
			int_vector++;
			/* skip IRQs claimed during pass 1 */
			while (int_vector < APIC_INTMAPSIZE &&
			       int_to_apicintpin[int_vector].ioapic != -1)
				int_vector++;
		}
	}
}
1115
1116
1117 static int
1118 processor_entry(proc_entry_ptr entry, int cpu)
1119 {
1120 /* check for usability */
1121 if ((cpu >= NCPU) || !(entry->cpu_flags & PROCENTRY_FLAG_EN))
1122 return 0;
1123
1124 /* check for BSP flag */
1125 if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1126 boot_cpu_id = entry->apic_id;
1127 CPU_TO_ID(0) = entry->apic_id;
1128 ID_TO_CPU(entry->apic_id) = 0;
1129 return 0; /* its already been counted */
1130 }
1131
1132 /* add another AP to list, if less than max number of CPUs */
1133 else {
1134 CPU_TO_ID(cpu) = entry->apic_id;
1135 ID_TO_CPU(entry->apic_id) = cpu;
1136 return 1;
1137 }
1138 }
1139
1140
1141 static int
1142 bus_entry(bus_entry_ptr entry, int bus)
1143 {
1144 int x;
1145 char c, name[8];
1146
1147 /* encode the name into an index */
1148 for (x = 0; x < 6; ++x) {
1149 if ((c = entry->bus_type[x]) == ' ')
1150 break;
1151 name[x] = c;
1152 }
1153 name[x] = '\0';
1154
1155 if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1156 panic("unknown bus type: '%s'", name);
1157
1158 bus_data[bus].bus_id = entry->bus_id;
1159 bus_data[bus].bus_type = x;
1160
1161 return 1;
1162 }
1163
1164
1165 static int
1166 io_apic_entry(io_apic_entry_ptr entry, int apic)
1167 {
1168 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1169 return 0;
1170
1171 IO_TO_ID(apic) = entry->apic_id;
1172 ID_TO_IO(entry->apic_id) = apic;
1173
1174 return 1;
1175 }
1176
1177
1178 static int
1179 lookup_bus_type(char *name)
1180 {
1181 int x;
1182
1183 for (x = 0; x < MAX_BUSTYPE; ++x)
1184 if (strcmp(bus_type_table[x].name, name) == 0)
1185 return bus_type_table[x].type;
1186
1187 return UNKNOWN_BUSTYPE;
1188 }
1189
1190
1191 static int
1192 int_entry(int_entry_ptr entry, int intr)
1193 {
1194 int apic;
1195
1196 io_apic_ints[intr].int_type = entry->int_type;
1197 io_apic_ints[intr].int_flags = entry->int_flags;
1198 io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1199 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1200 if (entry->dst_apic_id == 255) {
1201 /* This signal goes to all IO APICS. Select an IO APIC
1202 with sufficient number of interrupt pins */
1203 for (apic = 0; apic < mp_napics; apic++)
1204 if (((io_apic_read(apic, IOAPIC_VER) &
1205 IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1206 entry->dst_apic_int)
1207 break;
1208 if (apic < mp_napics)
1209 io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1210 else
1211 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1212 } else
1213 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1214 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1215
1216 return 1;
1217 }
1218
1219
1220 static int
1221 apic_int_is_bus_type(int intr, int bus_type)
1222 {
1223 int bus;
1224
1225 for (bus = 0; bus < mp_nbusses; ++bus)
1226 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1227 && ((int) bus_data[bus].bus_type == bus_type))
1228 return 1;
1229
1230 return 0;
1231 }
1232
1233
1234 /*
1235 * Given a traditional ISA INT mask, return an APIC mask.
1236 */
1237 u_int
1238 isa_apic_mask(u_int isa_mask)
1239 {
1240 int isa_irq;
1241 int apic_pin;
1242
1243 #if defined(SKIP_IRQ15_REDIRECT)
1244 if (isa_mask == (1 << 15)) {
1245 printf("skipping ISA IRQ15 redirect\n");
1246 return isa_mask;
1247 }
1248 #endif /* SKIP_IRQ15_REDIRECT */
1249
1250 isa_irq = ffs(isa_mask); /* find its bit position */
1251 if (isa_irq == 0) /* doesn't exist */
1252 return 0;
1253 --isa_irq; /* make it zero based */
1254
1255 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */
1256 if (apic_pin == -1)
1257 return 0;
1258
1259 return (1 << apic_pin); /* convert pin# to a mask */
1260 }
1261
1262
1263 /*
1264 * Determine which APIC pin an ISA/EISA INT is attached to.
1265 */
/* convenience accessors for fields of io_apic_ints[] records */
#define INTTYPE(I)	(io_apic_ints[(I)].int_type)
#define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
#define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
#define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))

#define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
1272 int
1273 isa_apic_irq(int isa_irq)
1274 {
1275 int intr;
1276
1277 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
1278 if (INTTYPE(intr) == 0) { /* standard INT */
1279 if (SRCBUSIRQ(intr) == isa_irq) {
1280 if (apic_int_is_bus_type(intr, ISA) ||
1281 apic_int_is_bus_type(intr, EISA))
1282 return INTIRQ(intr); /* found */
1283 }
1284 }
1285 }
1286 return -1; /* NOT found */
1287 }
1288
1289
1290 /*
1291 * Determine which APIC pin a PCI INT is attached to.
1292 */
/* decode the PCI src_bus_irq field: bits 2..6 = device#, bits 0..1 = INT line */
#define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
#define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
#define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
1296 int
1297 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1298 {
1299 int intr;
1300
1301 --pciInt; /* zero based */
1302
1303 for (intr = 0; intr < nintrs; ++intr) /* check each record */
1304 if ((INTTYPE(intr) == 0) /* standard INT */
1305 && (SRCBUSID(intr) == pciBus)
1306 && (SRCBUSDEVICE(intr) == pciDevice)
1307 && (SRCBUSLINE(intr) == pciInt)) /* a candidate IRQ */
1308 if (apic_int_is_bus_type(intr, PCI))
1309 return INTIRQ(intr); /* exact match */
1310
1311 return -1; /* NOT found */
1312 }
1313
int
next_apic_irq(int irq)
{
	int intr, ointr;
	int bus, bustype;

	bus = 0;
	bustype = 0;
	/*
	 * Phase 1: find the first standard (type 0) interrupt record that
	 * was assigned 'irq' and whose source bus is one we understand
	 * (ISA, EISA or PCI).
	 */
	for (intr = 0; intr < nintrs; intr++) {
		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
			continue;
		bus = SRCBUSID(intr);
		bustype = apic_bus_type(bus);
		if (bustype != ISA &&
		    bustype != EISA &&
		    bustype != PCI)
			continue;
		break;
	}
	if (intr >= nintrs) {
		return -1;
	}
	/*
	 * Phase 2: look for a later record describing the same source
	 * interrupt line (same bus; for PCI also same device and INT
	 * line, for ISA/EISA the same bus IRQ) that is wired to a
	 * DIFFERENT IO APIC pin.  That record's IRQ is "the next one".
	 */
	for (ointr = intr + 1; ointr < nintrs; ointr++) {
		if (INTTYPE(ointr) != 0)
			continue;
		if (bus != SRCBUSID(ointr))
			continue;
		if (bustype == PCI) {
			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
				continue;
			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
				continue;
		}
		if (bustype == ISA || bustype == EISA) {
			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
				continue;
		}
		if (INTPIN(intr) == INTPIN(ointr))
			continue;
		break;
	}
	if (ointr >= nintrs) {
		return -1;	/* no second wiring found */
	}
	return INTIRQ(ointr);
}
1360 #undef SRCBUSLINE
1361 #undef SRCBUSDEVICE
1362 #undef SRCBUSID
1363 #undef SRCBUSIRQ
1364
1365 #undef INTPIN
1366 #undef INTIRQ
1367 #undef INTAPIC
1368 #undef INTTYPE
1369
1370
1371 /*
1372 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1373 *
1374 * XXX FIXME:
1375 * Exactly what this means is unclear at this point. It is a solution
1376 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard
1377 * could route any of the ISA INTs to upper (>15) IRQ values. But most would
1378 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1379 * option.
1380 */
int
undirect_isa_irq(int rirq)
{
#if defined(READY)
	if (bootverbose)
		printf("Freeing redirected ISA irq %d.\n", rirq);
	/** FIXME: tickle the MB redirector chip */
	return ???;
#else
	/* hardware un-redirect not implemented: just log and claim success */
	if (bootverbose)
		printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
	return 0;
#endif  /* READY */
}
1395
1396
1397 /*
1398 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1399 */
int
undirect_pci_irq(int rirq)
{
#if defined(READY)
	if (bootverbose)
		printf("Freeing redirected PCI irq %d.\n", rirq);

	/** FIXME: tickle the MB redirector chip */
	return ???;
#else
	/* hardware un-redirect not implemented: just log and claim success */
	if (bootverbose)
		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
		       rirq);
	return 0;
#endif  /* READY */
}
1416
1417
1418 /*
1419 * given a bus ID, return:
1420 * the bus type if found
1421 * -1 if NOT found
1422 */
1423 int
1424 apic_bus_type(int id)
1425 {
1426 int x;
1427
1428 for (x = 0; x < mp_nbusses; ++x)
1429 if (bus_data[x].bus_id == id)
1430 return bus_data[x].bus_type;
1431
1432 return -1;
1433 }
1434
1435
1436 /*
1437 * given a LOGICAL APIC# and pin#, return:
1438 * the associated src bus ID if found
1439 * -1 if NOT found
1440 */
1441 int
1442 apic_src_bus_id(int apic, int pin)
1443 {
1444 int x;
1445
1446 /* search each of the possible INTerrupt sources */
1447 for (x = 0; x < nintrs; ++x)
1448 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1449 (pin == io_apic_ints[x].dst_apic_int))
1450 return (io_apic_ints[x].src_bus_id);
1451
1452 return -1; /* NOT found */
1453 }
1454
1455
1456 /*
1457 * given a LOGICAL APIC# and pin#, return:
1458 * the associated src bus IRQ if found
1459 * -1 if NOT found
1460 */
1461 int
1462 apic_src_bus_irq(int apic, int pin)
1463 {
1464 int x;
1465
1466 for (x = 0; x < nintrs; x++)
1467 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1468 (pin == io_apic_ints[x].dst_apic_int))
1469 return (io_apic_ints[x].src_bus_irq);
1470
1471 return -1; /* NOT found */
1472 }
1473
1474
1475 /*
1476 * given a LOGICAL APIC# and pin#, return:
1477 * the associated INTerrupt type if found
1478 * -1 if NOT found
1479 */
1480 int
1481 apic_int_type(int apic, int pin)
1482 {
1483 int x;
1484
1485 /* search each of the possible INTerrupt sources */
1486 for (x = 0; x < nintrs; ++x)
1487 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1488 (pin == io_apic_ints[x].dst_apic_int))
1489 return (io_apic_ints[x].int_type);
1490
1491 return -1; /* NOT found */
1492 }
1493
1494 int
1495 apic_irq(int apic, int pin)
1496 {
1497 int x;
1498 int res;
1499
1500 for (x = 0; x < nintrs; ++x)
1501 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1502 (pin == io_apic_ints[x].dst_apic_int)) {
1503 res = io_apic_ints[x].int_vector;
1504 if (res == 0xff)
1505 return -1;
1506 if (apic != int_to_apicintpin[res].ioapic)
1507 panic("apic_irq: inconsistent table");
1508 if (pin != int_to_apicintpin[res].int_pin)
1509 panic("apic_irq inconsistent table (2)");
1510 return res;
1511 }
1512 return -1;
1513 }
1514
1515
1516 /*
1517 * given a LOGICAL APIC# and pin#, return:
1518 * the associated trigger mode if found
1519 * -1 if NOT found
1520 */
1521 int
1522 apic_trigger(int apic, int pin)
1523 {
1524 int x;
1525
1526 /* search each of the possible INTerrupt sources */
1527 for (x = 0; x < nintrs; ++x)
1528 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1529 (pin == io_apic_ints[x].dst_apic_int))
1530 return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1531
1532 return -1; /* NOT found */
1533 }
1534
1535
1536 /*
1537 * given a LOGICAL APIC# and pin#, return:
1538 * the associated 'active' level if found
1539 * -1 if NOT found
1540 */
1541 int
1542 apic_polarity(int apic, int pin)
1543 {
1544 int x;
1545
1546 /* search each of the possible INTerrupt sources */
1547 for (x = 0; x < nintrs; ++x)
1548 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1549 (pin == io_apic_ints[x].dst_apic_int))
1550 return (io_apic_ints[x].int_flags & 0x03);
1551
1552 return -1; /* NOT found */
1553 }
1554
1555
1556 /*
1557 * set data according to MP defaults
1558 * FIXME: probably not complete yet...
1559 */
static void
default_mp_table(int type)
{
	/*
	 * Fill in the CPU/bus/IO APIC/interrupt tables from one of the
	 * MP spec "default configurations" (types 1-7) instead of a
	 * full MP config table.  MCA types (4, 7) are rejected below.
	 */
	int     ap_cpu_id;
#if defined(APIC_IO)
	u_int32_t ux;
	int     io_apic_id;
	int     pin;
#endif	/* APIC_IO */

#if 0
	printf(" MP default config type: %d\n", type);
	switch (type) {
	case 1:
		printf(" bus: ISA, APIC: 82489DX\n");
		break;
	case 2:
		printf(" bus: EISA, APIC: 82489DX\n");
		break;
	case 3:
		printf(" bus: EISA, APIC: 82489DX\n");
		break;
	case 4:
		printf(" bus: MCA, APIC: 82489DX\n");
		break;
	case 5:
		printf(" bus: ISA+PCI, APIC: Integrated\n");
		break;
	case 6:
		printf(" bus: EISA+PCI, APIC: Integrated\n");
		break;
	case 7:
		printf(" bus: MCA+PCI, APIC: Integrated\n");
		break;
	default:
		printf(" future type\n");
		break;
		/* NOTREACHED */
	}
#endif	/* 0 */

	/* local APIC id register holds the id in bits 24..27 */
	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;

	/* BSP */
	CPU_TO_ID(0) = boot_cpu_id;
	ID_TO_CPU(boot_cpu_id) = 0;

	/* one and only AP */
	CPU_TO_ID(1) = ap_cpu_id;
	ID_TO_CPU(ap_cpu_id) = 1;

#if defined(APIC_IO)
	/* one and only IO APIC */
	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;

	/*
	 * sanity check, refer to MP spec section 3.6.6, last paragraph
	 * necessary as some hardware isn't properly setting up the IO APIC
	 */
#if defined(REALLY_ANAL_IOAPICID_VALUE)
	if (io_apic_id != 2) {
#else
	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
#endif	/* REALLY_ANAL_IOAPICID_VALUE */
		/* force the IO APIC's id to 2 and verify the write took */
		ux = io_apic_read(0, IOAPIC_ID);	/* get current contents */
		ux &= ~APIC_ID_MASK;	/* clear the ID field */
		ux |= 0x02000000;	/* set it to '2' */
		io_apic_write(0, IOAPIC_ID, ux);	/* write new value */
		ux = io_apic_read(0, IOAPIC_ID);	/* re-read && test */
		if ((ux & APIC_ID_MASK) != 0x02000000)
			panic("can't control IO APIC ID, reg: 0x%08x", ux);
		io_apic_id = 2;
	}
	IO_TO_ID(0) = io_apic_id;
	ID_TO_IO(io_apic_id) = 0;
#endif	/* APIC_IO */

	/* fill out bus entries */
	switch (type) {
	case 1:
	case 2:
	case 3:
	case 5:
	case 6:
		/* bus ids/types come from the per-type default_data table */
		bus_data[0].bus_id = default_data[type - 1][1];
		bus_data[0].bus_type = default_data[type - 1][2];
		bus_data[1].bus_id = default_data[type - 1][3];
		bus_data[1].bus_type = default_data[type - 1][4];
		break;

	/* case 4: case 7:		   MCA NOT supported */
	default:		/* illegal/reserved */
		panic("BAD default MP config: %d", type);
		/* NOTREACHED */
	}

#if defined(APIC_IO)
	/* general cases from MP v1.4, table 5-2 */
	for (pin = 0; pin < 16; ++pin) {
		io_apic_ints[pin].int_type = 0;
		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
		io_apic_ints[pin].src_bus_id = 0;
		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
		io_apic_ints[pin].dst_apic_id = io_apic_id;
		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
	}

	/* special cases from MP v1.4, table 5-2 */
	if (type == 2) {
		io_apic_ints[2].int_type = 0xff;	/* N/C */
		io_apic_ints[13].int_type = 0xff;	/* N/C */
#if !defined(APIC_MIXED_MODE)
		/** FIXME: ??? */
		panic("sorry, can't support type 2 default yet");
#endif	/* APIC_MIXED_MODE */
	}
	else
		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */

	if (type == 7)
		io_apic_ints[0].int_type = 0xff;	/* N/C */
	else
		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
#endif	/* APIC_IO */
}
1686
1687
1688 /*
1689 * initialize all the SMP locks
1690 */
1691
/* NOTE: every lock below is initialized by init_locks(), just after this. */

/* critical region around IO APIC, apic_imen */
struct simplelock imen_lock;

/* critical region around splxx(), cpl, cml, cil, ipending */
struct simplelock cpl_lock;

/* Make FAST_INTR() routines sequential */
struct simplelock fast_intr_lock;

/* critical region around INTR() routines */
struct simplelock intr_lock;

/* lock regions protected in UP kernel via cli/sti */
struct simplelock mpintr_lock;

/* lock region used by kernel profiling */
struct simplelock mcount_lock;

#ifdef USE_COMLOCK
/* locks com (tty) data/hardware accesses: a FASTINTR() */
struct simplelock com_lock;
#endif /* USE_COMLOCK */

#ifdef USE_CLOCKLOCK
/* lock regions around the clock hardware */
struct simplelock clock_lock;
#endif /* USE_CLOCKLOCK */

/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
1722
static void
init_locks(void)
{
	/* One-time initialization of all the SMP spin locks, run on the BSP. */

	/*
	 * Get the initial mp_lock with a count of 1 for the BSP.
	 * This uses a LOGICAL cpu ID, ie BSP == 0.
	 */
	mp_lock = 0x00000001;

	/* ISR uses its own "giant lock" */
	isr_lock = FREE_LOCK;

#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
	s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif

	/* cli/sti emulation lock */
	s_lock_init((struct simplelock*)&mpintr_lock);

	/* kernel profiling lock */
	s_lock_init((struct simplelock*)&mcount_lock);

	/* interrupt handling locks */
	s_lock_init((struct simplelock*)&fast_intr_lock);
	s_lock_init((struct simplelock*)&intr_lock);
	s_lock_init((struct simplelock*)&imen_lock);
	s_lock_init((struct simplelock*)&cpl_lock);
	s_lock_init(&smp_rv_lock);

#ifdef USE_COMLOCK
	s_lock_init((struct simplelock*)&com_lock);
#endif /* USE_COMLOCK */
#ifdef USE_CLOCKLOCK
	s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
}
1756
1757
1758 /*
1759 * start each AP in our list
1760 */
static int
start_all_aps(u_int boot_addr)
{
	/*
	 * Start every AP listed in the MP table.  For each AP we build a
	 * private address space (cloned PTD, a private page table page,
	 * a globaldata page and an idle stack), aim the BIOS warm-boot
	 * vector at the trampoline installed at 'boot_addr', then kick
	 * the AP via start_ap().  Returns mp_ncpus - 1, the number of
	 * APs that actually came up.
	 */
	int     x, i;
	u_char  mpbiosreason;
	u_long  mpbioswarmvec;
	pd_entry_t *newptd;
	pt_entry_t *newpt;
	struct globaldata *gd;
	char *stack;
	pd_entry_t *myPTD;

	POSTCODE(START_ALL_APS_POST);

	/* initialize BSP's local APIC */
	apic_initialize();
	bsp_apic_ready = 1;

	/* install the AP 1st level boot code */
	install_ap_tramp(boot_addr);


	/* save the current value of the warm-start vector */
	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
#ifndef PC98
	/* save the CMOS reset reason so it can be restored afterwards */
	outb(CMOS_REG, BIOS_RESET);
	mpbiosreason = inb(CMOS_DATA);
#endif

	/* record BSP in CPU map */
	all_cpus = 1;

	/* start each AP */
	for (x = 1; x <= mp_naps; ++x) {

		/* This is a bit verbose, it will go away soon.  */

		/* alloc new page table directory */
		newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));

		/* Store the virtual PTD address for this CPU */
		IdlePTDS[x] = newptd;

		/* clone currently active one (ie: IdlePTD) */
		bcopy(PTD, newptd, PAGE_SIZE);  /* inc prv page pde */

		/* set up 0 -> 4MB P==V mapping for AP boot */
		newptd[0] = (void *)(uintptr_t)(PG_V | PG_RW |
		    ((uintptr_t)(void *)KPTphys & PG_FRAME));

		/* store PTD for this AP's boot sequence */
		myPTD = (pd_entry_t *)vtophys(newptd);

		/* alloc new page table page */
		newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));

		/* set the new PTD's private page to point there */
		newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt));

		/* install self referential entry */
		newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd));

		/* allocate a new private data page */
		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);

		/* wire it into the private page table page */
		newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));

		/* wire the ptp into itself for access */
		newpt[1] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt));

		/* copy in the pointer to the local apic */
		newpt[2] = SMP_prvpt[2];

		/* and the IO apic mapping[s] */
		for (i = 16; i < 32; i++)
			newpt[i] = SMP_prvpt[i];

		/* allocate and set up an idle stack data page */
		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
		for (i = 0; i < UPAGES; i++)
			newpt[i + 3] = (pt_entry_t)(PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

		/* per-cpu mapping slots, filled in lazily later */
		newpt[3 + UPAGES] = 0;	/* *prv_CMAP1 */
		newpt[4 + UPAGES] = 0;	/* *prv_CMAP2 */
		newpt[5 + UPAGES] = 0;	/* *prv_CMAP3 */
		newpt[6 + UPAGES] = 0;	/* *prv_PMAP1 */

		/* prime data page for it to use */
		gd->cpuid = x;
		gd->cpu_lockid = x << 24;
		gd->my_idlePTD = myPTD;
		gd->prv_CMAP1 = &newpt[3 + UPAGES];
		gd->prv_CMAP2 = &newpt[4 + UPAGES];
		gd->prv_CMAP3 = &newpt[5 + UPAGES];
		gd->prv_PMAP1 = &newpt[6 + UPAGES];

		/* setup a vector to our boot code */
		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
#ifndef PC98
		outb(CMOS_REG, BIOS_RESET);
		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
#endif

		/* PTD the starting AP loads — presumably read by the
		   trampoline/MPentry path; confirm against mpboot.s */
		bootPTD = myPTD;
		/* attempt to start the Application Processor */
		CHECK_INIT(99);	/* setup checkpoints */
		if (!start_ap(x, boot_addr)) {
			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
			CHECK_PRINT("trace");	/* show checkpoints */
			/* better panic as the AP may be running loose */
			printf("panic y/n? [y] ");
			if (cngetc() != 'n')
				panic("bye-bye");
		}
		CHECK_PRINT("trace");	/* show checkpoints */

		/* record its version info */
		cpu_apic_versions[x] = cpu_apic_versions[0];

		all_cpus |= (1 << x);	/* record AP in CPU map */
	}

	/* build our map of 'other' CPUs */
	other_cpus = all_cpus & ~(1 << cpuid);

	/* fill in our (BSP) APIC version */
	cpu_apic_versions[0] = lapic.version;

	/* restore the warmstart vector */
	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
#ifndef PC98
	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, mpbiosreason);
#endif

	/*
	 * Set up the idle context for the BSP.  Similar to above except
	 * that some was done by locore, some by pmap.c and some is implicit
	 * because the BSP is cpu#0 and the page is initially zero, and also
	 * because we can refer to variables by name on the BSP..
	 */
	newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));

	bcopy(PTD, newptd, PAGE_SIZE);  /* inc prv page pde */
	IdlePTDS[0] = newptd;

	/* Point PTD[] to this page instead of IdlePTD's physical page */
	newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd));

	my_idlePTD = (pd_entry_t *)vtophys(newptd);

	/* Allocate and setup BSP idle stack */
	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
	for (i = 0; i < UPAGES; i++)
		SMP_prvpt[i + 3] = (pt_entry_t)(PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

	pmap_set_opt_bsp();

	/* propagate the kernel PDEs into every CPU's PTD */
	for (i = 0; i < mp_ncpus; i++) {
		bcopy( (int *) PTD + KPTDI, (int *) IdlePTDS[i] + KPTDI, NKPDE * sizeof (int));
	}

	/* number of APs actually started */
	return mp_ncpus - 1;
}
1928
1929
1930 /*
1931 * load the 1st level AP boot code into base memory.
1932 */
1933
/* targets for relocation */
/* NOTE(review): these symbols appear to live in the AP trampoline source
   (mpboot.s, per the comments in install_ap_tramp below) — confirm there. */
extern void bigJump(void);
extern void bootCodeSeg(void);
extern void bootDataSeg(void);
extern void MPentry(void);
extern u_int MP_GDT;
extern u_int mp_gdtbase;
1941
static void
install_ap_tramp(u_int boot_addr)
{
	/*
	 * Copy the AP 1st-level boot trampoline (bootMP, bootMP_size
	 * bytes) to physical 'boot_addr' in base memory, then patch the
	 * copied image in place with the addresses it needs at runtime.
	 */
	int     x;
	int     size = *(int *) ((u_long) & bootMP_size);
	u_char *src = (u_char *) ((u_long) bootMP);
	u_char *dst = (u_char *) boot_addr + KERNBASE;
	u_int   boot_base = (u_int) bootMP;
	u_int8_t *dst8;
	u_int16_t *dst16;
	u_int32_t *dst32;

	POSTCODE(INSTALL_AP_TRAMP_POST);

	/* byte-wise copy of the trampoline into base memory */
	for (x = 0; x < size; ++x)
		*dst++ = *src++;

	/*
	 * modify addresses in code we just moved to basemem. unfortunately we
	 * need fairly detailed info about mpboot.s for this to work.  changes
	 * to mpboot.s might require changes here.
	 */

	/* boot code is located in KERNEL space */
	dst = (u_char *) boot_addr + KERNBASE;

	/* modify the lgdt arg */
	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);

	/* modify the ljmp target for MPentry() */
	/* the +1 presumably skips a one-byte opcode so the 32-bit operand
	   is overwritten — verify against mpboot.s */
	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
	*dst32 = ((u_int) MPentry - KERNBASE);

	/* modify the target for boot code segment */
	/* segment base is stored as 16 low bits + 8 high bits (24 bits) */
	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_addr & 0xffff;
	*dst8 = ((u_int) boot_addr >> 16) & 0xff;

	/* modify the target for boot data segment */
	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_addr & 0xffff;
	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
}
1988
1989
1990 /*
1991 * this function starts the AP (application processor) identified
1992 * by the APIC ID 'physicalCpu'. It does quite a "song and dance"
1993 * to accomplish this. This is necessary because of the nuances
1994 * of the different hardware we might encounter. It ain't pretty,
1995 * but it seems to work.
1996 */
static int
start_ap(int logical_cpu, u_int boot_addr)
{
	/*
	 * Perform the INIT / INIT-deassert / STARTUP / STARTUP IPI
	 * sequence to wake the AP, then wait up to ~5 seconds for it to
	 * announce itself by bumping mp_ncpus.  Returns 1 on success,
	 * 0 on failure.
	 */
	int     physical_cpu;
	int     vector;
	int     cpus;
	u_long  icr_lo, icr_hi;

	POSTCODE(START_AP_POST);

	/* get the PHYSICAL APIC ID# */
	physical_cpu = CPU_TO_ID(logical_cpu);

	/* calculate the vector: STARTUP IPI encodes page# of boot code */
	vector = (boot_addr >> 12) & 0xff;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_ncpus;

	/*
	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
	 * and running the target CPU. OR this INIT IPI might be latched (P5
	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
	 * ignored.
	 */

	/* setup the address for the target AP */
	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
	icr_hi |= (physical_cpu << 24);
	lapic.icr_hi = icr_hi;

	/* do an INIT IPI: assert RESET */
	icr_lo = lapic.icr_lo & 0xfff00000;
	lapic.icr_lo = icr_lo | 0x0000c500;

	/* wait for pending status end */
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;

	/* do an INIT IPI: deassert RESET */
	lapic.icr_lo = icr_lo | 0x00008500;

	/* wait for pending status end */
	u_sleep(10000);		/* wait ~10mS */
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;

	/*
	 * next we do a STARTUP IPI: the previous INIT IPI might still be
	 * latched, (P5 bug) this 1st STARTUP would then terminate
	 * immediately, and the previously started INIT IPI would continue. OR
	 * the previous INIT IPI has already run. and this STARTUP IPI will
	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
	 * will run.
	 */

	/* do a STARTUP IPI */
	lapic.icr_lo = icr_lo | 0x00000600 | vector;
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;
	u_sleep(200);		/* wait ~200uS */

	/*
	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
	 * recognized after hardware RESET or INIT IPI.
	 */

	lapic.icr_lo = icr_lo | 0x00000600 | vector;
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;
	u_sleep(200);		/* wait ~200uS */

	/* wait for it to start: the AP increments mp_ncpus when alive */
	set_apic_timer(5000000);/* == 5 seconds */
	while (read_apic_timer())
		if (mp_ncpus > cpus)
			return 1;	/* return SUCCESS */

	return 0;		/* return FAILURE */
}
2079
2080
2081 /*
2082 * Flush the TLB on all other CPU's
2083 *
2084 * XXX: Needs to handshake and wait for completion before proceding.
2085 */
void
smp_invltlb(void)
{
#if defined(APIC_IO)
	/* broadcast a TLB-flush IPI; gated until AP startup is complete */
	if (smp_started && invltlb_ok)
		all_but_self_ipi(XINVLTLB_OFFSET);
#endif  /* APIC_IO */
}
2094
void
invlpg(u_int addr)
{
	/* invalidate this CPU's TLB entry for the given virtual address */
	__asm __volatile("invlpg (%0)"::"r"(addr):"memory");

	/* send a message to the other CPUs */
	smp_invltlb();
}
2103
void
invltlb(void)
{
	u_long  temp;

	/*
	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
	 * inlined.
	 */
	/* reloading %cr3 flushes this CPU's entire (non-global) TLB */
	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");

	/* send a message to the other CPUs */
	smp_invltlb();
}
2118
2119
2120 /*
2121 * When called the executing CPU will send an IPI to all other CPUs
2122 * requesting that they halt execution.
2123 *
2124 * Usually (but not necessarily) called with 'other_cpus' as its arg.
2125 *
2126 * - Signals all CPUs in map to stop.
2127 * - Waits for each to stop.
2128 *
2129 * Returns:
2130 * -1: error
2131 * 0: NA
2132 * 1: ok
2133 *
2134 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
2135 * from executing at same time.
2136 */
2137 int
2138 stop_cpus(u_int map)
2139 {
2140 if (!smp_started)
2141 return 0;
2142
2143 /* send the Xcpustop IPI to all CPUs in map */
2144 selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
2145
2146 while ((stopped_cpus & map) != map)
2147 /* spin */ ;
2148
2149 return 1;
2150 }
2151
2152
2153 /*
2154 * Called by a CPU to restart stopped CPUs.
2155 *
2156 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
2157 *
2158 * - Signals all CPUs in map to restart.
2159 * - Waits for each to restart.
2160 *
2161 * Returns:
2162 * -1: error
2163 * 0: NA
2164 * 1: ok
2165 */
2166 int
2167 restart_cpus(u_int map)
2168 {
2169 if (!smp_started)
2170 return 0;
2171
2172 started_cpus = map; /* signal other cpus to restart */
2173
2174 while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
2175 /* spin */ ;
2176
2177 return 1;
2178 }
2179
/* NOTE: set to 1 by ap_init() once the last AP has launched */
int smp_active = 0;	/* are the APs allowed to run? */
SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");

/* XXX maybe should be hw.ncpu */
/* incremented by each AP in ap_init() as it comes on line */
static int smp_cpus = 1;	/* how many cpu's running */
SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");

int invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");

/* Warning: Do not staticize.  Used from swtch.s */
int do_page_zero_idle = 1; /* bzero pages for fun and profit in idleloop */
SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW,
	   &do_page_zero_idle, 0, "");

/* Is forwarding of a interrupt to the CPU holding the ISR lock enabled ? */
int forward_irq_enabled = 1;
SYSCTL_INT(_machdep, OID_AUTO, forward_irq_enabled, CTLFLAG_RW,
	   &forward_irq_enabled, 0, "");

/* Enable forwarding of a signal to a process running on a different CPU */
static int forward_signal_enabled = 1;
SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
	   &forward_signal_enabled, 0, "");

/* Enable forwarding of roundrobin to all other cpus */
static int forward_roundrobin_enabled = 1;
SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
	   &forward_roundrobin_enabled, 0, "");
2209
2210 /*
2211 * This is called once the rest of the system is up and running and we're
2212 * ready to let the AP's out of the pen.
2213 */
void ap_init(void);

void
ap_init()
{
	/*
	 * Per-AP initialization, run on each AP once the BSP releases it:
	 * sanity-check identity, bring up the local APIC and memory range
	 * attributes, and flip the global SMP-ready flags when the last
	 * AP arrives.
	 */
	u_int   apic_id;

	/* one more CPU has come on line */
	smp_cpus++;

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
	/* reload the (F00F-workaround) IDT on this CPU */
	lidt(&r_idt);
#endif

	/* Build our map of 'other' CPUs. */
	other_cpus = all_cpus & ~(1 << cpuid);

	printf("SMP: AP CPU #%d Launched!\n", cpuid);

	/* XXX FIXME: i386 specific, and redundant: Setup the FPU. */
	load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS);

	/* A quick check from sanity claus */
	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
	if (cpuid != apic_id) {
		printf("SMP: cpuid = %d\n", cpuid);
		printf("SMP: apic_id = %d\n", apic_id);
		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
		panic("cpuid mismatch! boom!!");
	}

	/* Init local apic for irq's */
	apic_initialize();

	/* Set memory range attributes for this CPU to match the BSP */
	mem_range_AP_init();

	/*
	 * Activate smp_invltlb, although strictly speaking, this isn't
	 * quite correct yet.  We should have a bitfield for cpus willing
	 * to accept TLB flush IPI's or something and sync them.
	 */
	if (smp_cpus == mp_ncpus) {
		invltlb_ok = 1;
		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
		smp_active = 1;	 /* historic */
	}

	curproc = NULL;		/* make sure */
}
2263
2264 #ifdef BETTER_CLOCK
2265
/* CPU state categories sampled when a statclock is forwarded */
#define CHECKSTATE_USER	0
#define CHECKSTATE_SYS	1
#define CHECKSTATE_INTR	2

/* Do not staticize.  Used from apic_vector.s */
/* per-CPU snapshots (curproc, state, pc) filled in by the IPI handler */
struct proc*	checkstate_curproc[NCPU];
int		checkstate_cpustate[NCPU];
u_long		checkstate_pc[NCPU];

extern long cp_time[CPUSTATES];

/* map a sampled pc into an even profiling-buffer index
   (used by addupc_intr_forwarded below) */
#define PC_TO_INDEX(pc, prof) \
        ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
            (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
2280
2281 static void
2282 addupc_intr_forwarded(struct proc *p, int id, int *astmap)
2283 {
2284 int i;
2285 struct uprof *prof;
2286 u_long pc;
2287
2288 pc = checkstate_pc[id];
2289 prof = &p->p_stats->p_prof;
2290 if (pc >= prof->pr_off &&
2291 (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) {
2292 if ((p->p_flag & P_OWEUPC) == 0) {
2293 prof->pr_addr = pc;
2294 prof->pr_ticks = 1;
2295 p->p_flag |= P_OWEUPC;
2296 }
2297 *astmap |= (1 << id);
2298 }
2299 }
2300
static void
forwarded_statclock(int id, int pscnt, int *astmap)
{
	/*
	 * Do the statclock bookkeeping for CPU 'id' on its behalf, using
	 * the state snapshot the IPI handler stored in checkstate_*[].
	 * 'pscnt' > 1 means only profiling work should be done this tick;
	 * 'astmap' collects CPUs that need an AST posted.
	 */
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;
	int cpustate;
	struct proc *p;
#ifdef GPROF
	register struct gmonparam *g;
	int i;
#endif

	p = checkstate_curproc[id];
	cpustate = checkstate_cpustate[id];

	switch (cpustate) {
	case CHECKSTATE_USER:
		/* user mode: charge a user tick, profile if requested */
		if (p->p_flag & P_PROFIL)
			addupc_intr_forwarded(p, id, astmap);
		if (pscnt > 1)
			return;
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
		break;
	case CHECKSTATE_SYS:
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = checkstate_pc[id] - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (pscnt > 1)
			return;

		/* no curproc means the CPU was idle in the kernel */
		if (!p)
			cp_time[CP_IDLE]++;
		else {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		}
		break;
	case CHECKSTATE_INTR:
	default:
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = checkstate_pc[id] - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (pscnt > 1)
			return;
		if (p)
			p->p_iticks++;
		cp_time[CP_INTR]++;
	}
	if (p != NULL) {
		/* scheduler bookkeeping: cpu usage drives priority decay */
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;	/* saturate rather than wrap */
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}

		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
			rss = vm->vm_pmap.pm_stats.resident_count *
				PAGE_SIZE / 1024;
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
		}
	}
}
2399
/*
 * Perform statclock accounting for the other CPUs from this CPU:
 * IPI them so they record their current state (step 1), then walk the
 * recorded snapshots and do the bookkeeping here (step 2), posting AST
 * IPIs to any CPU whose process needs one.
 */
void
forward_statclock(int pscnt)
{
	int map;
	int id;
	int i;

	/* Kludge. We don't yet have separate locks for the interrupts
	 * and the kernel. This means that we cannot let the other processors
	 * handle complex interrupts while inhibiting them from entering
	 * the kernel in a non-interrupt context.
	 *
	 * What we can do, without changing the locking mechanisms yet,
	 * is letting the other processors handle a very simple interrupt
	 * (which determines the processor states), and do the main
	 * work ourself.
	 */

	if (!smp_started || !invltlb_ok || cold || panicstr)
		return;

	/* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */

	map = other_cpus & ~stopped_cpus ;
	checkstate_probed_cpus = 0;
	if (map != 0)
		selected_apic_ipi(map,
				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);

	/* Bounded spin until every targeted CPU has taken the IPI. */
	i = 0;
	while (checkstate_probed_cpus != map) {
		/* spin */
		i++;
		if (i == 100000) {
#ifdef BETTER_CLOCK_DIAGNOSTIC
			printf("forward_statclock: checkstate %x\n",
			       checkstate_probed_cpus);
#endif
			break;
		}
	}

	/*
	 * Step 2: walk through other processors processes, update ticks and
	 * profiling info.
	 */

	map = 0;
	for (id = 0; id < mp_ncpus; id++) {
		if (id == cpuid)
			continue;
		if (((1 << id) & checkstate_probed_cpus) == 0)
			continue;
		forwarded_statclock(id, pscnt, &map);
	}
	/* Post ASTs where needed and wait (bounded) for them to be taken. */
	if (map != 0) {
		checkstate_need_ast |= map;
		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
		i = 0;
		while ((checkstate_need_ast & map) != 0) {
			/* spin */
			i++;
			if (i > 100000) {
#ifdef BETTER_CLOCK_DIAGNOSTIC
				printf("forward_statclock: dropped ast 0x%x\n",
				       checkstate_need_ast & map);
#endif
				break;
			}
		}
	}
}
2472
/*
 * Perform hardclock accounting for the other CPUs from this CPU:
 * IPI them to snapshot their state, then decrement each remote
 * process's virtual (user-mode only) and profiling interval timers by
 * `tick` and post SIGVTALRM/SIGPROF on expiry.  If stathz == 0 the
 * hardclock also drives statistics, so forwarded_statclock() is called
 * as well.  Finally ASTs are posted to the affected CPUs.
 */
void
forward_hardclock(int pscnt)
{
	int map;
	int id;
	struct proc *p;
	struct pstats *pstats;
	int i;

	/* Kludge. We don't yet have separate locks for the interrupts
	 * and the kernel. This means that we cannot let the other processors
	 * handle complex interrupts while inhibiting them from entering
	 * the kernel in a non-interrupt context.
	 *
	 * What we can do, without changing the locking mechanisms yet,
	 * is letting the other processors handle a very simple interrupt
	 * (which determines the processor states), and do the main
	 * work ourself.
	 */

	if (!smp_started || !invltlb_ok || cold || panicstr)
		return;

	/* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */

	map = other_cpus & ~stopped_cpus ;
	checkstate_probed_cpus = 0;
	if (map != 0)
		selected_apic_ipi(map,
				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);

	/* Bounded spin until every targeted CPU has taken the IPI. */
	i = 0;
	while (checkstate_probed_cpus != map) {
		/* spin */
		i++;
		if (i == 100000) {
#ifdef BETTER_CLOCK_DIAGNOSTIC
			printf("forward_hardclock: checkstate %x\n",
			       checkstate_probed_cpus);
#endif
			break;
		}
	}

	/*
	 * Step 2: walk through other processors processes, update virtual
	 * timer and profiling timer. If stathz == 0, also update ticks and
	 * profiling info.
	 */

	map = 0;
	for (id = 0; id < mp_ncpus; id++) {
		if (id == cpuid)
			continue;
		if (((1 << id) & checkstate_probed_cpus) == 0)
			continue;
		p = checkstate_curproc[id];
		if (p) {
			pstats = p->p_stats;
			/* ITIMER_VIRTUAL only runs in user mode. */
			if (checkstate_cpustate[id] == CHECKSTATE_USER &&
			    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
			    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
				psignal(p, SIGVTALRM);
				map |= (1 << id);
			}
			if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
			    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
				psignal(p, SIGPROF);
				map |= (1 << id);
			}
		}
		if (stathz == 0) {
			forwarded_statclock( id, pscnt, &map);
		}
	}
	/* Post ASTs where needed and wait (bounded) for them to be taken. */
	if (map != 0) {
		checkstate_need_ast |= map;
		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
		i = 0;
		while ((checkstate_need_ast & map) != 0) {
			/* spin */
			i++;
			if (i > 100000) {
#ifdef BETTER_CLOCK_DIAGNOSTIC
				printf("forward_hardclock: dropped ast 0x%x\n",
				       checkstate_need_ast & map);
#endif
				break;
			}
		}
	}
}
2565
2566 #endif /* BETTER_CLOCK */
2567
/*
 * Nudge the CPU currently running process `p` with an AST IPI so it
 * notices a freshly posted signal.  Loops because the process may
 * migrate to another CPU between reading p_oncpu and the AST being
 * taken; a p_oncpu of 0xff means the process is not on any CPU.
 */
void
forward_signal(struct proc *p)
{
	int map;
	int id;
	int i;

	/* Kludge. We don't yet have separate locks for the interrupts
	 * and the kernel. This means that we cannot let the other processors
	 * handle complex interrupts while inhibiting them from entering
	 * the kernel in a non-interrupt context.
	 *
	 * What we can do, without changing the locking mechanisms yet,
	 * is letting the other processors handle a very simple interrupt
	 * (which determines the processor states), and do the main
	 * work ourself.
	 */

	if (!smp_started || !invltlb_ok || cold || panicstr)
		return;
	if (!forward_signal_enabled)
		return;
	while (1) {
		if (p->p_stat != SRUN)
			return;
		id = (u_char) p->p_oncpu;
		if (id == 0xff)
			return;
		map = (1<<id);
		checkstate_need_ast |= map;
		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
		/* Bounded spin until the target CPU acknowledges the AST. */
		i = 0;
		while ((checkstate_need_ast & map) != 0) {
			/* spin */
			i++;
			if (i > 100000) {
#if 0
				printf("forward_signal: dropped ast 0x%x\n",
				       checkstate_need_ast & map);
#endif
				break;
			}
		}
		/* Done unless the process moved to another CPU meanwhile. */
		if (id == (u_char) p->p_oncpu)
			return;
	}
}
2615
/*
 * Force the other CPUs to reconsider scheduling: mark them all for
 * rescheduling, post AST IPIs to those not stopped, and spin (bounded)
 * until the ASTs have been taken.
 */
void
forward_roundrobin(void)
{
	u_int map;
	int i;

	if (!smp_started || !invltlb_ok || cold || panicstr)
		return;
	if (!forward_roundrobin_enabled)
		return;
	resched_cpus |= other_cpus;
	map = other_cpus & ~stopped_cpus ;
#if 1
	selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
#else
	(void) all_but_self_ipi(XCPUAST_OFFSET);
#endif
	/* Bounded spin until the targeted CPUs acknowledge the ASTs. */
	i = 0;
	while ((checkstate_need_ast & map) != 0) {
		/* spin */
		i++;
		if (i > 100000) {
#if 0
			printf("forward_roundrobin: dropped ast 0x%x\n",
			       checkstate_need_ast & map);
#endif
			break;
		}
	}
}
2646
2647
2648 #ifdef APIC_INTR_REORDER
2649 /*
2650 * Maintain mapping from softintr vector to isr bit in local apic.
2651 */
2652 void
2653 set_lapic_isrloc(int intr, int vector)
2654 {
2655 if (intr < 0 || intr > 32)
2656 panic("set_apic_isrloc: bad intr argument: %d",intr);
2657 if (vector < ICU_OFFSET || vector > 255)
2658 panic("set_apic_isrloc: bad vector argument: %d",vector);
2659 apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
2660 apic_isrbit_location[intr].bit = (1<<(vector & 31));
2661 }
2662 #endif
2663
/*
 * All-CPU rendezvous. CPUs are signalled, all execute the setup function
 * (if specified), rendezvous, execute the action function (if specified),
 * rendezvous again, execute the teardown function (if specified), and then
 * resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */
static void (*smp_rv_setup_func)(void *arg);	/* runs before the entry barrier */
static void (*smp_rv_action_func)(void *arg);	/* runs between the barriers */
static void (*smp_rv_teardown_func)(void *arg);	/* runs after the exit barrier */
static void *smp_rv_func_arg;			/* argument passed to all three */
static volatile int smp_rv_waiters[2];		/* entry/exit barrier counts */
2678
/*
 * Per-CPU side of smp_rendezvous(): run the setup function, wait at the
 * entry barrier until all mp_ncpus CPUs have arrived, run the action
 * function, wait at the exit barrier, then run the teardown function.
 * Executed by the initiating CPU directly and by the others from the
 * rendezvous IPI handler.
 */
void
smp_rendezvous_action(void)
{
	/* setup function */
	if (smp_rv_setup_func != NULL)
		smp_rv_setup_func(smp_rv_func_arg);
	/* spin on entry rendezvous */
	atomic_add_int(&smp_rv_waiters[0], 1);
	while (smp_rv_waiters[0] < mp_ncpus)
		;
	/* action function */
	if (smp_rv_action_func != NULL)
		smp_rv_action_func(smp_rv_func_arg);
	/* spin on exit rendezvous */
	atomic_add_int(&smp_rv_waiters[1], 1);
	while (smp_rv_waiters[1] < mp_ncpus)
		;
	/* teardown function */
	if (smp_rv_teardown_func != NULL)
		smp_rv_teardown_func(smp_rv_func_arg);
}
2700
/*
 * Initiate an all-CPU rendezvous: publish the three callbacks and their
 * argument, reset the barrier counters, IPI every other CPU into
 * smp_rendezvous_action(), and participate in the rendezvous ourselves.
 * Interrupts are disabled on this CPU for the duration (the other CPUs
 * enter via the IPI with interrupts off); smp_rv_lock serializes
 * concurrent initiators.
 */
void
smp_rendezvous(void (* setup_func)(void *),
	       void (* action_func)(void *),
	       void (* teardown_func)(void *),
	       void *arg)
{
	u_int efl;

	/* obtain rendezvous lock */
	s_lock(&smp_rv_lock); /* XXX sleep here? NOWAIT flag? */

	/* set static function pointers */
	smp_rv_setup_func = setup_func;
	smp_rv_action_func = action_func;
	smp_rv_teardown_func = teardown_func;
	smp_rv_func_arg = arg;
	smp_rv_waiters[0] = 0;
	smp_rv_waiters[1] = 0;

	/* disable interrupts on this CPU, save interrupt status */
	efl = read_eflags();
	write_eflags(efl & ~PSL_I);

	/* signal other processors, which will enter the IPI with interrupts off */
	all_but_self_ipi(XRENDEZVOUS_OFFSET);

	/* call executor function */
	smp_rendezvous_action();

	/* restore interrupt flag */
	write_eflags(efl);

	/* release lock */
	s_unlock(&smp_rv_lock);
}
Cache object: fbb8e145f9d0ba1f2cc8b1b052f2b098
|