1 /*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 * derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD$
26 */
27
28 #include "opt_cpu.h"
29 #include "opt_user_ldt.h"
30
31 #ifdef SMP
32 #include <machine/smptests.h>
33 #else
34 #error
35 #endif
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/bus.h>
40 #include <sys/kernel.h>
41 #include <sys/proc.h>
42 #include <sys/sysctl.h>
43 #include <sys/malloc.h>
44 #include <sys/memrange.h>
45 #ifdef BETTER_CLOCK
46 #include <sys/dkstat.h>
47 #endif
48 #include <sys/cons.h> /* cngetc() */
49
50 #include <vm/vm.h>
51 #include <vm/vm_param.h>
52 #include <vm/pmap.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_extern.h>
55 #ifdef BETTER_CLOCK
56 #include <sys/lock.h>
57 #include <vm/vm_map.h>
58 #include <sys/user.h>
59 #ifdef GPROF
60 #include <sys/gmon.h>
61 #endif
62 #endif
63
64 #include <machine/smp.h>
65 #include <machine/apic.h>
66 #include <machine/atomic.h>
67 #include <machine/cpufunc.h>
68 #include <machine/mpapic.h>
69 #include <machine/psl.h>
70 #include <machine/segments.h>
71 #include <machine/smptests.h> /** TEST_DEFAULT_CONFIG, TEST_TEST1 */
72 #include <machine/tss.h>
73 #include <machine/specialreg.h>
74 #include <machine/globaldata.h>
75
76 #include <pci/pcivar.h>
77
78 #if defined(APIC_IO)
79 #include <machine/md_var.h> /* setidt() */
80 #include <i386/isa/icu.h> /* IPIs */
81 #include <i386/isa/intr_machdep.h> /* IPIs */
82 #endif /* APIC_IO */
83
84 #if defined(TEST_DEFAULT_CONFIG)
85 #define MPFPS_MPFB1 TEST_DEFAULT_CONFIG
86 #else
87 #define MPFPS_MPFB1 mpfps->mpfb1
88 #endif /* TEST_DEFAULT_CONFIG */
89
90 #define WARMBOOT_TARGET 0
91 #define WARMBOOT_OFF (KERNBASE + 0x0467)
92 #define WARMBOOT_SEG (KERNBASE + 0x0469)
93
94 #ifdef PC98
95 #define BIOS_BASE (0xe8000)
96 #define BIOS_SIZE (0x18000)
97 #else
98 #define BIOS_BASE (0xf0000)
99 #define BIOS_SIZE (0x10000)
100 #endif
101 #define BIOS_COUNT (BIOS_SIZE/4)
102
103 #define CMOS_REG (0x70)
104 #define CMOS_DATA (0x71)
105 #define BIOS_RESET (0x0f)
106 #define BIOS_WARM (0x0a)
107
108 #define PROCENTRY_FLAG_EN 0x01
109 #define PROCENTRY_FLAG_BP 0x02
110 #define IOAPICENTRY_FLAG_EN 0x01
111
112
/*
 * MP Floating Pointer Structure.
 *
 * mp_probe() stores the address returned by search_for_sig() in the
 * file-scope `mpfps` pointer using this type.  The structure overlays a
 * firmware-provided table, so field order and sizes must not change.
 */
typedef struct MPFPS {
	char	signature[4];	/* first word is matched against MP_SIG */
	void	*pap;		/* MP configuration table header; 0 is
				 * treated as "missing" by the table passes */
	u_char	length;
	u_char	spec_rev;
	u_char	checksum;
	u_char	mpfb1;		/* non-zero selects a default configuration
				 * (used as 1-based row of default_data[]) */
	u_char	mpfb2;		/* bit 0x80 set: PIC mode, else virtual wire */
	u_char	mpfb3;
	u_char	mpfb4;
	u_char	mpfb5;
}      *mpfps_t;
126
/*
 * MP Configuration Table Header.
 *
 * Reached through MPFPS.pap.  The base table entries start immediately
 * after this header (the table passes begin at cth + sizeof(struct MPCTH)).
 */
typedef struct MPCTH {
	char	signature[4];
	u_short	base_table_length;
	u_char	spec_rev;
	u_char	checksum;
	u_char	oem_id[8];
	u_char	product_id[12];
	void	*oem_table_pointer;
	u_short	oem_table_size;
	u_short	entry_count;		/* number of base table entries walked */
	void	*apic_address;		/* copied into cpu_apic_address */
	u_short	extended_table_length;
	u_char	extended_table_checksum;
	u_char	reserved;
}      *mpcth_t;
143
144
/* Base table entry describing one processor (entry type 0). */
typedef struct PROCENTRY {
	u_char	type;			/* entry type; 0 for a processor */
	u_char	apic_id;		/* local APIC ID of this processor */
	u_char	apic_version;
	u_char	cpu_flags;		/* PROCENTRY_FLAG_EN / PROCENTRY_FLAG_BP */
	u_long	cpu_signature;
	u_long	feature_flags;
	u_long	reserved1;
	u_long	reserved2;
}      *proc_entry_ptr;
155
/* Base table entry describing one bus (entry type 1). */
typedef struct BUSENTRY {
	u_char	type;			/* entry type; 1 for a bus */
	u_char	bus_id;
	char	bus_type[6];		/* presumably a bus name string that is
					 * not NUL-terminated -- TODO confirm */
}      *bus_entry_ptr;
161
/* Base table entry describing one IO APIC (entry type 2). */
typedef struct IOAPICENTRY {
	u_char	type;			/* entry type; 2 for an IO APIC */
	u_char	apic_id;
	u_char	apic_version;
	u_char	apic_flags;		/* tested against IOAPICENTRY_FLAG_EN */
	void	*apic_address;		/* recorded in io_apic_address[] */
}      *io_apic_entry_ptr;
169
/*
 * Base table entry describing one interrupt assignment (entry type 3;
 * type 4 "Local INT" entries have the same 8-byte length but are skipped
 * by the table passes).
 */
typedef struct INTENTRY {
	u_char	type;			/* entry type */
	u_char	int_type;
	u_short	int_flags;
	u_char	src_bus_id;
	u_char	src_bus_irq;
	u_char	dst_apic_id;
	u_char	dst_apic_int;
}      *int_entry_ptr;
179
/*
 * Description of one kind of MP base table entry.  The table passes use
 * `length` to step from one entry to the next.
 */
typedef struct BASETABLE_ENTRY {
	u_char	type;			/* entry type code (table index) */
	u_char	length;			/* size of such an entry, in bytes */
	char	name[16];		/* human-readable name */
}       basetable_entry;
186
187 /*
188 * this code MUST be enabled here and in mpboot.s.
189 * it follows the very early stages of AP boot by placing values in CMOS ram.
190 * it NORMALLY will never be needed and thus the primitive method for enabling.
191 *
192 #define CHECK_POINTS
193 */
194
#if defined(CHECK_POINTS) && !defined(PC98)
/* Read / write one CMOS location: select it via CMOS_REG, access CMOS_DATA. */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * Store the value D in the six CMOS locations 0x34..0x39.
 *
 * NOTE(review): the ';' directly after the parameter list is part of the
 * replacement text, so the expansion starts with an empty statement and is
 * several statements long; it is not safe as the sole body of an unbraced
 * if/else.
 */
#define CHECK_INIT(D);				\
	CHECK_WRITE(0x34, (D));			\
	CHECK_WRITE(0x35, (D));			\
	CHECK_WRITE(0x36, (D));			\
	CHECK_WRITE(0x37, (D));			\
	CHECK_WRITE(0x38, (D));			\
	CHECK_WRITE(0x39, (D));

/*
 * Print the label S followed by the six CMOS locations 0x34..0x39.
 * NOTE(review): same leading-';' caveat as CHECK_INIT.
 */
#define CHECK_PRINT(S);				\
	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
	   (S),					\
	   CHECK_READ(0x34),			\
	   CHECK_READ(0x35),			\
	   CHECK_READ(0x36),			\
	   CHECK_READ(0x37),			\
	   CHECK_READ(0x38),			\
	   CHECK_READ(0x39));

#else				/* CHECK_POINTS */

/* Check points disabled: both macros expand to nothing. */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif				/* CHECK_POINTS */
223
224 /*
225 * Values to send to the POST hardware.
226 */
227 #define MP_BOOTADDRESS_POST 0x10
228 #define MP_PROBE_POST 0x11
229 #define MPTABLE_PASS1_POST 0x12
230
231 #define MP_START_POST 0x13
232 #define MP_ENABLE_POST 0x14
233 #define MPTABLE_PASS2_POST 0x15
234
235 #define START_ALL_APS_POST 0x16
236 #define INSTALL_AP_TRAMP_POST 0x17
237 #define START_AP_POST 0x18
238
239 #define MP_ANNOUNCE_POST 0x19
240
/* set to 1 by mptable_hyperthread_fixup() when fake entries are needed */
static int	need_hyperthreading_fixup;
/* logical CPUs per package, taken from cpu_procinfo (CPUID_HTT_CORES) */
static u_int	logical_cpus;
u_int	logical_cpus_mask;		/* bit mask of logical cpu's */
244
245 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
246 int current_postcode;
247
248 /** XXX FIXME: what system files declare these??? */
249 extern struct region_descriptor r_gdt, r_idt;
250
int	bsp_apic_ready = 0;	/* flags usability of BSP apic */
int	mp_ncpus;		/* # of CPUs, including BSP */
int	mp_naps;		/* # of Application processors */
int	mp_nbusses;		/* # of busses */
int	mp_napics;		/* # of IO APICs */
int	boot_cpu_id;		/* designated BSP */
vm_offset_t cpu_apic_address;	/* local APIC address, common to all CPUs */
vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
extern int nkpt;

u_int32_t cpu_apic_versions[MAXCPU];	/* indexed by logical CPU number */
u_int32_t *io_apic_versions;	/* allocated in mptable_pass2(), mp_napics entries */
263
264 #ifdef APIC_INTR_DIAGNOSTIC
265 int apic_itrace_enter[32];
266 int apic_itrace_tryisrlock[32];
267 int apic_itrace_gotisrlock[32];
268 int apic_itrace_active[32];
269 int apic_itrace_masked[32];
270 int apic_itrace_noisrlock[32];
271 int apic_itrace_masked2[32];
272 int apic_itrace_unmask[32];
273 int apic_itrace_noforward[32];
274 int apic_itrace_leave[32];
275 int apic_itrace_enter2[32];
276 int apic_itrace_doreti[32];
277 int apic_itrace_splz[32];
278 int apic_itrace_eoi[32];
279 #ifdef APIC_INTR_DIAGNOSTIC_IRQ
280 unsigned short apic_itrace_debugbuffer[32768];
281 int apic_itrace_debugbuffer_idx;
282 struct simplelock apic_itrace_debuglock;
283 #endif
284 #endif
285
286 #ifdef APIC_INTR_REORDER
287 struct {
288 volatile int *location;
289 int bit;
290 } apic_isrbit_location[32];
291 #endif
292
293 struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE];
294
295 /*
296 * APIC ID logical/physical mapping structures.
297 * We oversize these to simplify boot-time config.
298 */
299 int cpu_num_to_apic_id[NAPICID];
300 int io_num_to_apic_id[NAPICID];
301 int apic_id_to_logical[NAPICID];
302
303
304 /* Bitmap of all available CPUs */
305 u_int all_cpus;
306
307 /* AP uses this during bootstrap. Do not staticize. */
308 char *bootSTK;
309 static int bootAP;
310
311 /* Hotwire a 0->4MB V==P mapping */
312 extern pt_entry_t *KPTphys;
313
314 /* SMP page table page */
315 extern pt_entry_t *SMPpt;
316
317 struct pcb stoppcbs[MAXCPU];
318
319 int smp_started; /* has the system started? */
320
321 /*
322 * Local data and functions.
323 */
324
static int	mp_capable;	/* set by mp_probe(): 1 if an MP table was found */
static u_int	boot_address;	/* AP trampoline address from mp_bootaddress() */
static u_int	base_memory;	/* base memory size in bytes */

static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
static mpfps_t	mpfps;		/* MP floating pointer, or 0 if none found */
static int	search_for_sig(u_int32_t target, int count);
static void	mp_enable(u_int boot_addr);

static void	mptable_hyperthread_fixup(u_int id_mask);
static void	mptable_pass1(void);
static int	mptable_pass2(void);
static void	default_mp_table(int type);
static void	fix_mp_table(void);
static void	setup_apic_irq_mapping(void);
static void	init_locks(void);
static int	start_all_aps(u_int boot_addr);
static void	install_ap_tramp(u_int boot_addr);
static int	start_ap(int logicalCpu, u_int boot_addr);
static int	apic_int_is_bus_type(int intr, int bus_type);

/*
 * NOTE(review): the hlt_* variables, hyperthreading_allowed and
 * logical_cpu_clist are not referenced in the portion of the file reviewed
 * here; their users are presumably further down -- confirm before changing.
 */
static int	hlt_cpus_mask;
static int	hlt_logical_cpus = 1;
/* threads sharing the L1 cache, as determined by mptable_hyperthread_fixup() */
static u_int	hyperthreading_cpus;
/* bit mask of logical CPU numbers whose APIC ID is not a multiple of
 * hyperthreading_cpus (filled in by mptable_pass2()) */
static u_int	hyperthreading_cpus_mask;
static int	hyperthreading_allowed;
static struct	sysctl_ctx_list logical_cpu_clist;
352
353 /*
354 * Calculate usable address in base memory for AP trampoline code.
355 */
356 u_int
357 mp_bootaddress(u_int basemem)
358 {
359 POSTCODE(MP_BOOTADDRESS_POST);
360
361 base_memory = basemem * 1024; /* convert to bytes */
362
363 boot_address = base_memory & ~0xfff; /* round down to 4k boundary */
364 if ((base_memory - boot_address) < bootMP_size)
365 boot_address -= 4096; /* not enough, lower by 4k */
366
367 return boot_address;
368 }
369
370
371 /*
372 * Look for an Intel MP spec table (ie, SMP capable hardware).
373 */
374 int
375 mp_probe(void)
376 {
377 int x;
378 u_long segment;
379 u_int32_t target;
380
381 POSTCODE(MP_PROBE_POST);
382
383 /* see if EBDA exists */
384 if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
385 /* search first 1K of EBDA */
386 target = (u_int32_t) (segment << 4);
387 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
388 goto found;
389 } else {
390 /* last 1K of base memory, effective 'top of base' passed in */
391 target = (u_int32_t) (base_memory - 0x400);
392 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
393 goto found;
394 }
395
396 /* search the BIOS */
397 target = (u_int32_t) BIOS_BASE;
398 if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
399 goto found;
400
401 /* nothing found */
402 mpfps = (mpfps_t)0;
403 mp_capable = 0;
404 return 0;
405
406 found:
407 /* calculate needed resources */
408 mpfps = (mpfps_t)x;
409 mptable_pass1();
410
411 /* flag fact that we are running multiple processors */
412 mp_capable = 1;
413 return 1;
414 }
415
416
417 /*
418 * Startup the SMP processors.
419 */
420 void
421 mp_start(void)
422 {
423 POSTCODE(MP_START_POST);
424
425 /* look for MP capable motherboard */
426 if (mp_capable)
427 mp_enable(boot_address);
428 else
429 panic("MP hardware not found!");
430 }
431
432
433 /*
434 * Print various information about the SMP system hardware and setup.
435 */
436 void
437 mp_announce(void)
438 {
439 int x;
440
441 POSTCODE(MP_ANNOUNCE_POST);
442
443 printf("FreeBSD/SMP: Multiprocessor motherboard: %d CPUs\n", mp_ncpus);
444 printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
445 printf(", version: 0x%08x", cpu_apic_versions[0]);
446 printf(", at 0x%08x\n", cpu_apic_address);
447 for (x = 1; x <= mp_naps; ++x) {
448 printf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x));
449 printf(", version: 0x%08x", cpu_apic_versions[x]);
450 printf(", at 0x%08x\n", cpu_apic_address);
451 }
452
453 #if defined(APIC_IO)
454 for (x = 0; x < mp_napics; ++x) {
455 printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
456 printf(", version: 0x%08x", io_apic_versions[x]);
457 printf(", at 0x%08x\n", io_apic_address[x]);
458 }
459 #else
460 printf(" Warning: APIC I/O disabled\n");
461 #endif /* APIC_IO */
462 }
463
/*
 * AP cpu's call this to sync up protected mode.
 *
 * The CPU's index is taken from bootAP.  In order, the function builds and
 * loads this CPU's slice of the GDT, loads the IDT and the default LDT,
 * sets up and loads the task register, clears CR0_CD, CR0_NW and CR0_EM,
 * and finally calls pmap_set_opt().
 */
void
init_secondary(void)
{
	int	gsel_tss;
	int	x, myid = bootAP;
	u_int	cr0;

	/*
	 * Point the private-data and TSS descriptor templates at this
	 * CPU's private space before they are converted below.
	 */
	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
	gdt_segs[GPROC0_SEL].ssd_base =
		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
	SMP_prvspace[myid].globaldata.gd_prvspace = &SMP_prvspace[myid];

	/* convert the templates into this CPU's NGDT-entry slice of gdt[] */
	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt);			/* does magic intra-segment return */

	lidt(&r_idt);

	lldt(_default_ldt);
#ifdef USER_LDT
	currentldt = _default_ldt;
#endif

	/* fill in the TSS and its descriptor, then load the task register */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
	common_tss.tss_esp0 = 0;	/* not used until after switch */
	common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	common_tss.tss_ioopt = (sizeof common_tss) << 16;
	tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd;
	common_tssd = *tss_gdt;
	ltr(gsel_tss);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);

	pmap_set_opt();
}
514
515
516 #if defined(APIC_IO)
517 /*
518 * Final configuration of the BSP's local APIC:
519 * - disable 'pic mode'.
520 * - disable 'virtual wire mode'.
521 * - enable NMI.
522 */
523 void
524 bsp_apic_configure(void)
525 {
526 u_char byte;
527 u_int32_t temp;
528
529 /* leave 'pic mode' if necessary */
530 if (picmode) {
531 outb(0x22, 0x70); /* select IMCR */
532 byte = inb(0x23); /* current contents */
533 byte |= 0x01; /* mask external INTR */
534 outb(0x23, byte); /* disconnect 8259s/NMI */
535 }
536
537 /* mask lint0 (the 8259 'virtual wire' connection) */
538 temp = lapic.lvt_lint0;
539 temp |= APIC_LVT_M; /* set the mask */
540 lapic.lvt_lint0 = temp;
541
542 /* setup lint1 to handle NMI */
543 temp = lapic.lvt_lint1;
544 temp &= ~APIC_LVT_M; /* clear the mask */
545 lapic.lvt_lint1 = temp;
546
547 if (bootverbose)
548 apic_dump("bsp_apic_configure()");
549 }
550 #endif /* APIC_IO */
551
552
553 /*******************************************************************
554 * local functions and data
555 */
556
/*
 * In-kernel copy of an MP table interrupt entry, extended with the
 * interrupt number assigned to it (int_vector).
 */
typedef struct INTDATA {
	u_char	int_type;
	u_short	int_flags;
	u_char	src_bus_id;
	u_char	src_bus_irq;
	u_char	dst_apic_id;		/* physical APIC ID of the target IO APIC */
	u_char	dst_apic_int;		/* pin on that IO APIC */
	u_char	int_vector;		/* assigned irq, 0xff while unassigned */
}       io_int, local_int;

/* the IO INT data, one entry per possible APIC INTerrupt */
static io_int *io_apic_ints;

/* number of interrupt entries counted by mptable_pass1() */
static int nintrs;
571
/*
 * start the SMP system
 *
 * boot_addr is the AP trampoline address handed on to start_all_aps().
 *
 * Sequence: temporarily map the low 4MB V == P, run the second MP table
 * pass, remove the mapping, build a default table when the second pass
 * reported a default configuration, initialize the locks, post-process the
 * table and, with APIC_IO, program the IO APICs and install the IPI
 * vectors.  Finally the APs are started.
 */
static void
mp_enable(u_int boot_addr)
{
	int     x;
#if defined(APIC_IO)
	int     apic;
	u_int   ux;
#endif	/* APIC_IO */

	POSTCODE(MP_ENABLE_POST);

	/* turn on 4MB of V == P addressing so we can get to MP table */
	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
	invltlb();

	/* examine the MP table for needed info, uses physical addresses */
	x = mptable_pass2();

	/* drop the temporary V == P mapping again */
	*(int *)PTD = 0;
	invltlb();

	/* can't process default configs till the CPU APIC is pmapped */
	if (x)
		default_mp_table(x);

	/* initialize all SMP locks */
	init_locks();

	/* post scan cleanup */
	fix_mp_table();
	setup_apic_irq_mapping();

#if defined(APIC_IO)

	/* fill the LOGICAL io_apic_versions table */
	for (apic = 0; apic < mp_napics; ++apic) {
		ux = io_apic_read(apic, IOAPIC_VER);
		io_apic_versions[apic] = ux;
		/*
		 * An all-ones version register marks the IO APIC as broken.
		 * That is fatal if any interrupt entry targets it;
		 * otherwise the APIC is skipped.
		 */
		if (ux == 0xffffffff) {
			int i;

			for (i = 0; i < nintrs; i++)
				if (io_apic_ints[i].dst_apic_id ==
				    IO_TO_ID(apic))
					panic("Missing IO APIC");
			printf("Skipping broken IO APIC #%d\n", apic);
		} else
			io_apic_set_id(apic, IO_TO_ID(apic));
	}

	/* program each IO APIC in the system */
	for (apic = 0; apic < mp_napics; ++apic)
		if (io_apic_versions[apic] != 0xffffffff &&
		    io_apic_setup(apic) < 0)
			panic("IO APIC setup failure");

	/* install a 'Spurious INTerrupt' vector */
	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for TLB invalidation */
	setidt(XINVLTLB_OFFSET, Xinvltlb,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

#ifdef BETTER_CLOCK
	/* install an inter-CPU IPI for reading processor state */
	setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif

	/* install an inter-CPU IPI for all-CPU rendezvous */
	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forcing an additional software trap */
	setidt(XCPUAST_OFFSET, Xcpuast,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for interrupt forwarding */
	setidt(XFORWARD_IRQ_OFFSET, Xforward_irq,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for CPU stop/restart */
	setidt(XCPUSTOP_OFFSET, Xcpustop,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

#if defined(TEST_TEST1)
	/* install a "fake hardware INTerrupt" vector */
	setidt(XTEST1_OFFSET, Xtest1,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif  /** TEST_TEST1 */

#endif	/* APIC_IO */

	/* start each Application Processor */
	/* NOTE(review): the int result of start_all_aps() is discarded here */
	start_all_aps(boot_addr);
}
672
673
674 /*
675 * look for the MP spec signature
676 */
677
678 /* string defined by the Intel MP Spec as identifying the MP table */
679 #define MP_SIG 0x5f504d5f /* _MP_ */
680 #define NEXT(X) ((X) += 4)
681 static int
682 search_for_sig(u_int32_t target, int count)
683 {
684 int x;
685 u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
686
687 for (x = 0; x < count; NEXT(x))
688 if (addr[x] == MP_SIG)
689 /* make array index a byte index */
690 return (target + (x * sizeof(u_int32_t)));
691
692 return -1;
693 }
694
695
/*
 * Properties of the five base table entry kinds, indexed by entry type.
 * The table passes use the length column to advance to the next entry.
 */
static basetable_entry basetable_entry_types[] =
{
	/* type, length in bytes, name */
	{0, 20, "Processor"},
	{1, 8, "Bus"},
	{2, 8, "I/O APIC"},
	{3, 8, "I/O INT"},
	{4, 8, "Local INT"}
};
704
/* One recorded bus: its MP table bus ID and decoded bus type. */
typedef struct BUSDATA {
	u_char  bus_id;			/* 0xff marks an unused slot */
	enum busTypes bus_type;
}       bus_datum;

/* Pairs a bus type code with its printable name (at most 6 chars + NUL). */
typedef struct BUSTYPENAME {
	u_char  type;
	char    name[7];
}       bus_type_name;
714
/*
 * Bus type codes and their names.  "---" rows are placeholders with type
 * UNKNOWN_BUSTYPE.
 *
 * NOTE(review): row order looks significant (placeholder rows keep other
 * rows at fixed positions) -- presumably it mirrors enum busTypes; confirm
 * before reordering.  MCA appears twice (rows 3 and 6).
 */
static bus_type_name bus_type_table[] =
{
	{CBUS, "CBUS"},
	{CBUSII, "CBUSII"},
	{EISA, "EISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{ISA, "ISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{PCI, "PCI"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{XPRESS, "XPRESS"},
	{UNKNOWN_BUSTYPE, "---"}
};
/*
 * Bus layout of the seven default configurations; from MP spec v1.4,
 * table 5-1.  Row N-1 describes default configuration type N (the table
 * passes index it with MPFPS_MPFB1 - 1).  255 marks an absent second bus.
 */
static int default_data[7][5] =
{
/*   nbus, id0, type0, id1, type1 */
	{1, 0, ISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, MCA, 255, 255},
	{2, 0, ISA, 1, PCI},
	{2, 0, EISA, 1, PCI},
	{2, 0, MCA, 1, PCI}
};
749
750
751 /* the bus data */
752 static bus_datum *bus_data;
753
754 static int processor_entry __P((proc_entry_ptr entry, int cpu));
755 static int bus_entry __P((bus_entry_ptr entry, int bus));
756 static int io_apic_entry __P((io_apic_entry_ptr entry, int apic));
757 static int int_entry __P((int_entry_ptr entry, int intr));
758 static int lookup_bus_type __P((char *name));
759
760
761 /*
762 * 1st pass on motherboard's Intel MP specification table.
763 *
764 * initializes:
765 * mp_ncpus = 1
766 *
767 * determines:
768 * cpu_apic_address (common to all CPUs)
769 * io_apic_address[N]
770 * mp_naps
771 * mp_nbusses
772 * mp_napics
773 * nintrs
774 */
775 static void
776 mptable_pass1(void)
777 {
778 int x;
779 mpcth_t cth;
780 int totalSize;
781 void* position;
782 int count;
783 int type;
784 u_int id_mask;
785
786 POSTCODE(MPTABLE_PASS1_POST);
787
788 /* clear various tables */
789 for (x = 0; x < NAPICID; ++x) {
790 io_apic_address[x] = ~0; /* IO APIC address table */
791 }
792
793 /* init everything to empty */
794 mp_naps = 0;
795 mp_nbusses = 0;
796 mp_napics = 0;
797 nintrs = 0;
798 id_mask = 0;
799
800 /* check for use of 'default' configuration */
801 if (MPFPS_MPFB1 != 0) {
802 /* use default addresses */
803 cpu_apic_address = DEFAULT_APIC_BASE;
804 io_apic_address[0] = DEFAULT_IO_APIC_BASE;
805
806 /* fill in with defaults */
807 mp_naps = 2; /* includes BSP */
808 mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
809 #if defined(APIC_IO)
810 mp_napics = 1;
811 nintrs = 16;
812 #endif /* APIC_IO */
813 }
814 else {
815 if ((cth = mpfps->pap) == 0)
816 panic("MP Configuration Table Header MISSING!");
817
818 cpu_apic_address = (vm_offset_t) cth->apic_address;
819
820 /* walk the table, recording info of interest */
821 totalSize = cth->base_table_length - sizeof(struct MPCTH);
822 position = (u_char *) cth + sizeof(struct MPCTH);
823 count = cth->entry_count;
824
825 while (count--) {
826 switch (type = *(u_char *) position) {
827 case 0: /* processor_entry */
828 if (((proc_entry_ptr)position)->cpu_flags
829 & PROCENTRY_FLAG_EN) {
830 ++mp_naps;
831 id_mask |= 1 <<
832 ((proc_entry_ptr)position)->apic_id;
833 }
834 break;
835 case 1: /* bus_entry */
836 ++mp_nbusses;
837 break;
838 case 2: /* io_apic_entry */
839 if (((io_apic_entry_ptr)position)->apic_flags
840 & IOAPICENTRY_FLAG_EN)
841 io_apic_address[mp_napics++] =
842 (vm_offset_t)((io_apic_entry_ptr)
843 position)->apic_address;
844 break;
845 case 3: /* int_entry */
846 ++nintrs;
847 break;
848 case 4: /* int_entry */
849 break;
850 default:
851 panic("mpfps Base Table HOSED!");
852 /* NOTREACHED */
853 }
854
855 totalSize -= basetable_entry_types[type].length;
856 (u_char*)position += basetable_entry_types[type].length;
857 }
858 }
859
860 /* qualify the numbers */
861 if (mp_naps > MAXCPU) {
862 printf("Warning: only using %d of %d available CPUs!\n",
863 MAXCPU, mp_naps);
864 mp_naps = MAXCPU;
865 }
866
867 /* See if we need to fixup HT logical CPUs. */
868 mptable_hyperthread_fixup(id_mask);
869
870 /*
871 * Count the BSP.
872 * This is also used as a counter while starting the APs.
873 */
874 mp_ncpus = 1;
875
876 --mp_naps; /* subtract the BSP */
877 }
878
879
880 /*
881 * 2nd pass on motherboard's Intel MP specification table.
882 *
883 * sets:
884 * boot_cpu_id
885 * ID_TO_IO(N), phy APIC ID to log CPU/IO table
886 * CPU_TO_ID(N), logical CPU to APIC ID table
887 * IO_TO_ID(N), logical IO to APIC ID table
888 * bus_data[N]
889 * io_apic_ints[N]
890 */
891 static int
892 mptable_pass2(void)
893 {
894 struct PROCENTRY proc;
895 int x;
896 mpcth_t cth;
897 int totalSize;
898 void* position;
899 int count;
900 int type;
901 int apic, bus, cpu, intr;
902 int i, j;
903 int pgeflag;
904
905 POSTCODE(MPTABLE_PASS2_POST);
906
907 /* Initialize fake proc entry for use with HT fixup. */
908 bzero(&proc, sizeof(proc));
909 proc.type = 0;
910 proc.cpu_flags = PROCENTRY_FLAG_EN;
911
912 pgeflag = 0; /* XXX - Not used under SMP yet. */
913
914 MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
915 M_DEVBUF, M_WAITOK);
916 MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
917 M_DEVBUF, M_WAITOK);
918 MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
919 M_DEVBUF, M_WAITOK);
920 MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
921 M_DEVBUF, M_WAITOK);
922
923 bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
924
925 for (i = 0; i < mp_napics; i++) {
926 for (j = 0; j < mp_napics; j++) {
927 /* same page frame as a previous IO apic? */
928 if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
929 (io_apic_address[i] & PG_FRAME)) {
930 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
931 + (NPTEPG-2-j) * PAGE_SIZE
932 + (io_apic_address[i] & PAGE_MASK));
933 break;
934 }
935 /* use this slot if available */
936 if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
937 SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
938 pgeflag | (io_apic_address[i] & PG_FRAME));
939 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
940 + (NPTEPG-2-j) * PAGE_SIZE
941 + (io_apic_address[i] & PAGE_MASK));
942 break;
943 }
944 }
945 }
946
947 /* clear various tables */
948 for (x = 0; x < NAPICID; ++x) {
949 ID_TO_IO(x) = -1; /* phy APIC ID to log CPU/IO table */
950 CPU_TO_ID(x) = -1; /* logical CPU to APIC ID table */
951 IO_TO_ID(x) = -1; /* logical IO to APIC ID table */
952 }
953
954 /* clear bus data table */
955 for (x = 0; x < mp_nbusses; ++x)
956 bus_data[x].bus_id = 0xff;
957
958 /* clear IO APIC INT table */
959 for (x = 0; x < (nintrs + 1); ++x) {
960 io_apic_ints[x].int_type = 0xff;
961 io_apic_ints[x].int_vector = 0xff;
962 }
963
964 /* setup the cpu/apic mapping arrays */
965 boot_cpu_id = -1;
966
967 /* record whether PIC or virtual-wire mode */
968 picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
969
970 /* check for use of 'default' configuration */
971 if (MPFPS_MPFB1 != 0)
972 return MPFPS_MPFB1; /* return default configuration type */
973
974 if ((cth = mpfps->pap) == 0)
975 panic("MP Configuration Table Header MISSING!");
976
977 /* walk the table, recording info of interest */
978 totalSize = cth->base_table_length - sizeof(struct MPCTH);
979 position = (u_char *) cth + sizeof(struct MPCTH);
980 count = cth->entry_count;
981 apic = bus = intr = 0;
982 cpu = 1; /* pre-count the BSP */
983
984 while (count--) {
985 switch (type = *(u_char *) position) {
986 case 0:
987 if (processor_entry(position, cpu))
988 ++cpu;
989
990 if (need_hyperthreading_fixup) {
991 /*
992 * Create fake mptable processor entries
993 * and feed them to processor_entry() to
994 * enumerate the logical CPUs.
995 */
996 proc.apic_id = ((proc_entry_ptr)position)->apic_id;
997 for (i = 1; i < logical_cpus; i++) {
998 proc.apic_id++;
999 (void)processor_entry(&proc, cpu);
1000 logical_cpus_mask |= (1 << cpu);
1001 if (hyperthreading_cpus > 1 &&
1002 proc.apic_id % hyperthreading_cpus != 0)
1003 hyperthreading_cpus_mask |= (1 << cpu);
1004 cpu++;
1005 }
1006 } else if (logical_cpus != 0) {
1007 u_int id = ((proc_entry_ptr)position)->apic_id;
1008
1009 /*
1010 * If this is an already-enumerated logical
1011 * CPU, add it to the bitmap.
1012 */
1013 if (id % logical_cpus != 0)
1014 logical_cpus_mask |= (1 << ID_TO_CPU(id));
1015 if (hyperthreading_cpus > 1 &&
1016 id % hyperthreading_cpus != 0)
1017 hyperthreading_cpus_mask |= (1 << ID_TO_CPU(id));
1018 }
1019 break;
1020 case 1:
1021 if (bus_entry(position, bus))
1022 ++bus;
1023 break;
1024 case 2:
1025 if (io_apic_entry(position, apic))
1026 ++apic;
1027 break;
1028 case 3:
1029 if (int_entry(position, intr))
1030 ++intr;
1031 break;
1032 case 4:
1033 /* int_entry(position); */
1034 break;
1035 default:
1036 panic("mpfps Base Table HOSED!");
1037 /* NOTREACHED */
1038 }
1039
1040 totalSize -= basetable_entry_types[type].length;
1041 (u_char *) position += basetable_entry_types[type].length;
1042 }
1043
1044 if (boot_cpu_id == -1)
1045 panic("NO BSP found!");
1046
1047 /* report fact that its NOT a default configuration */
1048 return 0;
1049 }
1050
1051 /*
1052 * Check if we should perform a hyperthreading "fix-up" to
1053 * enumerate any logical CPU's that aren't already listed
1054 * in the table.
1055 *
1056 * XXX: We assume that all of the physical CPUs in the
1057 * system have the same number of logical CPUs.
1058 *
1059 * XXX: We assume that APIC ID's are allocated such that
1060 * the APIC ID's for a physical processor are aligned
1061 * with the number of logical CPU's in the processor.
1062 */
1063 static void
1064 mptable_hyperthread_fixup(u_int id_mask)
1065 {
1066 u_int i, id;
1067 u_int threads_per_cache, p[4];
1068
1069 /* Nothing to do if there is no HTT support. */
1070 if ((cpu_feature & CPUID_HTT) == 0)
1071 return;
1072 logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
1073 if (logical_cpus <= 1)
1074 return;
1075
1076 /*
1077 * Work out if hyperthreading is *really* enabled. This
1078 * is made really ugly by the fact that processors lie: Dual
1079 * core processors claim to be hyperthreaded even when they're
1080 * not, presumably because they want to be treated the same
1081 * way as HTT with respect to per-cpu software licensing.
1082 * At the time of writing (May 12, 2005) the only hyperthreaded
1083 * cpus are from Intel, and Intel's dual-core processors can be
1084 * identified via the "deterministic cache parameters" cpuid
1085 * calls.
1086 */
1087 /*
1088 * First determine if this is an Intel processor which claims
1089 * to have hyperthreading support.
1090 */
1091 if ((cpu_feature & CPUID_HTT) &&
1092 (strcmp(cpu_vendor, "GenuineIntel") == 0)) {
1093 /*
1094 * If the "deterministic cache parameters" cpuid calls
1095 * are available, use them.
1096 */
1097 if (cpu_high >= 4) {
1098 /* Ask the processor about the L1 cache. */
1099 for (i = 0; i < 1; i++) {
1100 cpuid_count(4, i, p);
1101 threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
1102 if (hyperthreading_cpus < threads_per_cache)
1103 hyperthreading_cpus = threads_per_cache;
1104 if ((p[0] & 0x1f) == 0)
1105 break;
1106 }
1107 }
1108
1109 /*
1110 * If the deterministic cache parameters are not
1111 * available, or if no caches were reported to exist,
1112 * just accept what the HTT flag indicated.
1113 */
1114 if (hyperthreading_cpus == 0)
1115 hyperthreading_cpus = logical_cpus;
1116 }
1117
1118 /*
1119 * For each APIC ID of a CPU that is set in the mask,
1120 * scan the other candidate APIC ID's for this
1121 * physical processor. If any of those ID's are
1122 * already in the table, then kill the fixup.
1123 */
1124 for (id = 0; id <= MAXCPU; id++) {
1125 if ((id_mask & 1 << id) == 0)
1126 continue;
1127 /* First, make sure we are on a logical_cpus boundary. */
1128 if (id % logical_cpus != 0)
1129 return;
1130 for (i = id + 1; i < id + logical_cpus; i++)
1131 if ((id_mask & 1 << i) != 0)
1132 return;
1133 }
1134
1135 /*
1136 * Ok, the ID's checked out, so enable the fixup. We have to fixup
1137 * mp_naps right now.
1138 */
1139 need_hyperthreading_fixup = 1;
1140 mp_naps *= logical_cpus;
1141 }
1142
1143 void
1144 assign_apic_irq(int apic, int intpin, int irq)
1145 {
1146 int x;
1147
1148 if (int_to_apicintpin[irq].ioapic != -1)
1149 panic("assign_apic_irq: inconsistent table");
1150
1151 int_to_apicintpin[irq].ioapic = apic;
1152 int_to_apicintpin[irq].int_pin = intpin;
1153 int_to_apicintpin[irq].apic_address = ioapic[apic];
1154 int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1155
1156 for (x = 0; x < nintrs; x++) {
1157 if ((io_apic_ints[x].int_type == 0 ||
1158 io_apic_ints[x].int_type == 3) &&
1159 io_apic_ints[x].int_vector == 0xff &&
1160 io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1161 io_apic_ints[x].dst_apic_int == intpin)
1162 io_apic_ints[x].int_vector = irq;
1163 }
1164 }
1165
1166 void
1167 revoke_apic_irq(int irq)
1168 {
1169 int x;
1170 int oldapic;
1171 int oldintpin;
1172
1173 if (int_to_apicintpin[irq].ioapic == -1)
1174 panic("revoke_apic_irq: inconsistent table");
1175
1176 oldapic = int_to_apicintpin[irq].ioapic;
1177 oldintpin = int_to_apicintpin[irq].int_pin;
1178
1179 int_to_apicintpin[irq].ioapic = -1;
1180 int_to_apicintpin[irq].int_pin = 0;
1181 int_to_apicintpin[irq].apic_address = NULL;
1182 int_to_apicintpin[irq].redirindex = 0;
1183
1184 for (x = 0; x < nintrs; x++) {
1185 if ((io_apic_ints[x].int_type == 0 ||
1186 io_apic_ints[x].int_type == 3) &&
1187 io_apic_ints[x].int_vector != 0xff &&
1188 io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1189 io_apic_ints[x].dst_apic_int == oldintpin)
1190 io_apic_ints[x].int_vector = 0xff;
1191 }
1192 }
1193
1194
1195 static void
1196 allocate_apic_irq(int intr)
1197 {
1198 int apic;
1199 int intpin;
1200 int irq;
1201
1202 if (io_apic_ints[intr].int_vector != 0xff)
1203 return; /* Interrupt handler already assigned */
1204
1205 if (io_apic_ints[intr].int_type != 0 &&
1206 (io_apic_ints[intr].int_type != 3 ||
1207 (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1208 io_apic_ints[intr].dst_apic_int == 0)))
1209 return; /* Not INT or ExtInt on != (0, 0) */
1210
1211 irq = 0;
1212 while (irq < APIC_INTMAPSIZE &&
1213 int_to_apicintpin[irq].ioapic != -1)
1214 irq++;
1215
1216 if (irq >= APIC_INTMAPSIZE)
1217 return; /* No free interrupt handlers */
1218
1219 apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1220 intpin = io_apic_ints[intr].dst_apic_int;
1221
1222 assign_apic_irq(apic, intpin, irq);
1223 io_apic_setup_intpin(apic, intpin);
1224 }
1225
1226
1227 static void
1228 swap_apic_id(int apic, int oldid, int newid)
1229 {
1230 int x;
1231 int oapic;
1232
1233
1234 if (oldid == newid)
1235 return; /* Nothing to do */
1236
1237 printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
1238 apic, oldid, newid);
1239
1240 /* Swap physical APIC IDs in interrupt entries */
1241 for (x = 0; x < nintrs; x++) {
1242 if (io_apic_ints[x].dst_apic_id == oldid)
1243 io_apic_ints[x].dst_apic_id = newid;
1244 else if (io_apic_ints[x].dst_apic_id == newid)
1245 io_apic_ints[x].dst_apic_id = oldid;
1246 }
1247
1248 /* Swap physical APIC IDs in IO_TO_ID mappings */
1249 for (oapic = 0; oapic < mp_napics; oapic++)
1250 if (IO_TO_ID(oapic) == newid)
1251 break;
1252
1253 if (oapic < mp_napics) {
1254 printf("Changing APIC ID for IO APIC #%d from "
1255 "%d to %d in MP table\n",
1256 oapic, newid, oldid);
1257 IO_TO_ID(oapic) = oldid;
1258 }
1259 IO_TO_ID(apic) = newid;
1260 }
1261
1262
1263 static void
1264 fix_id_to_io_mapping(void)
1265 {
1266 int x;
1267
1268 for (x = 0; x < NAPICID; x++)
1269 ID_TO_IO(x) = -1;
1270
1271 for (x = 0; x <= mp_naps; x++)
1272 if (CPU_TO_ID(x) < NAPICID)
1273 ID_TO_IO(CPU_TO_ID(x)) = x;
1274
1275 for (x = 0; x < mp_napics; x++)
1276 if (IO_TO_ID(x) < NAPICID)
1277 ID_TO_IO(IO_TO_ID(x)) = x;
1278 }
1279
1280
1281 static int
1282 first_free_apic_id(void)
1283 {
1284 int freeid, x;
1285
1286 for (freeid = 0; freeid < NAPICID; freeid++) {
1287 for (x = 0; x <= mp_naps; x++)
1288 if (CPU_TO_ID(x) == freeid)
1289 break;
1290 if (x <= mp_naps)
1291 continue;
1292 for (x = 0; x < mp_napics; x++)
1293 if (IO_TO_ID(x) == freeid)
1294 break;
1295 if (x < mp_napics)
1296 continue;
1297 return freeid;
1298 }
1299 return freeid;
1300 }
1301
1302
1303 static int
1304 io_apic_id_acceptable(int apic, int id)
1305 {
1306 int cpu; /* Logical CPU number */
1307 int oapic; /* Logical IO APIC number for other IO APIC */
1308
1309 if (id >= NAPICID)
1310 return 0; /* Out of range */
1311
1312 for (cpu = 0; cpu <= mp_naps; cpu++)
1313 if (CPU_TO_ID(cpu) == id)
1314 return 0; /* Conflict with CPU */
1315
1316 for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1317 if (IO_TO_ID(oapic) == id)
1318 return 0; /* Conflict with other APIC */
1319
1320 return 1; /* ID is acceptable for IO APIC */
1321 }
1322
1323
1324 /*
1325 * parse an Intel MP specification table
1326 */
1327 static void
1328 fix_mp_table(void)
1329 {
1330 int x;
1331 int id;
1332 int bus_0 = 0; /* Stop GCC warning */
1333 int bus_pci = 0; /* Stop GCC warning */
1334 int num_pci_bus;
1335 int apic; /* IO APIC unit number */
1336 int freeid; /* Free physical APIC ID */
1337 int physid; /* Current physical IO APIC ID */
1338
1339 /*
1340 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1341 * did it wrong. The MP spec says that when more than 1 PCI bus
1342 * exists the BIOS must begin with bus entries for the PCI bus and use
1343 * actual PCI bus numbering. This implies that when only 1 PCI bus
1344 * exists the BIOS can choose to ignore this ordering, and indeed many
1345 * MP motherboards do ignore it. This causes a problem when the PCI
1346 * sub-system makes requests of the MP sub-system based on PCI bus
1347 * numbers. So here we look for the situation and renumber the
1348 * busses and associated INTs in an effort to "make it right".
1349 */
1350
1351 /* find bus 0, PCI bus, count the number of PCI busses */
1352 for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1353 if (bus_data[x].bus_id == 0) {
1354 bus_0 = x;
1355 }
1356 if (bus_data[x].bus_type == PCI) {
1357 ++num_pci_bus;
1358 bus_pci = x;
1359 }
1360 }
1361 /*
1362 * bus_0 == slot of bus with ID of 0
1363 * bus_pci == slot of last PCI bus encountered
1364 */
1365
1366 /* check the 1 PCI bus case for sanity */
1367 /* if it is number 0 all is well */
1368 if (num_pci_bus == 1 &&
1369 bus_data[bus_pci].bus_id != 0) {
1370
1371 /* mis-numbered, swap with whichever bus uses slot 0 */
1372
1373 /* swap the bus entry types */
1374 bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1375 bus_data[bus_0].bus_type = PCI;
1376
1377 /* swap each relavant INTerrupt entry */
1378 id = bus_data[bus_pci].bus_id;
1379 for (x = 0; x < nintrs; ++x) {
1380 if (io_apic_ints[x].src_bus_id == id) {
1381 io_apic_ints[x].src_bus_id = 0;
1382 }
1383 else if (io_apic_ints[x].src_bus_id == 0) {
1384 io_apic_ints[x].src_bus_id = id;
1385 }
1386 }
1387 }
1388
1389 /* Assign IO APIC IDs.
1390 *
1391 * First try the existing ID. If a conflict is detected, try
1392 * the ID in the MP table. If a conflict is still detected, find
1393 * a free id.
1394 *
1395 * We cannot use the ID_TO_IO table before all conflicts has been
1396 * resolved and the table has been corrected.
1397 */
1398 for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
1399
1400 /* First try to use the value set by the BIOS */
1401 physid = io_apic_get_id(apic);
1402 if (io_apic_id_acceptable(apic, physid)) {
1403 if (IO_TO_ID(apic) != physid)
1404 swap_apic_id(apic, IO_TO_ID(apic), physid);
1405 continue;
1406 }
1407
1408 /* Then check if the value in the MP table is acceptable */
1409 if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
1410 continue;
1411
1412 /* Last resort, find a free APIC ID and use it */
1413 freeid = first_free_apic_id();
1414 if (freeid >= NAPICID)
1415 panic("No free physical APIC IDs found");
1416
1417 if (io_apic_id_acceptable(apic, freeid)) {
1418 swap_apic_id(apic, IO_TO_ID(apic), freeid);
1419 continue;
1420 }
1421 panic("Free physical APIC ID not usable");
1422 }
1423 fix_id_to_io_mapping();
1424
1425 /* detect and fix broken Compaq MP table */
1426 if (apic_int_type(0, 0) == -1) {
1427 printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
1428 io_apic_ints[nintrs].int_type = 3; /* ExtInt */
1429 io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */
1430 /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
1431 io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
1432 io_apic_ints[nintrs].dst_apic_int = 0; /* Pin 0 */
1433 nintrs++;
1434 } else if (apic_int_type(0, 0) == 0) {
1435 printf("APIC_IO: MP table broken: ExtINT entry corrupt!\n");
1436 for (x = 0; x < nintrs; ++x)
1437 if ((0 == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1438 (0 == io_apic_ints[x].dst_apic_int)) {
1439 io_apic_ints[x].int_type = 3;
1440 io_apic_ints[x].int_vector = 0xff;
1441 break;
1442 }
1443 }
1444 }
1445
1446
1447 /* Assign low level interrupt handlers */
1448 static void
1449 setup_apic_irq_mapping(void)
1450 {
1451 int x;
1452 int int_vector;
1453
1454 /* Clear array */
1455 for (x = 0; x < APIC_INTMAPSIZE; x++) {
1456 int_to_apicintpin[x].ioapic = -1;
1457 int_to_apicintpin[x].int_pin = 0;
1458 int_to_apicintpin[x].apic_address = NULL;
1459 int_to_apicintpin[x].redirindex = 0;
1460 }
1461
1462 /* First assign ISA/EISA interrupts */
1463 for (x = 0; x < nintrs; x++) {
1464 int_vector = io_apic_ints[x].src_bus_irq;
1465 if (int_vector < APIC_INTMAPSIZE &&
1466 io_apic_ints[x].int_vector == 0xff &&
1467 int_to_apicintpin[int_vector].ioapic == -1 &&
1468 (apic_int_is_bus_type(x, ISA) ||
1469 apic_int_is_bus_type(x, EISA)) &&
1470 io_apic_ints[x].int_type == 0) {
1471 assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1472 io_apic_ints[x].dst_apic_int,
1473 int_vector);
1474 }
1475 }
1476
1477 /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1478 for (x = 0; x < nintrs; x++) {
1479 if (io_apic_ints[x].dst_apic_int == 0 &&
1480 io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1481 io_apic_ints[x].int_vector == 0xff &&
1482 int_to_apicintpin[0].ioapic == -1 &&
1483 io_apic_ints[x].int_type == 3) {
1484 assign_apic_irq(0, 0, 0);
1485 break;
1486 }
1487 }
1488 /* PCI interrupt assignment is deferred */
1489 }
1490
1491
1492 static int
1493 processor_entry(proc_entry_ptr entry, int cpu)
1494 {
1495 /* check for usability */
1496 if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1497 return 0;
1498
1499 if(entry->apic_id >= NAPICID)
1500 panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
1501 /* check for BSP flag */
1502 if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1503 boot_cpu_id = entry->apic_id;
1504 CPU_TO_ID(0) = entry->apic_id;
1505 ID_TO_CPU(entry->apic_id) = 0;
1506 return 0; /* its already been counted */
1507 }
1508
1509 /* add another AP to list, if less than max number of CPUs */
1510 else if (cpu < MAXCPU) {
1511 CPU_TO_ID(cpu) = entry->apic_id;
1512 ID_TO_CPU(entry->apic_id) = cpu;
1513 return 1;
1514 }
1515
1516 return 0;
1517 }
1518
1519
1520 static int
1521 bus_entry(bus_entry_ptr entry, int bus)
1522 {
1523 int x;
1524 char c, name[8];
1525
1526 /* encode the name into an index */
1527 for (x = 0; x < 6; ++x) {
1528 if ((c = entry->bus_type[x]) == ' ')
1529 break;
1530 name[x] = c;
1531 }
1532 name[x] = '\0';
1533
1534 if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1535 panic("unknown bus type: '%s'", name);
1536
1537 bus_data[bus].bus_id = entry->bus_id;
1538 bus_data[bus].bus_type = x;
1539
1540 return 1;
1541 }
1542
1543
1544 static int
1545 io_apic_entry(io_apic_entry_ptr entry, int apic)
1546 {
1547 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1548 return 0;
1549
1550 IO_TO_ID(apic) = entry->apic_id;
1551 if (entry->apic_id < NAPICID)
1552 ID_TO_IO(entry->apic_id) = apic;
1553
1554 return 1;
1555 }
1556
1557
1558 static int
1559 lookup_bus_type(char *name)
1560 {
1561 int x;
1562
1563 for (x = 0; x < MAX_BUSTYPE; ++x)
1564 if (strcmp(bus_type_table[x].name, name) == 0)
1565 return bus_type_table[x].type;
1566
1567 return UNKNOWN_BUSTYPE;
1568 }
1569
1570
1571 static int
1572 int_entry(int_entry_ptr entry, int intr)
1573 {
1574 int apic;
1575
1576 io_apic_ints[intr].int_type = entry->int_type;
1577 io_apic_ints[intr].int_flags = entry->int_flags;
1578 io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1579 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1580 if (entry->dst_apic_id == 255) {
1581 /* This signal goes to all IO APICS. Select an IO APIC
1582 with sufficient number of interrupt pins */
1583 for (apic = 0; apic < mp_napics; apic++) {
1584 if (io_apic_read(apic, IOAPIC_VER) == 0xffffffff)
1585 continue;
1586 if (((io_apic_read(apic, IOAPIC_VER) &
1587 IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1588 entry->dst_apic_int)
1589 break;
1590 }
1591 if (apic < mp_napics)
1592 io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1593 else
1594 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1595 } else
1596 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1597 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1598
1599 return 1;
1600 }
1601
1602
1603 static int
1604 apic_int_is_bus_type(int intr, int bus_type)
1605 {
1606 int bus;
1607
1608 for (bus = 0; bus < mp_nbusses; ++bus)
1609 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1610 && ((int) bus_data[bus].bus_type == bus_type))
1611 return 1;
1612
1613 return 0;
1614 }
1615
1616
1617 /*
1618 * Given a traditional ISA INT mask, return an APIC mask.
1619 */
1620 u_int
1621 isa_apic_mask(u_int isa_mask)
1622 {
1623 int isa_irq;
1624 int apic_pin;
1625
1626 #if defined(SKIP_IRQ15_REDIRECT)
1627 if (isa_mask == (1 << 15)) {
1628 printf("skipping ISA IRQ15 redirect\n");
1629 return isa_mask;
1630 }
1631 #endif /* SKIP_IRQ15_REDIRECT */
1632
1633 isa_irq = ffs(isa_mask); /* find its bit position */
1634 if (isa_irq == 0) /* doesn't exist */
1635 return 0;
1636 --isa_irq; /* make it zero based */
1637
1638 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */
1639 if (apic_pin == -1)
1640 return 0;
1641
1642 return (1 << apic_pin); /* convert pin# to a mask */
1643 }
1644
1645
1646 /*
1647 * Determine which APIC pin an ISA/EISA INT is attached to.
1648 */
1649 #define INTTYPE(I) (io_apic_ints[(I)].int_type)
1650 #define INTPIN(I) (io_apic_ints[(I)].dst_apic_int)
1651 #define INTIRQ(I) (io_apic_ints[(I)].int_vector)
1652 #define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1653
1654 #define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq)
1655 int
1656 isa_apic_irq(int isa_irq)
1657 {
1658 int intr;
1659
1660 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
1661 if (INTTYPE(intr) == 0) { /* standard INT */
1662 if (SRCBUSIRQ(intr) == isa_irq) {
1663 if (apic_int_is_bus_type(intr, ISA) ||
1664 apic_int_is_bus_type(intr, EISA)) {
1665 if (INTIRQ(intr) == 0xff)
1666 return -1; /* unassigned */
1667 return INTIRQ(intr); /* found */
1668 }
1669 }
1670 }
1671 }
1672 return -1; /* NOT found */
1673 }
1674
1675
1676 /*
1677 * Determine which APIC pin a PCI INT is attached to.
1678 */
1679 #define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id)
1680 #define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1681 #define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03)
1682 static int
1683 pci_apic_irq_raw(int pciBus, int pciDevice, int pciInt)
1684 {
1685 int intr;
1686
1687 --pciInt; /* zero based */
1688
1689 for (intr = 0; intr < nintrs; ++intr) /* check each record */
1690 if ((INTTYPE(intr) == 0) /* standard INT */
1691 && (SRCBUSID(intr) == pciBus)
1692 && (SRCBUSDEVICE(intr) == pciDevice)
1693 && (SRCBUSLINE(intr) == pciInt)) /* a candidate IRQ */
1694 if (apic_int_is_bus_type(intr, PCI)) {
1695 if (INTIRQ(intr) == 0xff)
1696 allocate_apic_irq(intr);
1697 if (INTIRQ(intr) == 0xff)
1698 return -1; /* unassigned */
1699 return INTIRQ(intr); /* exact match */
1700 }
1701
1702 return -1; /* NOT found */
1703 }
1704
1705 static int
1706 pci_apic_bus_present(int bus)
1707 {
1708 int intr;
1709
1710 for (intr = 0; intr < nintrs; ++intr)
1711 if ((INTTYPE(intr) == 0) && (SRCBUSID(intr) == bus))
1712 return (1);
1713 return (0);
1714 }
1715
1716 int
1717 pci_apic_irq(int bus, int device, int pin, void *arg)
1718 {
1719 device_t dev, bus_dev, pcib, parent;
1720 int irq;
1721
1722 parent = (device_t)arg;
1723 pcib = NULL;
1724 loop:
1725 /* See if there is an exact match first. */
1726 if (bootverbose) {
1727 printf("APIC_IO: trying to route %d:%d INT%c\n", bus, device,
1728 pin + 'A' - 1);
1729 }
1730 irq = pci_apic_irq_raw(bus, device, pin);
1731 if (irq != -1)
1732 return (irq);
1733
1734 /* If this bus has other entries but not this one, punt. */
1735 if (pci_apic_bus_present(bus))
1736 return (-1);
1737
1738 /* Safety net, don't try to walk past bus 0. */
1739 if (bus == 0)
1740 return (-1);
1741
1742 /*
1743 * Try to find our parent bus and the bridge it hangs off of. If
1744 * we are recursing up the chain, we need to find the previous bridge's
1745 * parent bus. If we have a valid parent device, then that is our
1746 * parent bus. Otherwise, try to find ourself so that we can find
1747 * our parent bus. Every device has a function of 0 and we are
1748 * really just trying to find our parent, so assume a function of 0
1749 * to find either ourself or one of our siblings.
1750 */
1751 if (pcib != NULL)
1752 bus_dev = device_get_parent(pcib);
1753 else if (parent != NULL)
1754 bus_dev = parent;
1755 else {
1756 dev = pci_find_bsf(bus, device, 0);
1757 if (dev == NULL)
1758 return (-1);
1759 bus_dev = device_get_parent(dev);
1760 }
1761 if (bus_dev == NULL)
1762 return (-1);
1763 pcib = device_get_parent(bus_dev);
1764 if (pcib == NULL)
1765 return (-1);
1766
1767 if (device_get_parent(pcib) == NULL)
1768 return (-1);
1769
1770 /*
1771 * Do the swizzle thing.
1772 *
1773 * XXX: no error checking for the bus number here
1774 * (valid, does it exist, etc.).
1775 */
1776 bus = pci_get_bus(pcib);
1777 pin = (device + (pin - 1)) % 4 + 1;
1778 device = pci_get_slot(pcib);
1779 goto loop;
1780 }
1781
1782 int
1783 next_apic_irq(int irq)
1784 {
1785 int intr, ointr;
1786 int bus, bustype;
1787
1788 bus = 0;
1789 bustype = 0;
1790 for (intr = 0; intr < nintrs; intr++) {
1791 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1792 continue;
1793 bus = SRCBUSID(intr);
1794 bustype = apic_bus_type(bus);
1795 if (bustype != ISA &&
1796 bustype != EISA &&
1797 bustype != PCI)
1798 continue;
1799 break;
1800 }
1801 if (intr >= nintrs) {
1802 return -1;
1803 }
1804 for (ointr = intr + 1; ointr < nintrs; ointr++) {
1805 if (INTTYPE(ointr) != 0)
1806 continue;
1807 if (bus != SRCBUSID(ointr))
1808 continue;
1809 if (bustype == PCI) {
1810 if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1811 continue;
1812 if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1813 continue;
1814 }
1815 if (bustype == ISA || bustype == EISA) {
1816 if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1817 continue;
1818 }
1819 if (INTPIN(intr) == INTPIN(ointr))
1820 continue;
1821 break;
1822 }
1823 if (ointr >= nintrs) {
1824 return -1;
1825 }
1826 return INTIRQ(ointr);
1827 }
1828 #undef SRCBUSLINE
1829 #undef SRCBUSDEVICE
1830 #undef SRCBUSID
1831 #undef SRCBUSIRQ
1832
1833 #undef INTPIN
1834 #undef INTIRQ
1835 #undef INTAPIC
1836 #undef INTTYPE
1837
1838
1839 /*
1840 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1841 *
1842 * XXX FIXME:
1843 * Exactly what this means is unclear at this point. It is a solution
1844 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard
1845 * could route any of the ISA INTs to upper (>15) IRQ values. But most would
1846 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1847 * option.
1848 */
1849 int
1850 undirect_isa_irq(int rirq)
1851 {
1852 #if defined(READY)
1853 if (bootverbose)
1854 printf("Freeing redirected ISA irq %d.\n", rirq);
1855 /** FIXME: tickle the MB redirector chip */
1856 return ???;
1857 #else
1858 if (bootverbose)
1859 printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1860 return 0;
1861 #endif /* READY */
1862 }
1863
1864
1865 /*
1866 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1867 */
1868 int
1869 undirect_pci_irq(int rirq)
1870 {
1871 #if defined(READY)
1872 if (bootverbose)
1873 printf("Freeing redirected PCI irq %d.\n", rirq);
1874
1875 /** FIXME: tickle the MB redirector chip */
1876 return ???;
1877 #else
1878 if (bootverbose)
1879 printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1880 rirq);
1881 return 0;
1882 #endif /* READY */
1883 }
1884
1885
1886 /*
1887 * given a bus ID, return:
1888 * the bus type if found
1889 * -1 if NOT found
1890 */
1891 int
1892 apic_bus_type(int id)
1893 {
1894 int x;
1895
1896 for (x = 0; x < mp_nbusses; ++x)
1897 if (bus_data[x].bus_id == id)
1898 return bus_data[x].bus_type;
1899
1900 return -1;
1901 }
1902
1903
1904 /*
1905 * given a LOGICAL APIC# and pin#, return:
1906 * the associated src bus ID if found
1907 * -1 if NOT found
1908 */
1909 int
1910 apic_src_bus_id(int apic, int pin)
1911 {
1912 int x;
1913
1914 /* search each of the possible INTerrupt sources */
1915 for (x = 0; x < nintrs; ++x)
1916 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1917 (pin == io_apic_ints[x].dst_apic_int))
1918 return (io_apic_ints[x].src_bus_id);
1919
1920 return -1; /* NOT found */
1921 }
1922
1923
1924 /*
1925 * given a LOGICAL APIC# and pin#, return:
1926 * the associated src bus IRQ if found
1927 * -1 if NOT found
1928 */
1929 int
1930 apic_src_bus_irq(int apic, int pin)
1931 {
1932 int x;
1933
1934 for (x = 0; x < nintrs; x++)
1935 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1936 (pin == io_apic_ints[x].dst_apic_int))
1937 return (io_apic_ints[x].src_bus_irq);
1938
1939 return -1; /* NOT found */
1940 }
1941
1942
1943 /*
1944 * given a LOGICAL APIC# and pin#, return:
1945 * the associated INTerrupt type if found
1946 * -1 if NOT found
1947 */
1948 int
1949 apic_int_type(int apic, int pin)
1950 {
1951 int x;
1952
1953 /* search each of the possible INTerrupt sources */
1954 for (x = 0; x < nintrs; ++x)
1955 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1956 (pin == io_apic_ints[x].dst_apic_int))
1957 return (io_apic_ints[x].int_type);
1958
1959 return -1; /* NOT found */
1960 }
1961
1962 int
1963 apic_irq(int apic, int pin)
1964 {
1965 int x;
1966 int res;
1967
1968 for (x = 0; x < nintrs; ++x)
1969 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1970 (pin == io_apic_ints[x].dst_apic_int)) {
1971 res = io_apic_ints[x].int_vector;
1972 if (res == 0xff)
1973 return -1;
1974 if (apic != int_to_apicintpin[res].ioapic)
1975 panic("apic_irq: inconsistent table");
1976 if (pin != int_to_apicintpin[res].int_pin)
1977 panic("apic_irq inconsistent table (2)");
1978 return res;
1979 }
1980 return -1;
1981 }
1982
1983
1984 /*
1985 * given a LOGICAL APIC# and pin#, return:
1986 * the associated trigger mode if found
1987 * -1 if NOT found
1988 */
1989 int
1990 apic_trigger(int apic, int pin)
1991 {
1992 int x;
1993
1994 /* search each of the possible INTerrupt sources */
1995 for (x = 0; x < nintrs; ++x)
1996 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1997 (pin == io_apic_ints[x].dst_apic_int))
1998 return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1999
2000 return -1; /* NOT found */
2001 }
2002
2003
2004 /*
2005 * given a LOGICAL APIC# and pin#, return:
2006 * the associated 'active' level if found
2007 * -1 if NOT found
2008 */
2009 int
2010 apic_polarity(int apic, int pin)
2011 {
2012 int x;
2013
2014 /* search each of the possible INTerrupt sources */
2015 for (x = 0; x < nintrs; ++x)
2016 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
2017 (pin == io_apic_ints[x].dst_apic_int))
2018 return (io_apic_ints[x].int_flags & 0x03);
2019
2020 return -1; /* NOT found */
2021 }
2022
2023
2024 /*
2025 * set data according to MP defaults
2026 * FIXME: probably not complete yet...
2027 */
2028 static void
2029 default_mp_table(int type)
2030 {
2031 int ap_cpu_id;
2032 #if defined(APIC_IO)
2033 int io_apic_id;
2034 int pin;
2035 #endif /* APIC_IO */
2036
2037 #if 0
2038 printf(" MP default config type: %d\n", type);
2039 switch (type) {
2040 case 1:
2041 printf(" bus: ISA, APIC: 82489DX\n");
2042 break;
2043 case 2:
2044 printf(" bus: EISA, APIC: 82489DX\n");
2045 break;
2046 case 3:
2047 printf(" bus: EISA, APIC: 82489DX\n");
2048 break;
2049 case 4:
2050 printf(" bus: MCA, APIC: 82489DX\n");
2051 break;
2052 case 5:
2053 printf(" bus: ISA+PCI, APIC: Integrated\n");
2054 break;
2055 case 6:
2056 printf(" bus: EISA+PCI, APIC: Integrated\n");
2057 break;
2058 case 7:
2059 printf(" bus: MCA+PCI, APIC: Integrated\n");
2060 break;
2061 default:
2062 printf(" future type\n");
2063 break;
2064 /* NOTREACHED */
2065 }
2066 #endif /* 0 */
2067
2068 boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
2069 ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
2070
2071 /* BSP */
2072 CPU_TO_ID(0) = boot_cpu_id;
2073 ID_TO_CPU(boot_cpu_id) = 0;
2074
2075 /* one and only AP */
2076 CPU_TO_ID(1) = ap_cpu_id;
2077 ID_TO_CPU(ap_cpu_id) = 1;
2078
2079 #if defined(APIC_IO)
2080 /* one and only IO APIC */
2081 io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
2082
2083 /*
2084 * sanity check, refer to MP spec section 3.6.6, last paragraph
2085 * necessary as some hardware isn't properly setting up the IO APIC
2086 */
2087 #if defined(REALLY_ANAL_IOAPICID_VALUE)
2088 if (io_apic_id != 2) {
2089 #else
2090 if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
2091 #endif /* REALLY_ANAL_IOAPICID_VALUE */
2092 io_apic_set_id(0, 2);
2093 io_apic_id = 2;
2094 }
2095 IO_TO_ID(0) = io_apic_id;
2096 ID_TO_IO(io_apic_id) = 0;
2097 #endif /* APIC_IO */
2098
2099 /* fill out bus entries */
2100 switch (type) {
2101 case 1:
2102 case 2:
2103 case 3:
2104 case 4:
2105 case 5:
2106 case 6:
2107 case 7:
2108 bus_data[0].bus_id = default_data[type - 1][1];
2109 bus_data[0].bus_type = default_data[type - 1][2];
2110 bus_data[1].bus_id = default_data[type - 1][3];
2111 bus_data[1].bus_type = default_data[type - 1][4];
2112 break;
2113
2114 /* case 4: case 7: MCA NOT supported */
2115 default: /* illegal/reserved */
2116 panic("BAD default MP config: %d", type);
2117 /* NOTREACHED */
2118 }
2119
2120 #if defined(APIC_IO)
2121 /* general cases from MP v1.4, table 5-2 */
2122 for (pin = 0; pin < 16; ++pin) {
2123 io_apic_ints[pin].int_type = 0;
2124 io_apic_ints[pin].int_flags = 0x05; /* edge/active-hi */
2125 io_apic_ints[pin].src_bus_id = 0;
2126 io_apic_ints[pin].src_bus_irq = pin; /* IRQ2 caught below */
2127 io_apic_ints[pin].dst_apic_id = io_apic_id;
2128 io_apic_ints[pin].dst_apic_int = pin; /* 1-to-1 */
2129 }
2130
2131 /* special cases from MP v1.4, table 5-2 */
2132 if (type == 2) {
2133 io_apic_ints[2].int_type = 0xff; /* N/C */
2134 io_apic_ints[13].int_type = 0xff; /* N/C */
2135 #if !defined(APIC_MIXED_MODE)
2136 /** FIXME: ??? */
2137 panic("sorry, can't support type 2 default yet");
2138 #endif /* APIC_MIXED_MODE */
2139 }
2140 else
2141 io_apic_ints[2].src_bus_irq = 0; /* ISA IRQ0 is on APIC INT 2 */
2142
2143 if (type == 7)
2144 io_apic_ints[0].int_type = 0xff; /* N/C */
2145 else
2146 io_apic_ints[0].int_type = 3; /* vectored 8259 */
2147 #endif /* APIC_IO */
2148 }
2149
2150
2151 /*
2152 * initialize all the SMP locks
2153 */
2154
2155 /* critical region around IO APIC, apic_imen */
2156 struct simplelock imen_lock;
2157
2158 /* critical region around splxx(), cpl, cml, cil, ipending */
2159 struct simplelock cpl_lock;
2160
2161 /* Make FAST_INTR() routines sequential */
2162 struct simplelock fast_intr_lock;
2163
2164 /* critical region around INTR() routines */
2165 struct simplelock intr_lock;
2166
2167 /* lock regions protected in UP kernel via cli/sti */
2168 struct simplelock mpintr_lock;
2169
2170 /* lock region used by kernel profiling */
2171 struct simplelock mcount_lock;
2172
2173 #ifdef USE_COMLOCK
2174 /* locks com (tty) data/hardware accesses: a FASTINTR() */
2175 struct simplelock com_lock;
2176 #endif /* USE_COMLOCK */
2177
2178 #ifdef USE_CLOCKLOCK
2179 /* lock regions around the clock hardware */
2180 struct simplelock clock_lock;
2181 #endif /* USE_CLOCKLOCK */
2182
2183 /* lock around the MP rendezvous */
2184 static struct simplelock smp_rv_lock;
2185
2186 static void
2187 init_locks(void)
2188 {
2189 /*
2190 * Get the initial mp_lock with a count of 1 for the BSP.
2191 * This uses a LOGICAL cpu ID, ie BSP == 0.
2192 */
2193 mp_lock = 0x00000001;
2194
2195 #if 0
2196 /* ISR uses its own "giant lock" */
2197 isr_lock = FREE_LOCK;
2198 #endif
2199
2200 #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
2201 s_lock_init((struct simplelock*)&apic_itrace_debuglock);
2202 #endif
2203
2204 s_lock_init((struct simplelock*)&mpintr_lock);
2205
2206 s_lock_init((struct simplelock*)&mcount_lock);
2207
2208 s_lock_init((struct simplelock*)&fast_intr_lock);
2209 s_lock_init((struct simplelock*)&intr_lock);
2210 s_lock_init((struct simplelock*)&imen_lock);
2211 s_lock_init((struct simplelock*)&cpl_lock);
2212 s_lock_init(&smp_rv_lock);
2213
2214 #ifdef USE_COMLOCK
2215 s_lock_init((struct simplelock*)&com_lock);
2216 #endif /* USE_COMLOCK */
2217 #ifdef USE_CLOCKLOCK
2218 s_lock_init((struct simplelock*)&clock_lock);
2219 #endif /* USE_CLOCKLOCK */
2220 }
2221
2222
2223 /* Wait for all APs to be fully initialized */
2224 extern int wait_ap(unsigned int);
2225
2226 /*
2227 * start each AP in our list
2228 */
2229 static int
2230 start_all_aps(u_int boot_addr)
2231 {
2232 int x, i, pg;
2233 u_char mpbiosreason;
2234 u_long mpbioswarmvec;
2235 struct globaldata *gd;
2236 char *stack;
2237 uintptr_t kptbase;
2238
2239 POSTCODE(START_ALL_APS_POST);
2240
2241 /* initialize BSP's local APIC */
2242 apic_initialize();
2243 bsp_apic_ready = 1;
2244
2245 /* install the AP 1st level boot code */
2246 install_ap_tramp(boot_addr);
2247
2248
2249 /* save the current value of the warm-start vector */
2250 mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
2251 #ifndef PC98
2252 outb(CMOS_REG, BIOS_RESET);
2253 mpbiosreason = inb(CMOS_DATA);
2254 #endif
2255
2256 /* record BSP in CPU map */
2257 all_cpus = 1;
2258
2259 /* set up temporary P==V mapping for AP boot */
2260 /* XXX this is a hack, we should boot the AP on its own stack/PTD */
2261 kptbase = (uintptr_t)(void *)KPTphys;
2262 for (x = 0; x < NKPT; x++)
2263 PTD[x] = (pd_entry_t)(PG_V | PG_RW |
2264 ((kptbase + x * PAGE_SIZE) & PG_FRAME));
2265 invltlb();
2266
2267 /* start each AP */
2268 for (x = 1; x <= mp_naps; ++x) {
2269
2270 /* This is a bit verbose, it will go away soon. */
2271
2272 /* first page of AP's private space */
2273 pg = x * i386_btop(sizeof(struct privatespace));
2274
2275 /* allocate a new private data page */
2276 gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
2277
2278 /* wire it into the private page table page */
2279 SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
2280
2281 /* allocate and set up an idle stack data page */
2282 stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
2283 for (i = 0; i < UPAGES; i++)
2284 SMPpt[pg + 6 + i] = (pt_entry_t)
2285 (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
2286
2287 SMPpt[pg + 1] = 0; /* *prv_CMAP1 */
2288 SMPpt[pg + 2] = 0; /* *prv_CMAP2 */
2289 SMPpt[pg + 3] = 0; /* *prv_CMAP3 */
2290 SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
2291 SMPpt[pg + 5] = 0; /* *prv_PMAP2 */
2292
2293 /* prime data page for it to use */
2294 gd->gd_cpuid = x;
2295 gd->gd_cpu_lockid = x << 24;
2296 gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
2297 gd->gd_prv_CMAP2 = &SMPpt[pg + 2];
2298 gd->gd_prv_CMAP3 = &SMPpt[pg + 3];
2299 gd->gd_prv_PMAP1 = (pd_entry_t *)&SMPpt[pg + 4];
2300 gd->gd_prv_PMAP2 = (pd_entry_t *)&SMPpt[pg + 5];
2301 gd->gd_prv_CADDR1 = SMP_prvspace[x].CPAGE1;
2302 gd->gd_prv_CADDR2 = SMP_prvspace[x].CPAGE2;
2303 gd->gd_prv_CADDR3 = SMP_prvspace[x].CPAGE3;
2304 gd->gd_prv_PADDR1 = (pt_entry_t *)SMP_prvspace[x].PPAGE1;
2305 gd->gd_prv_PADDR2 = (pt_entry_t *)SMP_prvspace[x].PPAGE2;
2306
2307 /* setup a vector to our boot code */
2308 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
2309 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
2310 #ifndef PC98
2311 outb(CMOS_REG, BIOS_RESET);
2312 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
2313 #endif
2314
2315 bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
2316 bootAP = x;
2317
2318 /* attempt to start the Application Processor */
2319 CHECK_INIT(99); /* setup checkpoints */
2320 if (!start_ap(x, boot_addr)) {
2321 printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
2322 CHECK_PRINT("trace"); /* show checkpoints */
2323 /* better panic as the AP may be running loose */
2324 printf("panic y/n? [y] ");
2325 if (cngetc() != 'n')
2326 panic("bye-bye");
2327 }
2328 CHECK_PRINT("trace"); /* show checkpoints */
2329
2330 /* record its version info */
2331 cpu_apic_versions[x] = cpu_apic_versions[0];
2332
2333 all_cpus |= (1 << x); /* record AP in CPU map */
2334 }
2335
2336 /* build our map of 'other' CPUs */
2337 other_cpus = all_cpus & ~(1 << cpuid);
2338
2339 /* fill in our (BSP) APIC version */
2340 cpu_apic_versions[0] = lapic.version;
2341
2342 /* restore the warmstart vector */
2343 *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
2344 #ifndef PC98
2345 outb(CMOS_REG, BIOS_RESET);
2346 outb(CMOS_DATA, mpbiosreason);
2347 #endif
2348
2349 /*
2350 * Set up the idle context for the BSP. Similar to above except
2351 * that some was done by locore, some by pmap.c and some is implicit
2352 * because the BSP is cpu#0 and the page is initially zero, and also
2353 * because we can refer to variables by name on the BSP..
2354 */
2355
2356 /* Allocate and setup BSP idle stack */
2357 stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
2358 for (i = 0; i < UPAGES; i++)
2359 SMPpt[6 + i] = (pt_entry_t)
2360 (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
2361
2362 for (x = 0; x < NKPT; x++)
2363 PTD[x] = 0;
2364 pmap_set_opt();
2365
2366 /* number of APs actually started */
2367 return mp_ncpus - 1;
2368 }
2369
2370
2371 /*
2372 * load the 1st level AP boot code into base memory.
2373 */
2374
/*
 * targets for relocation
 *
 * Symbols inside the AP boot image (which starts at bootMP).  They are
 * used by install_ap_tramp() only to compute offsets into the copy of
 * the image placed at boot_addr.
 */
extern void bigJump(void);	/* ljmp whose operand (at +1) is patched */
extern void bootCodeSeg(void);	/* patched with low 24 bits of boot_addr */
extern void bootDataSeg(void);	/* patched with low 24 bits of boot_addr */
extern void MPentry(void);	/* ljmp target, stored as MPentry - KERNBASE */
extern u_int MP_GDT;		/* its relocated address is stored in mp_gdtbase */
extern u_int mp_gdtbase;	/* the lgdt argument inside the boot image */
2382
2383 static void
2384 install_ap_tramp(u_int boot_addr)
2385 {
2386 int x;
2387 int size = *(int *) ((u_long) & bootMP_size);
2388 u_char *src = (u_char *) ((u_long) bootMP);
2389 u_char *dst = (u_char *) boot_addr + KERNBASE;
2390 u_int boot_base = (u_int) bootMP;
2391 u_int8_t *dst8;
2392 u_int16_t *dst16;
2393 u_int32_t *dst32;
2394
2395 POSTCODE(INSTALL_AP_TRAMP_POST);
2396
2397 for (x = 0; x < size; ++x)
2398 *dst++ = *src++;
2399
2400 /*
2401 * modify addresses in code we just moved to basemem. unfortunately we
2402 * need fairly detailed info about mpboot.s for this to work. changes
2403 * to mpboot.s might require changes here.
2404 */
2405
2406 /* boot code is located in KERNEL space */
2407 dst = (u_char *) boot_addr + KERNBASE;
2408
2409 /* modify the lgdt arg */
2410 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
2411 *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
2412
2413 /* modify the ljmp target for MPentry() */
2414 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
2415 *dst32 = ((u_int) MPentry - KERNBASE);
2416
2417 /* modify the target for boot code segment */
2418 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
2419 dst8 = (u_int8_t *) (dst16 + 1);
2420 *dst16 = (u_int) boot_addr & 0xffff;
2421 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2422
2423 /* modify the target for boot data segment */
2424 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
2425 dst8 = (u_int8_t *) (dst16 + 1);
2426 *dst16 = (u_int) boot_addr & 0xffff;
2427 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2428 }
2429
2430
/*
 * this function starts the AP (application processor) identified
 * by the logical CPU number 'logical_cpu', which is translated to the
 * physical APIC ID with CPU_TO_ID().  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It ain't pretty,
 * but it seems to work.
 */
/*
 * Parameters:
 *	logical_cpu	logical CPU number of the AP to start
 *	boot_addr	address of the AP boot code; bits 12..19 of it
 *			become the STARTUP IPI vector
 *
 * Returns 1 once mp_ncpus exceeds the value it had on entry, or 0 if
 * that has not happened by the time the APIC timer armed with
 * set_apic_timer(5000000) reads zero.
 */
static int
start_ap(int logical_cpu, u_int boot_addr)
{
	int physical_cpu;
	int vector;
	int cpus;
	u_long icr_lo, icr_hi;

	POSTCODE(START_AP_POST);

	/* get the PHYSICAL APIC ID# */
	physical_cpu = CPU_TO_ID(logical_cpu);

	/* calculate the vector */
	vector = (boot_addr >> 12) & 0xff;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_ncpus;

	/*
	 * first we do an INIT/RESET IPI.  This INIT IPI might be run,
	 * resetting and running the target CPU.  OR this INIT IPI might be
	 * latched (P5 bug), CPU waiting for STARTUP IPI.  OR this INIT IPI
	 * might be ignored.
	 */

	/* setup the address for the target AP */
	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
	icr_hi |= (physical_cpu << 24);
	lapic.icr_hi = icr_hi;

	/*
	 * do an INIT IPI: assert RESET
	 *
	 * icr_lo keeps only the top 12 bits of the current register value;
	 * it is reused as the base for every command written below.
	 */
	icr_lo = lapic.icr_lo & 0xfff00000;
	lapic.icr_lo = icr_lo | 0x0000c500;

	/* wait for pending status end */
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;

	/* do an INIT IPI: deassert RESET */
	lapic.icr_lo = icr_lo | 0x00008500;

	/* give the target time to react, then wait for pending status end */
	u_sleep(10000);		/* wait ~10mS */
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;

	/*
	 * next we do a STARTUP IPI: the previous INIT IPI might still be
	 * latched (P5 bug), this 1st STARTUP would then terminate
	 * immediately, and the previously started INIT IPI would continue. OR
	 * the previous INIT IPI has already run, and this STARTUP IPI will
	 * run. OR the previous INIT IPI was ignored, and this STARTUP IPI
	 * will run.
	 */

	/* do a STARTUP IPI */
	lapic.icr_lo = icr_lo | 0x00000600 | vector;
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;
	u_sleep(200);		/* wait ~200uS */

	/*
	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
	 * recognized after hardware RESET or INIT IPI.
	 */

	lapic.icr_lo = icr_lo | 0x00000600 | vector;
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;
	u_sleep(200);		/* wait ~200uS */

	/*
	 * wait for it to start: poll mp_ncpus against the value sampled
	 * on entry for as long as the APIC timer has not counted down
	 */
	set_apic_timer(5000000);/* == 5 seconds */
	while (read_apic_timer())
		if (mp_ncpus > cpus)
			return 1;	/* return SUCCESS */

	return 0;		/* return FAILURE */
}
2520
2521
/*
 * Flush the TLB on all other CPUs.
 *
 * XXX: Needs to handshake and wait for completion before proceeding.
 */
/*
 * Ask every other CPU to invalidate its TLB by sending the XINVLTLB
 * IPI.  Nothing is sent unless both smp_started and invltlb_ok are
 * set, and the whole body compiles away without APIC_IO.
 */
void
smp_invltlb(void)
{
#ifdef APIC_IO
	if (!smp_started)
		return;
	if (!invltlb_ok)
		return;

	all_but_self_ipi(XINVLTLB_OFFSET);
#endif /* APIC_IO */
}
2535
/*
 * Invalidate the local TLB entry for 'addr' with the invlpg
 * instruction, then call smp_invltlb() to notify the other CPUs.
 */
void
invlpg(u_int addr)
{
	/* "memory" clobber: keep the compiler from moving accesses across */
	__asm __volatile("invlpg (%0)"::"r"(addr):"memory");

	/* send a message to the other CPUs */
	smp_invltlb();
}
2544
/*
 * Invalidate the local TLB by reloading %cr3 with its current value,
 * then call smp_invltlb() to notify the other CPUs.
 */
void
invltlb(void)
{
	u_long temp;	/* scratch register holding the %cr3 value */

	/*
	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
	 * inlined.
	 */
	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");

	/* send a message to the other CPUs */
	smp_invltlb();
}
2559
2560
2561 /*
2562 * When called the executing CPU will send an IPI to all other CPUs
2563 * requesting that they halt execution.
2564 *
2565 * Usually (but not necessarily) called with 'other_cpus' as its arg.
2566 *
2567 * - Signals all CPUs in map to stop.
2568 * - Waits for each to stop.
2569 *
2570 * Returns:
2571 * -1: error
2572 * 0: NA
2573 * 1: ok
2574 *
2575 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
2576 * from executing at same time.
2577 */
2578 int
2579 stop_cpus(u_int map)
2580 {
2581 if (!smp_started)
2582 return 0;
2583
2584 /* send the Xcpustop IPI to all CPUs in map */
2585 selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
2586
2587 while ((stopped_cpus & map) != map)
2588 /* spin */ ;
2589
2590 return 1;
2591 }
2592
2593
2594 /*
2595 * Called by a CPU to restart stopped CPUs.
2596 *
2597 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
2598 *
2599 * - Signals all CPUs in map to restart.
2600 * - Waits for each to restart.
2601 *
2602 * Returns:
2603 * -1: error
2604 * 0: NA
2605 * 1: ok
2606 */
2607 int
2608 restart_cpus(u_int map)
2609 {
2610 if (!smp_started)
2611 return 0;
2612
2613 started_cpus = map; /* signal other cpus to restart */
2614
2615 while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
2616 /* spin */ ;
2617
2618 return 1;
2619 }
2620
/*
 * SMP state and tunables exported under the machdep sysctl tree.
 */

/* are the APs allowed to run?  Set to 1 by the last AP in ap_init(). */
int smp_active = 0;		/* are the APs allowed to run? */
SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");

/* XXX maybe should be hw.ncpu */
/* Starts at 1 and is incremented by each AP in ap_init(); read-only. */
static int smp_cpus = 1;	/* how many cpu's running */
SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");

/* Also checked by the forward_*() routines before they send any IPI. */
int invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");

/* Warning: Do not staticize.  Used from swtch.s */
int do_page_zero_idle = 1; /* bzero pages for fun and profit in idleloop */
SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW,
	   &do_page_zero_idle, 0, "");

/* Is forwarding of an interrupt to the CPU holding the ISR lock enabled? */
int forward_irq_enabled = 1;
SYSCTL_INT(_machdep, OID_AUTO, forward_irq_enabled, CTLFLAG_RW,
	   &forward_irq_enabled, 0, "");

/*
 * Enable forwarding of a signal to a process running on a different CPU;
 * forward_signal() returns early when this is 0.
 */
static int forward_signal_enabled = 1;
SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
	   &forward_signal_enabled, 0, "");

/*
 * Enable forwarding of roundrobin to all other cpus;
 * forward_roundrobin() returns early when this is 0.
 */
static int forward_roundrobin_enabled = 1;
SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
	   &forward_roundrobin_enabled, 0, "");
2650
2651 /*
2652 * This is called once the rest of the system is up and running and we're
2653 * ready to let the AP's out of the pen.
2654 */
void ap_init(void);

/*
 * Per-AP initialization: counts the AP in smp_cpus, sets up its CPU,
 * FPU, SSE, local APIC and memory-range state, verifies that cpuid
 * matches the logical id derived from the local APIC id, and lets the
 * last AP to arrive switch on smp_started/invltlb_ok/smp_active.
 */
void
ap_init()
{
	u_int	apic_id;

	/* BSP may have changed PTD while we're waiting for the lock */
	cpu_invltlb();

	/* one more CPU is running (exported as machdep.smp_cpus) */
	smp_cpus++;

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
	lidt(&r_idt);
#endif

	/* Build our map of 'other' CPUs. */
	other_cpus = all_cpus & ~(1 << cpuid);

	printf("SMP: AP CPU #%d Launched!\n", cpuid);

	/* set up CPU registers and state */
	cpu_setregs();

	/* set up FPU state on the AP */
	npxinit(__INITIAL_NPXCW__);

	/* set up SSE registers */
	enable_sse();

	/*
	 * A quick check from sanity claus: bits 24..27 of lapic.id, mapped
	 * through apic_id_to_logical[], must equal our cpuid.
	 */
	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
	if (cpuid != apic_id) {
		printf("SMP: cpuid = %d\n", cpuid);
		printf("SMP: apic_id = %d\n", apic_id);
		printf("PTD[MPPTDI] = %llx\n", (u_int64_t)PTD[MPPTDI]);
		panic("cpuid mismatch! boom!!");
	}

	/* Init local apic for irq's */
	apic_initialize();

	/* Set memory range attributes for this CPU to match the BSP */
	mem_range_AP_init();

	/*
	 * Activate smp_invltlb, although strictly speaking, this isn't
	 * quite correct yet.  We should have a bitfield for cpus willing
	 * to accept TLB flush IPI's or something and sync them.
	 *
	 * Only the AP that brings smp_cpus up to mp_ncpus flips the flags.
	 */
	if (smp_cpus == mp_ncpus) {
		invltlb_ok = 1;
		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
		smp_active = 1;	 /* historic */
	}
}
2711
2712 #ifdef BETTER_CLOCK
2713
/* Values stored in checkstate_cpustate[] for a probed CPU. */
#define CHECKSTATE_USER	0
#define CHECKSTATE_SYS	1
#define CHECKSTATE_INTR	2

/*
 * Per-CPU snapshot consumed by forwarded_statclock() and
 * forward_hardclock(), indexed by logical cpu id.
 *
 * Do not staticize.  Used from apic_vector.s
 */
struct proc* checkstate_curproc[MAXCPU];	/* process seen on the CPU, or NULL */
int checkstate_cpustate[MAXCPU];		/* one of CHECKSTATE_* */
u_long checkstate_pc[MAXCPU];			/* program counter used for profiling */

/*
 * Scale the offset of 'pc' from prof->pr_off by prof->pr_scale (a value
 * shifted right by 16 after the multiply) and clear the low bit of the
 * result.  The multiply is done in u_quad_t to avoid overflow.
 */
#define PC_TO_INDEX(pc, prof) \
	((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
	    (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
2726
2727 static void
2728 addupc_intr_forwarded(struct proc *p, int id, int *astmap)
2729 {
2730 int i;
2731 struct uprof *prof;
2732 u_long pc;
2733
2734 pc = checkstate_pc[id];
2735 prof = &p->p_stats->p_prof;
2736 if (pc >= prof->pr_off &&
2737 (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) {
2738 if ((p->p_flag & P_OWEUPC) == 0) {
2739 prof->pr_addr = pc;
2740 prof->pr_ticks = 1;
2741 p->p_flag |= P_OWEUPC;
2742 }
2743 *astmap |= (1 << id);
2744 }
2745 }
2746
2747 static void
2748 forwarded_statclock(int id, int pscnt, int *astmap)
2749 {
2750 struct pstats *pstats;
2751 long rss;
2752 struct rusage *ru;
2753 struct vmspace *vm;
2754 int cpustate;
2755 struct proc *p;
2756 #ifdef GPROF
2757 register struct gmonparam *g;
2758 int i;
2759 #endif
2760
2761 p = checkstate_curproc[id];
2762 cpustate = checkstate_cpustate[id];
2763
2764 switch (cpustate) {
2765 case CHECKSTATE_USER:
2766 if (p->p_flag & P_PROFIL)
2767 addupc_intr_forwarded(p, id, astmap);
2768 if (pscnt > 1)
2769 return;
2770 p->p_uticks++;
2771 if (p->p_nice > NZERO)
2772 cp_time[CP_NICE]++;
2773 else
2774 cp_time[CP_USER]++;
2775 break;
2776 case CHECKSTATE_SYS:
2777 #ifdef GPROF
2778 /*
2779 * Kernel statistics are just like addupc_intr, only easier.
2780 */
2781 g = &_gmonparam;
2782 if (g->state == GMON_PROF_ON) {
2783 i = checkstate_pc[id] - g->lowpc;
2784 if (i < g->textsize) {
2785 i /= HISTFRACTION * sizeof(*g->kcount);
2786 g->kcount[i]++;
2787 }
2788 }
2789 #endif
2790 if (pscnt > 1)
2791 return;
2792
2793 if (!p)
2794 cp_time[CP_IDLE]++;
2795 else {
2796 p->p_sticks++;
2797 cp_time[CP_SYS]++;
2798 }
2799 break;
2800 case CHECKSTATE_INTR:
2801 default:
2802 #ifdef GPROF
2803 /*
2804 * Kernel statistics are just like addupc_intr, only easier.
2805 */
2806 g = &_gmonparam;
2807 if (g->state == GMON_PROF_ON) {
2808 i = checkstate_pc[id] - g->lowpc;
2809 if (i < g->textsize) {
2810 i /= HISTFRACTION * sizeof(*g->kcount);
2811 g->kcount[i]++;
2812 }
2813 }
2814 #endif
2815 if (pscnt > 1)
2816 return;
2817 if (p)
2818 p->p_iticks++;
2819 cp_time[CP_INTR]++;
2820 }
2821 if (p != NULL) {
2822 schedclock(p);
2823
2824 /* Update resource usage integrals and maximums. */
2825 if ((pstats = p->p_stats) != NULL &&
2826 (ru = &pstats->p_ru) != NULL &&
2827 (vm = p->p_vmspace) != NULL) {
2828 ru->ru_ixrss += pgtok(vm->vm_tsize);
2829 ru->ru_idrss += pgtok(vm->vm_dsize);
2830 ru->ru_isrss += pgtok(vm->vm_ssize);
2831 rss = pgtok(vmspace_resident_count(vm));
2832 if (ru->ru_maxrss < rss)
2833 ru->ru_maxrss = rss;
2834 }
2835 }
2836 }
2837
2838 void
2839 forward_statclock(int pscnt)
2840 {
2841 int map;
2842 int id;
2843 int i;
2844
2845 /* Kludge. We don't yet have separate locks for the interrupts
2846 * and the kernel. This means that we cannot let the other processors
2847 * handle complex interrupts while inhibiting them from entering
2848 * the kernel in a non-interrupt context.
2849 *
2850 * What we can do, without changing the locking mechanisms yet,
2851 * is letting the other processors handle a very simple interrupt
2852 * (wich determines the processor states), and do the main
2853 * work ourself.
2854 */
2855
2856 if (!smp_started || !invltlb_ok || cold || panicstr)
2857 return;
2858
2859 /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */
2860
2861 map = other_cpus & ~(stopped_cpus|hlt_cpus_mask);
2862 checkstate_probed_cpus = 0;
2863 if (map != 0)
2864 selected_apic_ipi(map,
2865 XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2866
2867 i = 0;
2868 while (checkstate_probed_cpus != map) {
2869 /* spin */
2870 i++;
2871 if (i == 100000) {
2872 #ifdef BETTER_CLOCK_DIAGNOSTIC
2873 printf("forward_statclock: checkstate %x\n",
2874 checkstate_probed_cpus);
2875 #endif
2876 break;
2877 }
2878 }
2879
2880 /*
2881 * Step 2: walk through other processors processes, update ticks and
2882 * profiling info.
2883 */
2884
2885 map = 0;
2886 for (id = 0; id < mp_ncpus; id++) {
2887 if (id == cpuid)
2888 continue;
2889 if (((1 << id) & checkstate_probed_cpus) == 0)
2890 continue;
2891 forwarded_statclock(id, pscnt, &map);
2892 }
2893 if (map != 0) {
2894 checkstate_need_ast |= map;
2895 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2896 i = 0;
2897 while ((checkstate_need_ast & map) != 0) {
2898 /* spin */
2899 i++;
2900 if (i > 100000) {
2901 #ifdef BETTER_CLOCK_DIAGNOSTIC
2902 printf("forward_statclock: dropped ast 0x%x\n",
2903 checkstate_need_ast & map);
2904 #endif
2905 break;
2906 }
2907 }
2908 }
2909 }
2910
2911 void
2912 forward_hardclock(int pscnt)
2913 {
2914 int map;
2915 int id;
2916 struct proc *p;
2917 struct pstats *pstats;
2918 int i;
2919
2920 /* Kludge. We don't yet have separate locks for the interrupts
2921 * and the kernel. This means that we cannot let the other processors
2922 * handle complex interrupts while inhibiting them from entering
2923 * the kernel in a non-interrupt context.
2924 *
2925 * What we can do, without changing the locking mechanisms yet,
2926 * is letting the other processors handle a very simple interrupt
2927 * (wich determines the processor states), and do the main
2928 * work ourself.
2929 */
2930
2931 if (!smp_started || !invltlb_ok || cold || panicstr)
2932 return;
2933
2934 /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */
2935
2936 map = other_cpus & ~(stopped_cpus|hlt_cpus_mask);
2937 checkstate_probed_cpus = 0;
2938 if (map != 0)
2939 selected_apic_ipi(map,
2940 XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2941
2942 i = 0;
2943 while (checkstate_probed_cpus != map) {
2944 /* spin */
2945 i++;
2946 if (i == 100000) {
2947 #ifdef BETTER_CLOCK_DIAGNOSTIC
2948 printf("forward_hardclock: checkstate %x\n",
2949 checkstate_probed_cpus);
2950 #endif
2951 break;
2952 }
2953 }
2954
2955 /*
2956 * Step 2: walk through other processors processes, update virtual
2957 * timer and profiling timer. If stathz == 0, also update ticks and
2958 * profiling info.
2959 */
2960
2961 map = 0;
2962 for (id = 0; id < mp_ncpus; id++) {
2963 if (id == cpuid)
2964 continue;
2965 if (((1 << id) & checkstate_probed_cpus) == 0)
2966 continue;
2967 p = checkstate_curproc[id];
2968 if (p) {
2969 pstats = p->p_stats;
2970 if (checkstate_cpustate[id] == CHECKSTATE_USER &&
2971 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
2972 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
2973 psignal(p, SIGVTALRM);
2974 map |= (1 << id);
2975 }
2976 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
2977 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
2978 psignal(p, SIGPROF);
2979 map |= (1 << id);
2980 }
2981 }
2982 if (stathz == 0) {
2983 forwarded_statclock( id, pscnt, &map);
2984 }
2985 }
2986 if (map != 0) {
2987 checkstate_need_ast |= map;
2988 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2989 i = 0;
2990 while ((checkstate_need_ast & map) != 0) {
2991 /* spin */
2992 i++;
2993 if (i > 100000) {
2994 #ifdef BETTER_CLOCK_DIAGNOSTIC
2995 printf("forward_hardclock: dropped ast 0x%x\n",
2996 checkstate_need_ast & map);
2997 #endif
2998 break;
2999 }
3000 }
3001 }
3002 }
3003
3004 #endif /* BETTER_CLOCK */
3005
3006 void
3007 forward_signal(struct proc *p)
3008 {
3009 int map;
3010 int id;
3011 int i;
3012
3013 /* Kludge. We don't yet have separate locks for the interrupts
3014 * and the kernel. This means that we cannot let the other processors
3015 * handle complex interrupts while inhibiting them from entering
3016 * the kernel in a non-interrupt context.
3017 *
3018 * What we can do, without changing the locking mechanisms yet,
3019 * is letting the other processors handle a very simple interrupt
3020 * (wich determines the processor states), and do the main
3021 * work ourself.
3022 */
3023
3024 if (!smp_started || !invltlb_ok || cold || panicstr)
3025 return;
3026 if (!forward_signal_enabled)
3027 return;
3028 while (1) {
3029 if (p->p_stat != SRUN)
3030 return;
3031 id = p->p_oncpu;
3032 if (id == 0xff)
3033 return;
3034 map = (1<<id);
3035 checkstate_need_ast |= map;
3036 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
3037 i = 0;
3038 while ((checkstate_need_ast & map) != 0) {
3039 /* spin */
3040 i++;
3041 if (i > 100000) {
3042 #if 0
3043 printf("forward_signal: dropped ast 0x%x\n",
3044 checkstate_need_ast & map);
3045 #endif
3046 break;
3047 }
3048 }
3049 if (id == p->p_oncpu)
3050 return;
3051 }
3052 }
3053
3054 void
3055 forward_roundrobin(void)
3056 {
3057 u_int map;
3058 int i;
3059
3060 if (!smp_started || !invltlb_ok || cold || panicstr)
3061 return;
3062 if (!forward_roundrobin_enabled)
3063 return;
3064 resched_cpus |= other_cpus;
3065 map = other_cpus & ~(stopped_cpus|hlt_cpus_mask);
3066 #if 1
3067 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
3068 #else
3069 (void) all_but_self_ipi(XCPUAST_OFFSET);
3070 #endif
3071 i = 0;
3072 while ((checkstate_need_ast & map) != 0) {
3073 /* spin */
3074 i++;
3075 if (i > 100000) {
3076 #if 0
3077 printf("forward_roundrobin: dropped ast 0x%x\n",
3078 checkstate_need_ast & map);
3079 #endif
3080 break;
3081 }
3082 }
3083 }
3084
3085
3086 #ifdef APIC_INTR_REORDER
3087 /*
3088 * Maintain mapping from softintr vector to isr bit in local apic.
3089 */
3090 void
3091 set_lapic_isrloc(int intr, int vector)
3092 {
3093 if (intr < 0 || intr > 32)
3094 panic("set_apic_isrloc: bad intr argument: %d",intr);
3095 if (vector < ICU_OFFSET || vector > 255)
3096 panic("set_apic_isrloc: bad vector argument: %d",vector);
3097 apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
3098 apic_isrbit_location[intr].bit = (1<<(vector & 31));
3099 }
3100 #endif
3101
3102 /*
3103 * All-CPU rendezvous. CPUs are signalled, all execute the setup function
3104 * (if specified), rendezvous, execute the action function (if specified),
3105 * rendezvous again, execute the teardown function (if specified), and then
3106 * resume.
3107 *
3108 * Note that the supplied external functions _must_ be reentrant and aware
3109 * that they are running in parallel and in an unknown lock context.
3110 */
/*
 * State of the rendezvous in progress.  Filled in by smp_rendezvous()
 * and read by every CPU in smp_rendezvous_action().
 */
static void (*smp_rv_setup_func)(void *arg);	/* run first; may be NULL */
static void (*smp_rv_action_func)(void *arg);	/* run after entry barrier; may be NULL */
static void (*smp_rv_teardown_func)(void *arg);	/* run after exit barrier; may be NULL */
static void *smp_rv_func_arg;			/* argument passed to all three */
/* [0]: CPUs that reached the entry barrier, [1]: the exit barrier */
static volatile int smp_rv_waiters[2];
3116
3117 void
3118 smp_rendezvous_action(void)
3119 {
3120 /* setup function */
3121 if (smp_rv_setup_func != NULL)
3122 smp_rv_setup_func(smp_rv_func_arg);
3123 /* spin on entry rendezvous */
3124 atomic_add_int(&smp_rv_waiters[0], 1);
3125 while (smp_rv_waiters[0] < mp_ncpus)
3126 ;
3127 /* action function */
3128 if (smp_rv_action_func != NULL)
3129 smp_rv_action_func(smp_rv_func_arg);
3130 /* spin on exit rendezvous */
3131 atomic_add_int(&smp_rv_waiters[1], 1);
3132 while (smp_rv_waiters[1] < mp_ncpus)
3133 ;
3134 /* teardown function */
3135 if (smp_rv_teardown_func != NULL)
3136 smp_rv_teardown_func(smp_rv_func_arg);
3137 }
3138
3139 void
3140 smp_rendezvous(void (* setup_func)(void *),
3141 void (* action_func)(void *),
3142 void (* teardown_func)(void *),
3143 void *arg)
3144 {
3145 u_int efl;
3146
3147 /* obtain rendezvous lock */
3148 s_lock(&smp_rv_lock); /* XXX sleep here? NOWAIT flag? */
3149
3150 /* set static function pointers */
3151 smp_rv_setup_func = setup_func;
3152 smp_rv_action_func = action_func;
3153 smp_rv_teardown_func = teardown_func;
3154 smp_rv_func_arg = arg;
3155 smp_rv_waiters[0] = 0;
3156 smp_rv_waiters[1] = 0;
3157
3158 /* disable interrupts on this CPU, save interrupt status */
3159 efl = read_eflags();
3160 write_eflags(efl & ~PSL_I);
3161
3162 /* signal other processors, which will enter the IPI with interrupts off */
3163 all_but_self_ipi(XRENDEZVOUS_OFFSET);
3164
3165 /* call executor function */
3166 smp_rendezvous_action();
3167
3168 /* restore interrupt flag */
3169 write_eflags(efl);
3170
3171 /* release lock */
3172 s_unlock(&smp_rv_lock);
3173 }
3174
3175 static int
3176 sysctl_htl_cpus(SYSCTL_HANDLER_ARGS)
3177 {
3178 u_int mask;
3179 int error;
3180
3181 mask = hlt_cpus_mask;
3182 error = sysctl_handle_int(oidp, &mask, 0, req);
3183 if (error || !req->newptr)
3184 return (error);
3185
3186 if (logical_cpus_mask != 0 &&
3187 (mask & logical_cpus_mask) == logical_cpus_mask)
3188 hlt_logical_cpus = 1;
3189 else
3190 hlt_logical_cpus = 0;
3191
3192 if (! hyperthreading_allowed)
3193 mask |= hyperthreading_cpus_mask;
3194
3195 if ((mask & all_cpus) == all_cpus)
3196 mask &= ~(1<<0);
3197 hlt_cpus_mask = mask;
3198 return (error);
3199 }
3200 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
3201 0, 0, sysctl_htl_cpus, "IU", "");
3202
3203 static int
3204 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
3205 {
3206 int disable, error;
3207
3208 disable = hlt_logical_cpus;
3209 error = sysctl_handle_int(oidp, &disable, 0, req);
3210 if (error || !req->newptr)
3211 return (error);
3212
3213 if (disable)
3214 hlt_cpus_mask |= logical_cpus_mask;
3215 else
3216 hlt_cpus_mask &= ~logical_cpus_mask;
3217
3218 if (! hyperthreading_allowed)
3219 hlt_cpus_mask |= hyperthreading_cpus_mask;
3220
3221 if ((hlt_cpus_mask & all_cpus) == all_cpus)
3222 hlt_cpus_mask &= ~(1<<0);
3223
3224 hlt_logical_cpus = disable;
3225 return (error);
3226 }
3227
3228 static int
3229 sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
3230 {
3231 int allowed, error;
3232
3233 allowed = hyperthreading_allowed;
3234 error = sysctl_handle_int(oidp, &allowed, 0, req);
3235 if (error || !req->newptr)
3236 return (error);
3237
3238 if (allowed)
3239 hlt_cpus_mask &= ~hyperthreading_cpus_mask;
3240 else
3241 hlt_cpus_mask |= hyperthreading_cpus_mask;
3242
3243 if (logical_cpus_mask != 0 &&
3244 (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
3245 hlt_logical_cpus = 1;
3246 else
3247 hlt_logical_cpus = 0;
3248
3249 if ((hlt_cpus_mask & all_cpus) == all_cpus)
3250 hlt_cpus_mask &= ~(1<<0);
3251
3252 hyperthreading_allowed = allowed;
3253 return (error);
3254 }
3255
3256 static void
3257 cpu_hlt_setup(void *dummy __unused)
3258 {
3259
3260 if (logical_cpus_mask != 0) {
3261 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
3262 &hlt_logical_cpus);
3263 sysctl_ctx_init(&logical_cpu_clist);
3264 SYSCTL_ADD_PROC(&logical_cpu_clist,
3265 SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
3266 "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
3267 sysctl_hlt_logical_cpus, "IU", "");
3268 SYSCTL_ADD_UINT(&logical_cpu_clist,
3269 SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
3270 "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
3271 &logical_cpus_mask, 0, "");
3272
3273 if (hlt_logical_cpus)
3274 hlt_cpus_mask |= logical_cpus_mask;
3275
3276 /*
3277 * If necessary for security purposes, force
3278 * hyperthreading off, regardless of the value
3279 * of hlt_logical_cpus.
3280 */
3281 if (hyperthreading_cpus_mask) {
3282 TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
3283 &hyperthreading_allowed);
3284 SYSCTL_ADD_PROC(&logical_cpu_clist,
3285 SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
3286 "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
3287 0, 0, sysctl_hyperthreading_allowed, "IU", "");
3288 if (! hyperthreading_allowed)
3289 hlt_cpus_mask |= hyperthreading_cpus_mask;
3290 }
3291 }
3292 }
3293 SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
3294
3295 int
3296 mp_grab_cpu_hlt(void)
3297 {
3298 u_int mask = 1 << cpuid;
3299 u_int temp;
3300 int retval;
3301
3302 retval = mask & hlt_cpus_mask;
3303 while (mask & hlt_cpus_mask) {
3304 temp = lapic.tpr;
3305 lapic.tpr = LOPRIO_LEVEL;
3306 __asm __volatile("sti; hlt" : : : "memory");
3307 lapic.tpr = temp;
3308 }
3309 return (retval);
3310 }
Cache object: 84185aa12bc71dfe83bbe897c5e76f79
|