1 /*-
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 * derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26 #include <sys/cdefs.h>
27 __FBSDID("$FreeBSD$");
28
29 #include "opt_apic.h"
30 #include "opt_cpu.h"
31 #include "opt_kstack_pages.h"
32 #include "opt_mp_watchdog.h"
33 #include "opt_sched.h"
34 #include "opt_smp.h"
35
36 #if !defined(lint)
37 #if !defined(SMP)
38 #error How did you get here?
39 #endif
40
41 #ifndef DEV_APIC
42 #error The apic device is required for SMP, add "device apic" to your config file.
43 #endif
44 #if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
45 #error SMP not supported with CPU_DISABLE_CMPXCHG
46 #endif
47 #endif /* not lint */
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/bus.h>
52 #include <sys/cons.h> /* cngetc() */
53 #ifdef GPROF
54 #include <sys/gmon.h>
55 #endif
56 #include <sys/kernel.h>
57 #include <sys/ktr.h>
58 #include <sys/lock.h>
59 #include <sys/malloc.h>
60 #include <sys/memrange.h>
61 #include <sys/mutex.h>
62 #include <sys/pcpu.h>
63 #include <sys/proc.h>
64 #include <sys/sched.h>
65 #include <sys/smp.h>
66 #include <sys/sysctl.h>
67
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/pmap.h>
71 #include <vm/vm_kern.h>
72 #include <vm/vm_extern.h>
73
74 #include <machine/apicreg.h>
75 #include <machine/md_var.h>
76 #include <machine/mp_watchdog.h>
77 #include <machine/pcb.h>
78 #include <machine/psl.h>
79 #include <machine/smp.h>
80 #include <machine/specialreg.h>
81 #include <machine/privatespace.h>
82
/*
 * BIOS warm-boot vector: an AP coming out of INIT jumps through the
 * real-mode vector at 0x467/0x469; pointing it at our trampoline makes
 * the AP start there instead of doing a full cold boot.
 */
#define WARMBOOT_TARGET 0
#define WARMBOOT_OFF (KERNBASE + 0x0467)
#define WARMBOOT_SEG (KERNBASE + 0x0469)

/* CMOS RTC index/data ports and shutdown-status byte values. */
#define CMOS_REG (0x70)
#define CMOS_DATA (0x71)
#define BIOS_RESET (0x0f)
#define BIOS_WARM (0x0a)

/*
 * this code MUST be enabled here and in mpboot.s.
 * it follows the very early stages of AP boot by placing values in CMOS ram.
 * it NORMALLY will never be needed and thus the primitive method for enabling.
 *
#define CHECK_POINTS
 */

#if defined(CHECK_POINTS) && !defined(PC98)
/* Progress checkpoints are stored in CMOS RAM bytes 0x34..0x39. */
#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

#define CHECK_INIT(D); \
	CHECK_WRITE(0x34, (D)); \
	CHECK_WRITE(0x35, (D)); \
	CHECK_WRITE(0x36, (D)); \
	CHECK_WRITE(0x37, (D)); \
	CHECK_WRITE(0x38, (D)); \
	CHECK_WRITE(0x39, (D));

#define CHECK_PRINT(S); \
	printf("%s: %d, %d, %d, %d, %d, %d\n", \
	   (S), \
	   CHECK_READ(0x34), \
	   CHECK_READ(0x35), \
	   CHECK_READ(0x36), \
	   CHECK_READ(0x37), \
	   CHECK_READ(0x38), \
	   CHECK_READ(0x39));

#else /* CHECK_POINTS */

/* Checkpointing disabled: all hooks compile away to nothing. */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)
#define CHECK_WRITE(A, D)

#endif /* CHECK_POINTS */
129
/* lock region used by kernel profiling */
int mcount_lock;

int mp_naps; /* # of Applications processors */
int boot_cpu_id = -1; /* designated BSP */
extern int nkpt;

/*
 * CPU topology map datastructures for HTT.
 */
static struct cpu_group mp_groups[MAXCPU];
static struct cpu_top mp_top;

/* AP uses this during bootstrap. Do not staticize. */
char *bootSTK;
static int bootAP;

/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;

/* SMP page table page */
extern pt_entry_t *SMPpt;

/* Per-CPU saved context used when CPUs are stopped/restarted. */
struct pcb stoppcbs[MAXCPU];

/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;

#ifdef STOP_NMI
/* Mask of CPUs with an NMI-based stop request pending. */
volatile cpumask_t ipi_nmi_pending;

static void ipi_nmi_selected(u_int32_t cpus);
#endif

#ifdef COUNT_IPIS
/* Interrupt counts. */
static u_long *ipi_preempt_counts[MAXCPU];
static u_long *ipi_ast_counts[MAXCPU];
u_long *ipi_invltlb_counts[MAXCPU];
u_long *ipi_invlrng_counts[MAXCPU];
u_long *ipi_invlpg_counts[MAXCPU];
u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
u_long *ipi_lazypmap_counts[MAXCPU];
#endif

/*
 * Local data and functions.
 */

#ifdef STOP_NMI
/*
 * Provide an alternate method of stopping other CPUs. If another CPU has
 * disabled interrupts the conventional STOP IPI will be blocked. This
 * NMI-based stop should get through in that case.
 */
static int stop_cpus_with_nmi = 1;
SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW,
&stop_cpus_with_nmi, 0, "");
TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi);
#else
#define stop_cpus_with_nmi 0
#endif

/* Threads per physical package as reported by CPUID; 0 when no HTT. */
static u_int logical_cpus;

/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;

/*
 * Store data from cpu_add() until later in the boot when we actually setup
 * the APs.
 */
struct cpu_info {
	int cpu_present:1;
	int cpu_bsp:1;
	int cpu_disabled:1;
} static cpu_info[MAX_APIC_ID + 1];
int cpu_apic_ids[MAXCPU];

/* Holds pending bitmap based IPIs per CPU */
static volatile u_int cpu_ipi_pending[MAXCPU];

/* Physical address chosen for the AP trampoline by mp_bootaddress(). */
static u_int boot_address;

static void assign_cpu_ids(void);
static void install_ap_tramp(void);
static void set_interrupt_apic_ids(void);
static int start_all_aps(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);

/* State behind the machdep.* hyperthreading/logical-CPU sysctls. */
static int hlt_logical_cpus;
static u_int hyperthreading_cpus;
static cpumask_t hyperthreading_cpus_mask;
static int hyperthreading_allowed = 1;
static struct sysctl_ctx_list logical_cpu_clist;
233 static void
234 mem_range_AP_init(void)
235 {
236 if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
237 mem_range_softc.mr_op->initAP(&mem_range_softc);
238 }
239
240 void
241 mp_topology(void)
242 {
243 struct cpu_group *group;
244 int apic_id;
245 int groups;
246 int cpu;
247
248 /* Build the smp_topology map. */
249 /* Nothing to do if there is no HTT support. */
250 if (hyperthreading_cpus <= 1)
251 return;
252 group = &mp_groups[0];
253 groups = 1;
254 for (cpu = 0, apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) {
255 if (!cpu_info[apic_id].cpu_present)
256 continue;
257 /*
258 * If the current group has members and we're not a logical
259 * cpu, create a new group.
260 */
261 if (group->cg_count != 0 &&
262 (apic_id % hyperthreading_cpus) == 0) {
263 group++;
264 groups++;
265 }
266 group->cg_count++;
267 group->cg_mask |= 1 << cpu;
268 cpu++;
269 }
270
271 mp_top.ct_count = groups;
272 mp_top.ct_group = mp_groups;
273 smp_topology = &mp_top;
274 }
275
276
277 /*
278 * Calculate usable address in base memory for AP trampoline code.
279 */
280 u_int
281 mp_bootaddress(u_int basemem)
282 {
283
284 boot_address = trunc_page(basemem); /* round down to 4k boundary */
285 if ((basemem - boot_address) < bootMP_size)
286 boot_address -= PAGE_SIZE; /* not enough, lower by 4k */
287
288 return boot_address;
289 }
290
291 void
292 cpu_add(u_int apic_id, char boot_cpu)
293 {
294
295 if (apic_id > MAX_APIC_ID) {
296 panic("SMP: APIC ID %d too high", apic_id);
297 return;
298 }
299 KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
300 apic_id));
301 cpu_info[apic_id].cpu_present = 1;
302 if (boot_cpu) {
303 KASSERT(boot_cpu_id == -1,
304 ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
305 boot_cpu_id));
306 boot_cpu_id = apic_id;
307 cpu_info[apic_id].cpu_bsp = 1;
308 }
309 if (mp_ncpus < MAXCPU)
310 mp_ncpus++;
311 if (bootverbose)
312 printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
313 "AP");
314 }
315
316 void
317 cpu_mp_setmaxid(void)
318 {
319
320 mp_maxid = MAXCPU - 1;
321 }
322
323 int
324 cpu_mp_probe(void)
325 {
326
327 /*
328 * Always record BSP in CPU map so that the mbuf init code works
329 * correctly.
330 */
331 all_cpus = 1;
332 if (mp_ncpus == 0) {
333 /*
334 * No CPUs were found, so this must be a UP system. Setup
335 * the variables to represent a system with a single CPU
336 * with an id of 0.
337 */
338 mp_ncpus = 1;
339 return (0);
340 }
341
342 /* At least one CPU was found. */
343 if (mp_ncpus == 1) {
344 /*
345 * One CPU was found, so this must be a UP system with
346 * an I/O APIC.
347 */
348 return (0);
349 }
350
351 /* At least two CPUs were found. */
352 return (1);
353 }
354
/*
 * Initialize the IPI handlers and start up the AP's.
 * Runs on the BSP.  Installs the inter-CPU interrupt gates, assigns
 * logical CPU ids, boots every AP and probes the HTT configuration.
 */
void
cpu_mp_start(void)
{
	int i;
	/* p[] receives the four CPUID output registers (eax..edx). */
	u_int threads_per_cache, p[4];

	/* Initialize the logical ID to APIC ID table. */
	for (i = 0; i < MAXCPU; i++) {
		cpu_apic_ids[i] = -1;
		cpu_ipi_pending[i] = 0;
	}

	/* Install an inter-CPU IPI for TLB invalidation */
	setidt(IPI_INVLTLB, IDTVEC(invltlb),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(IPI_INVLPG, IDTVEC(invlpg),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(IPI_INVLRNG, IDTVEC(invlrng),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for cache invalidation. */
	setidt(IPI_INVLCACHE, IDTVEC(invlcache),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for lazy pmap release */
	setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for all-CPU rendezvous */
	setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install generic inter-CPU IPI handler */
	setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for CPU stop/restart */
	setidt(IPI_STOP, IDTVEC(cpustop),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));


	/* Set boot_cpu_id if needed. */
	if (boot_cpu_id == -1) {
		boot_cpu_id = PCPU_GET(apic_id);
		cpu_info[boot_cpu_id].cpu_bsp = 1;
	} else
		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
		    ("BSP's APIC ID doesn't match boot_cpu_id"));
	cpu_apic_ids[0] = boot_cpu_id;

	assign_cpu_ids();

	/* Start each Application Processor */
	start_all_aps();

	/* Setup the initial logical CPUs info. */
	logical_cpus = logical_cpus_mask = 0;
	if (cpu_feature & CPUID_HTT)
		logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;

	/*
	 * Work out if hyperthreading is *really* enabled. This
	 * is made really ugly by the fact that processors lie: Dual
	 * core processors claim to be hyperthreaded even when they're
	 * not, presumably because they want to be treated the same
	 * way as HTT with respect to per-cpu software licensing.
	 * At the time of writing (May 12, 2005) the only hyperthreaded
	 * cpus are from Intel, and Intel's dual-core processors can be
	 * identified via the "deterministic cache parameters" cpuid
	 * calls.
	 */
	/*
	 * First determine if this is an Intel processor which claims
	 * to have hyperthreading support.
	 */
	if ((cpu_feature & CPUID_HTT) &&
	    (strcmp(cpu_vendor, "GenuineIntel") == 0)) {
		/*
		 * If the "deterministic cache parameters" cpuid calls
		 * are available, use them.
		 */
		if (cpu_high >= 4) {
			/* Ask the processor about the L1 cache. */
			/*
			 * NOTE(review): this loop only runs once (i < 1)
			 * before checking the "no more caches" marker;
			 * this matches the historical upstream code.
			 */
			for (i = 0; i < 1; i++) {
				cpuid_count(4, i, p);
				/* EAX[25:14] = max threads sharing this cache, minus 1. */
				threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
				if (hyperthreading_cpus < threads_per_cache)
					hyperthreading_cpus = threads_per_cache;
				/* EAX[4:0] == 0 means no more cache levels. */
				if ((p[0] & 0x1f) == 0)
					break;
			}
		}

		/*
		 * If the deterministic cache parameters are not
		 * available, or if no caches were reported to exist,
		 * just accept what the HTT flag indicated.
		 */
		if (hyperthreading_cpus == 0)
			hyperthreading_cpus = logical_cpus;
	}

	set_interrupt_apic_ids();

	/* Last, setup the cpu topology now that we have probed CPUs */
	mp_topology();
}
465
466
467 /*
468 * Print various information about the SMP system hardware and setup.
469 */
470 void
471 cpu_mp_announce(void)
472 {
473 int i, x;
474
475 /* List CPUs */
476 printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
477 for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
478 if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
479 continue;
480 if (cpu_info[x].cpu_disabled)
481 printf(" cpu (AP): APIC ID: %2d (disabled)\n", x);
482 else {
483 KASSERT(i < mp_ncpus,
484 ("mp_ncpus and actual cpus are out of whack"));
485 printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
486 }
487 }
488 }
489
/*
 * AP CPU's call this to initialize themselves.
 * Entered from the trampoline in mpboot.s with paging enabled.  Sets up
 * this CPU's GDT/IDT/LDT/TSS from its private space, waits for the BSP
 * to release it, finishes CPU state setup and enters the scheduler.
 * Never returns.
 */
void
init_secondary(void)
{
	vm_offset_t addr;
	int gsel_tss;
	int x, myid;
	u_int cr0;

	/* bootAP is set in start_ap() to our ID. */
	myid = bootAP;

	/* Point our GDT's private-data and TSS descriptors at our slice
	 * of SMP_prvspace, then hook up the pcpu self-pointer. */
	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
	gdt_segs[GPROC0_SEL].ssd_base =
	    (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
	SMP_prvspace[myid].pcpu.pc_prvspace =
	    &SMP_prvspace[myid].pcpu;

	/* Each CPU has its own copy of the GDT at gdt[myid * NGDT]. */
	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt); /* does magic intra-segment return */

	lidt(&r_idt);

	lldt(_default_ldt);
	PCPU_SET(currentldt, _default_ldt);

	/* Install and load this CPU's TSS. */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
	ltr(gsel_tss);

	PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);
	CHECK_WRITE(0x38, 5);	/* boot-progress checkpoint */

	/* Disable local APIC just to be sure. */
	lapic_disable();

	/* signal our startup to the BSP. */
	mp_naps++;
	CHECK_WRITE(0x39, 6);	/* boot-progress checkpoint */

	/* Spin until the BSP releases the AP's. */
	while (!aps_ready)
		ia32_pause();

	/* BSP may have changed PTD while we were waiting */
	invltlb();
	for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
		invlpg(addr);

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
	/* Reload the IDT; the F00F workaround relocates it. */
	lidt(&r_idt);
#endif

	/* Initialize the PAT MSR if present. */
	pmap_init_pat();

	/* set up CPU registers and state */
	cpu_setregs();

	/* set up FPU state on the AP */
	npxinit(__INITIAL_NPXCW__);

	/* set up SSE registers */
	enable_sse();

#ifdef PAE
	/* Enable the PTE no-execute bit. */
	if ((amd_feature & AMDID_NX) != 0) {
		uint64_t msr;

		msr = rdmsr(MSR_EFER) | EFER_NXE;
		wrmsr(MSR_EFER, msr);
	}
#endif

	/* A quick check from sanity claus */
	if (PCPU_GET(apic_id) != lapic_id()) {
		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
		printf("SMP: actual apic_id = %d\n", lapic_id());
		printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
		printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]);
		panic("cpuid mismatch! boom!!");
	}

	/* Initialize curthread. */
	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
	PCPU_SET(curthread, PCPU_GET(idlethread));

	/* Serialize the remaining per-AP bring-up against other APs. */
	mtx_lock_spin(&ap_boot_mtx);

	/* Init local apic for irq's */
	lapic_setup(1);

	/* Set memory range attributes for this CPU to match the BSP */
	mem_range_AP_init();

	smp_cpus++;

	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));

	/* Determine if we are a logical CPU. */
	if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
		logical_cpus_mask |= PCPU_GET(cpumask);

	/* Determine if we are a hyperthread. */
	if (hyperthreading_cpus > 1 &&
	    PCPU_GET(apic_id) % hyperthreading_cpus != 0)
		hyperthreading_cpus_mask |= PCPU_GET(cpumask);

	/* Build our map of 'other' CPUs. */
	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

	if (bootverbose)
		lapic_dump("AP");

	/* The last AP to arrive declares the system fully started. */
	if (smp_cpus == mp_ncpus) {
		/* enable IPI's, tlb shootdown, freezes etc */
		atomic_store_rel_int(&smp_started, 1);
		smp_active = 1; /* historic */
	}

	mtx_unlock_spin(&ap_boot_mtx);

	/* wait until all the AP's are up */
	while (smp_started == 0)
		ia32_pause();

	/* enter the scheduler */
	sched_throw(NULL);

	panic("scheduler returned us to %s", __func__);
	/* NOTREACHED */
}
644
645 /*******************************************************************
646 * local functions and data
647 */
648
649 /*
650 * We tell the I/O APIC code about all the CPUs we want to receive
651 * interrupts. If we don't want certain CPUs to receive IRQs we
652 * can simply not tell the I/O APIC code about them in this function.
653 * We also do not tell it about the BSP since it tells itself about
654 * the BSP internally to work with UP kernels and on UP machines.
655 */
656 static void
657 set_interrupt_apic_ids(void)
658 {
659 u_int i, apic_id;
660
661 for (i = 0; i < MAXCPU; i++) {
662 apic_id = cpu_apic_ids[i];
663 if (apic_id == -1)
664 continue;
665 if (cpu_info[apic_id].cpu_bsp)
666 continue;
667 if (cpu_info[apic_id].cpu_disabled)
668 continue;
669
670 /* Don't let hyperthreads service interrupts. */
671 if (hyperthreading_cpus > 1 &&
672 apic_id % hyperthreading_cpus != 0)
673 continue;
674
675 intr_add_cpu(i);
676 }
677 }
678
679 /*
680 * Assign logical CPU IDs to local APICs.
681 */
682 static void
683 assign_cpu_ids(void)
684 {
685 u_int i;
686
687 /* Check for explicitly disabled CPUs. */
688 for (i = 0; i <= MAX_APIC_ID; i++) {
689 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
690 continue;
691
692 /* Don't use this CPU if it has been disabled by a tunable. */
693 if (resource_disabled("lapic", i)) {
694 cpu_info[i].cpu_disabled = 1;
695 continue;
696 }
697 }
698
699 /*
700 * Assign CPU IDs to local APIC IDs and disable any CPUs
701 * beyond MAXCPU. CPU 0 has already been assigned to the BSP,
702 * so we only have to assign IDs for APs.
703 */
704 mp_ncpus = 1;
705 for (i = 0; i <= MAX_APIC_ID; i++) {
706 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
707 cpu_info[i].cpu_disabled)
708 continue;
709
710 if (mp_ncpus < MAXCPU) {
711 cpu_apic_ids[mp_ncpus] = i;
712 mp_ncpus++;
713 } else
714 cpu_info[i].cpu_disabled = 1;
715 }
716 KASSERT(mp_maxid >= mp_ncpus - 1,
717 ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
718 mp_ncpus));
719 }
720
/*
 * start each AP in our list
 * Returns the number of APs that actually came up (mp_naps).
 */
static int
start_all_aps(void)
{
#ifndef PC98
	u_char mpbiosreason;
#endif
	struct pcpu *pc;
	char *stack;
	uintptr_t kptbase;
	u_int32_t mpbioswarmvec;
	int apic_id, cpu, i, pg;

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	/* install the AP 1st level boot code */
	install_ap_tramp();

	/* save the current value of the warm-start vector */
	mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
#ifndef PC98
	outb(CMOS_REG, BIOS_RESET);
	mpbiosreason = inb(CMOS_DATA);
#endif

	/* set up temporary P==V mapping for AP boot */
	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
	kptbase = (uintptr_t)(void *)KPTphys;
	for (i = 0; i < NKPT; i++)
		PTD[i] = (pd_entry_t)(PG_V | PG_RW |
		    ((kptbase + i * PAGE_SIZE) & PG_FRAME));
	invltlb();

	/* start each AP */
	for (cpu = 1; cpu < mp_ncpus; cpu++) {
		apic_id = cpu_apic_ids[cpu];

		/* first page of AP's private space */
		pg = cpu * i386_btop(sizeof(struct privatespace));

		/* allocate a new private data page */
		/* NOTE(review): kmem_alloc() return is not checked;
		 * presumably boot-time allocation cannot fail - confirm. */
		pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);

		/* wire it into the private page table page */
		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));

		/* allocate and set up an idle stack data page */
		stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
		for (i = 0; i < KSTACK_PAGES; i++)
			SMPpt[pg + 1 + i] = (pt_entry_t)
			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

		/* prime data page for it to use */
		pcpu_init(pc, cpu, sizeof(struct pcpu));
		pc->pc_apic_id = apic_id;

		/* setup a vector to our boot code */
		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
#ifndef PC98
		outb(CMOS_REG, BIOS_RESET);
		outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
#endif

		/* Hand the AP its idle stack and id via file-scope vars. */
		bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
		    PAGE_SIZE];
		bootAP = cpu;

		/* attempt to start the Application Processor */
		CHECK_INIT(99); /* setup checkpoints */
		if (!start_ap(apic_id)) {
			printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
			CHECK_PRINT("trace"); /* show checkpoints */
			/* better panic as the AP may be running loose */
			printf("panic y/n? [y] ");
			if (cngetc() != 'n')
				panic("bye-bye");
		}
		CHECK_PRINT("trace"); /* show checkpoints */

		all_cpus |= (1 << cpu); /* record AP in CPU map */
	}

	/* build our map of 'other' CPUs */
	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

	/* restore the warmstart vector */
	*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;

#ifndef PC98
	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, mpbiosreason);
#endif

	/*
	 * Set up the idle context for the BSP. Similar to above except
	 * that some was done by locore, some by pmap.c and some is implicit
	 * because the BSP is cpu#0 and the page is initially zero and also
	 * because we can refer to variables by name on the BSP..
	 */

	/* Allocate and setup BSP idle stack */
	stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
	for (i = 0; i < KSTACK_PAGES; i++)
		SMPpt[1 + i] = (pt_entry_t)
		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

	/* Tear down the temporary P==V mapping installed above. */
	for (i = 0; i < NKPT; i++)
		PTD[i] = 0;
	pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);

	/* number of APs actually started */
	return mp_naps;
}
837
838 /*
839 * load the 1st level AP boot code into base memory.
840 */
841
/* targets for relocation: symbols inside mpboot.s that install_ap_tramp()
 * patches after copying the trampoline into base memory. */
extern void bigJump(void);
extern void bootCodeSeg(void);
extern void bootDataSeg(void);
extern void MPentry(void);
extern u_int MP_GDT;
extern u_int mp_gdtbase;
849
/*
 * Copy the first-level AP boot code (mpboot.s) to boot_address in base
 * memory and patch its embedded addresses so it runs at that location.
 */
static void
install_ap_tramp(void)
{
	int x;
	int size = *(int *) ((u_long) & bootMP_size);
	vm_offset_t va = boot_address + KERNBASE;
	u_char *src = (u_char *) ((u_long) bootMP);
	u_char *dst = (u_char *) va;
	u_int boot_base = (u_int) bootMP;
	u_int8_t *dst8;
	u_int16_t *dst16;
	u_int32_t *dst32;

	KASSERT (size <= PAGE_SIZE,
	    ("'size' do not fit into PAGE_SIZE, as expected."));
	/* Map the trampoline page and copy the boot code into it. */
	pmap_kenter(va, boot_address);
	pmap_invalidate_page (kernel_pmap, va);
	for (x = 0; x < size; ++x)
		*dst++ = *src++;

	/*
	 * modify addresses in code we just moved to basemem. unfortunately we
	 * need fairly detailed info about mpboot.s for this to work. changes
	 * to mpboot.s might require changes here.
	 */

	/* boot code is located in KERNEL space */
	dst = (u_char *) va;

	/* modify the lgdt arg */
	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
	*dst32 = boot_address + ((u_int) & MP_GDT - boot_base);

	/* modify the ljmp target for MPentry() */
	/* +1 skips the jump opcode byte to reach its 32-bit operand. */
	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
	*dst32 = ((u_int) MPentry - KERNBASE);

	/* modify the target for boot code segment */
	/* 16-bit low word followed by an 8-bit high byte. */
	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_address & 0xffff;
	*dst8 = ((u_int) boot_address >> 16) & 0xff;

	/* modify the target for boot data segment */
	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_address & 0xffff;
	*dst8 = ((u_int) boot_address >> 16) & 0xff;
}
899
/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'physicalCpu'. It does quite a "song and dance"
 * to accomplish this. This is necessary because of the nuances
 * of the different hardware we might encounter. It isn't pretty,
 * but it seems to work.
 *
 * Returns 1 if the AP announced itself (by bumping mp_naps) within
 * 5 seconds, 0 otherwise.
 */
static int
start_ap(int apic_id)
{
	int vector, ms;
	int cpus;

	/* calculate the vector */
	/* The STARTUP IPI vector is the 4K page number of the trampoline. */
	vector = (boot_address >> 12) & 0xff;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_naps;

	/*
	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
	 * and running the target CPU. OR this INIT IPI might be latched (P5
	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
	 * ignored.
	 */

	/* do an INIT IPI: assert RESET */
	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);

	/* wait for pending status end */
	lapic_ipi_wait(-1);

	/* do an INIT IPI: deassert RESET */
	/* NOTE(review): deassert is broadcast (ALLESELF, level-triggered),
	 * presumably per the old MP spec requirement - confirm. */
	lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);

	/* wait for pending status end */
	DELAY(10000); /* wait ~10mS */
	lapic_ipi_wait(-1);

	/*
	 * next we do a STARTUP IPI: the previous INIT IPI might still be
	 * latched, (P5 bug) this 1st STARTUP would then terminate
	 * immediately, and the previously started INIT IPI would continue. OR
	 * the previous INIT IPI has already run. and this STARTUP IPI will
	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
	 * will run.
	 */

	/* do a STARTUP IPI */
	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
	    vector, apic_id);
	lapic_ipi_wait(-1);
	DELAY(200); /* wait ~200uS */

	/*
	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
	 * recognized after hardware RESET or INIT IPI.
	 */

	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
	    vector, apic_id);
	lapic_ipi_wait(-1);
	DELAY(200); /* wait ~200uS */

	/* Wait up to 5 seconds for it to start. */
	for (ms = 0; ms < 5000; ms++) {
		if (mp_naps > cpus)
			return 1; /* return SUCCESS */
		DELAY(1000);
	}
	return 0; /* return FAILURE */
}
978
#ifdef COUNT_XINVLTLB_HITS
/* TLB-shootdown statistics exported under the debug.xhits sysctl tree. */
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
    sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
    sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
    sizeof(xhits_rng), "IU", "");

/* Counts of broadcast (all-but-self) shootdown IPIs sent. */
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
    0, "");

/* Counts of targeted (masked) shootdown IPIs sent. */
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
    &ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
    &ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
    &ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
    &ipi_masked_range_size, 0, "");
#endif /* COUNT_XINVLTLB_HITS */
1014
1015 /*
1016 * Flush the TLB on all other CPU's
1017 */
1018 static void
1019 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
1020 {
1021 u_int ncpu;
1022
1023 ncpu = mp_ncpus - 1; /* does not shootdown self */
1024 if (ncpu < 1)
1025 return; /* no other cpus */
1026 if (!(read_eflags() & PSL_I))
1027 panic("%s: interrupts disabled", __func__);
1028 mtx_lock_spin(&smp_ipi_mtx);
1029 smp_tlb_addr1 = addr1;
1030 smp_tlb_addr2 = addr2;
1031 atomic_store_rel_int(&smp_tlb_wait, 0);
1032 ipi_all_but_self(vector);
1033 while (smp_tlb_wait < ncpu)
1034 ia32_pause();
1035 mtx_unlock_spin(&smp_ipi_mtx);
1036 }
1037
1038 static void
1039 smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
1040 {
1041 int ncpu, othercpus;
1042
1043 othercpus = mp_ncpus - 1;
1044 if (mask == (u_int)-1) {
1045 ncpu = othercpus;
1046 if (ncpu < 1)
1047 return;
1048 } else {
1049 mask &= ~PCPU_GET(cpumask);
1050 if (mask == 0)
1051 return;
1052 ncpu = bitcount32(mask);
1053 if (ncpu > othercpus) {
1054 /* XXX this should be a panic offence */
1055 printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
1056 ncpu, othercpus);
1057 ncpu = othercpus;
1058 }
1059 /* XXX should be a panic, implied by mask == 0 above */
1060 if (ncpu < 1)
1061 return;
1062 }
1063 if (!(read_eflags() & PSL_I))
1064 panic("%s: interrupts disabled", __func__);
1065 mtx_lock_spin(&smp_ipi_mtx);
1066 smp_tlb_addr1 = addr1;
1067 smp_tlb_addr2 = addr2;
1068 atomic_store_rel_int(&smp_tlb_wait, 0);
1069 if (mask == (u_int)-1)
1070 ipi_all_but_self(vector);
1071 else
1072 ipi_selected(mask, vector);
1073 while (smp_tlb_wait < ncpu)
1074 ia32_pause();
1075 mtx_unlock_spin(&smp_ipi_mtx);
1076 }
1077
1078 void
1079 smp_cache_flush(void)
1080 {
1081
1082 if (smp_started)
1083 smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
1084 }
1085
1086 void
1087 smp_invltlb(void)
1088 {
1089
1090 if (smp_started) {
1091 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
1092 #ifdef COUNT_XINVLTLB_HITS
1093 ipi_global++;
1094 #endif
1095 }
1096 }
1097
1098 void
1099 smp_invlpg(vm_offset_t addr)
1100 {
1101
1102 if (smp_started) {
1103 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
1104 #ifdef COUNT_XINVLTLB_HITS
1105 ipi_page++;
1106 #endif
1107 }
1108 }
1109
1110 void
1111 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
1112 {
1113
1114 if (smp_started) {
1115 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
1116 #ifdef COUNT_XINVLTLB_HITS
1117 ipi_range++;
1118 ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
1119 #endif
1120 }
1121 }
1122
1123 void
1124 smp_masked_invltlb(u_int mask)
1125 {
1126
1127 if (smp_started) {
1128 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
1129 #ifdef COUNT_XINVLTLB_HITS
1130 ipi_masked_global++;
1131 #endif
1132 }
1133 }
1134
1135 void
1136 smp_masked_invlpg(u_int mask, vm_offset_t addr)
1137 {
1138
1139 if (smp_started) {
1140 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
1141 #ifdef COUNT_XINVLTLB_HITS
1142 ipi_masked_page++;
1143 #endif
1144 }
1145 }
1146
1147 void
1148 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
1149 {
1150
1151 if (smp_started) {
1152 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
1153 #ifdef COUNT_XINVLTLB_HITS
1154 ipi_masked_range++;
1155 ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
1156 #endif
1157 }
1158 }
1159
/*
 * Handler for IPI_BITMAP_VECTOR: drain this CPU's pending bitmap IPIs
 * and act on each request encoded in the bitmap.
 */
void
ipi_bitmap_handler(struct trapframe frame)
{
	int cpu = PCPU_GET(cpuid);
	u_int ipi_bitmap;

	/* Claim all pending bitmap IPIs atomically so none are lost. */
	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);

	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
		struct thread *running_thread = curthread;
#ifdef COUNT_IPIS
		(*ipi_preempt_counts[cpu])++;
#endif
		thread_lock(running_thread);
		if (running_thread->td_critnest > 1)
			/* In a critical section: defer the preemption. */
			running_thread->td_owepreempt = 1;
		else
			mi_switch(SW_INVOL | SW_PREEMPT, NULL);
		thread_unlock(running_thread);
	}

	if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
		(*ipi_ast_counts[cpu])++;
#endif
		/* Nothing to do for AST */
	}
}
1188
/*
 * send an IPI to a set of cpus.
 */
void
ipi_selected(u_int32_t cpus, u_int ipi)
{
	int cpu;
	u_int bitmap = 0;
	u_int old_pending;
	u_int new_pending;

	/* Bitmapped IPIs share one vector; the request type goes in a bitmap. */
	if (IPI_IS_BITMAPED(ipi)) {
		bitmap = 1 << ipi;
		ipi = IPI_BITMAP_VECTOR;
	}

#ifdef STOP_NMI
	/* Stop requests may be delivered as NMIs instead of normal IPIs. */
	if (ipi == IPI_STOP && stop_cpus_with_nmi) {
		ipi_nmi_selected(cpus);
		return;
	}
#endif
	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
	/* Walk the target mask, lowest CPU id first. */
	while ((cpu = ffs(cpus)) != 0) {
		cpu--;
		cpus &= ~(1 << cpu);

		KASSERT(cpu_apic_ids[cpu] != -1,
		    ("IPI to non-existent CPU %d", cpu));

		if (bitmap) {
			/*
			 * Atomically post the request; if another request
			 * was already pending the vector is still in flight,
			 * so the hardware IPI can be skipped.
			 */
			do {
				old_pending = cpu_ipi_pending[cpu];
				new_pending = old_pending | bitmap;
			} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending));

			if (old_pending)
				continue;
		}

		lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
	}

}
1233
1234 /*
1235 * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
1236 */
1237 void
1238 ipi_all(u_int ipi)
1239 {
1240
1241 if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
1242 ipi_selected(all_cpus, ipi);
1243 return;
1244 }
1245 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
1246 lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
1247 }
1248
1249 /*
1250 * send an IPI to all CPUs EXCEPT myself
1251 */
1252 void
1253 ipi_all_but_self(u_int ipi)
1254 {
1255
1256 if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
1257 ipi_selected(PCPU_GET(other_cpus), ipi);
1258 return;
1259 }
1260 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
1261 lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
1262 }
1263
1264 /*
1265 * send an IPI to myself
1266 */
1267 void
1268 ipi_self(u_int ipi)
1269 {
1270
1271 if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
1272 ipi_selected(PCPU_GET(cpumask), ipi);
1273 return;
1274 }
1275 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
1276 lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
1277 }
1278
1279 #ifdef STOP_NMI
1280 /*
1281 * send NMI IPI to selected CPUs
1282 */
1283
1284 #define BEFORE_SPIN 1000000
1285
void
ipi_nmi_selected(u_int32_t cpus)
{
	int cpu;
	register_t icrlo;

	/* Edge-triggered NMI with physical destination addressing. */
	icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT
		| APIC_TRIGMOD_EDGE;

	CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);

	/*
	 * Record the targets before sending so ipi_nmi_handler() can tell
	 * a stop request apart from an unrelated NMI.
	 */
	atomic_set_int(&ipi_nmi_pending, cpus);

	/* Deliver one NMI per target CPU, lowest CPU id first. */
	while ((cpu = ffs(cpus)) != 0) {
		cpu--;
		cpus &= ~(1 << cpu);

		KASSERT(cpu_apic_ids[cpu] != -1,
		    ("IPI NMI to non-existent CPU %d", cpu));

		/* Wait for an earlier IPI to finish. */
		if (!lapic_ipi_wait(BEFORE_SPIN))
			panic("ipi_nmi_selected: previous IPI has not cleared");

		lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
	}
}
1313
1314 int
1315 ipi_nmi_handler(void)
1316 {
1317 int cpumask = PCPU_GET(cpumask);
1318
1319 if (!(ipi_nmi_pending & cpumask))
1320 return 1;
1321
1322 atomic_clear_int(&ipi_nmi_pending, cpumask);
1323 cpustop_handler();
1324 return 0;
1325 }
1326
1327 #endif /* STOP_NMI */
1328
/*
 * Handle an IPI_STOP by saving our current context and spinning until we
 * are resumed.
 */
void
cpustop_handler(void)
{
	int cpu = PCPU_GET(cpuid);
	int cpumask = PCPU_GET(cpumask);

	/* Save our register state so debuggers/dumps can inspect it. */
	savectx(&stoppcbs[cpu]);

	/* Indicate that we are stopped */
	atomic_set_int(&stopped_cpus, cpumask);

	/* Wait for restart */
	while (!(started_cpus & cpumask))
		ia32_pause();

	/* Acknowledge the restart and leave the stopped set. */
	atomic_clear_int(&started_cpus, cpumask);
	atomic_clear_int(&stopped_cpus, cpumask);

	/* The BSP runs any deferred restart callback exactly once. */
	if (cpu == 0 && cpustop_restartfunc != NULL) {
		cpustop_restartfunc();
		cpustop_restartfunc = NULL;
	}
}
1356
1357 /*
1358 * This is called once the rest of the system is up and running and we're
1359 * ready to let the AP's out of the pen.
1360 */
1361 static void
1362 release_aps(void *dummy __unused)
1363 {
1364
1365 if (mp_ncpus == 1)
1366 return;
1367 atomic_store_rel_int(&aps_ready, 1);
1368 while (smp_started == 0)
1369 ia32_pause();
1370 }
1371 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
1372
1373 static int
1374 sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
1375 {
1376 u_int mask;
1377 int error;
1378
1379 mask = hlt_cpus_mask;
1380 error = sysctl_handle_int(oidp, &mask, 0, req);
1381 if (error || !req->newptr)
1382 return (error);
1383
1384 if (logical_cpus_mask != 0 &&
1385 (mask & logical_cpus_mask) == logical_cpus_mask)
1386 hlt_logical_cpus = 1;
1387 else
1388 hlt_logical_cpus = 0;
1389
1390 if (! hyperthreading_allowed)
1391 mask |= hyperthreading_cpus_mask;
1392
1393 if ((mask & all_cpus) == all_cpus)
1394 mask &= ~(1<<0);
1395 hlt_cpus_mask = mask;
1396 return (error);
1397 }
1398 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
1399 0, 0, sysctl_hlt_cpus, "IU",
1400 "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2.");
1401
1402 static int
1403 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
1404 {
1405 int disable, error;
1406
1407 disable = hlt_logical_cpus;
1408 error = sysctl_handle_int(oidp, &disable, 0, req);
1409 if (error || !req->newptr)
1410 return (error);
1411
1412 if (disable)
1413 hlt_cpus_mask |= logical_cpus_mask;
1414 else
1415 hlt_cpus_mask &= ~logical_cpus_mask;
1416
1417 if (! hyperthreading_allowed)
1418 hlt_cpus_mask |= hyperthreading_cpus_mask;
1419
1420 if ((hlt_cpus_mask & all_cpus) == all_cpus)
1421 hlt_cpus_mask &= ~(1<<0);
1422
1423 hlt_logical_cpus = disable;
1424 return (error);
1425 }
1426
1427 static int
1428 sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
1429 {
1430 int allowed, error;
1431
1432 allowed = hyperthreading_allowed;
1433 error = sysctl_handle_int(oidp, &allowed, 0, req);
1434 if (error || !req->newptr)
1435 return (error);
1436
1437 if (allowed)
1438 hlt_cpus_mask &= ~hyperthreading_cpus_mask;
1439 else
1440 hlt_cpus_mask |= hyperthreading_cpus_mask;
1441
1442 if (logical_cpus_mask != 0 &&
1443 (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
1444 hlt_logical_cpus = 1;
1445 else
1446 hlt_logical_cpus = 0;
1447
1448 if ((hlt_cpus_mask & all_cpus) == all_cpus)
1449 hlt_cpus_mask &= ~(1<<0);
1450
1451 hyperthreading_allowed = allowed;
1452 return (error);
1453 }
1454
/*
 * Register the logical-CPU / hyperthreading halt sysctls and apply the
 * corresponding boot-time tunables once the logical CPU set is known.
 */
static void
cpu_hlt_setup(void *dummy __unused)
{

	if (logical_cpus_mask != 0) {
		TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
		    &hlt_logical_cpus);
		sysctl_ctx_init(&logical_cpu_clist);
		SYSCTL_ADD_PROC(&logical_cpu_clist,
		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
		    "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
		    sysctl_hlt_logical_cpus, "IU", "");
		SYSCTL_ADD_UINT(&logical_cpu_clist,
		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
		    "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
		    &logical_cpus_mask, 0, "");

		/* Honour the tunable fetched above. */
		if (hlt_logical_cpus)
			hlt_cpus_mask |= logical_cpus_mask;

		/*
		 * If necessary for security purposes, force
		 * hyperthreading off, regardless of the value
		 * of hlt_logical_cpus.
		 */
		if (hyperthreading_cpus_mask) {
			TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
			    &hyperthreading_allowed);
			SYSCTL_ADD_PROC(&logical_cpu_clist,
			    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
			    "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
			    0, 0, sysctl_hyperthreading_allowed, "IU", "");
			if (! hyperthreading_allowed)
				hlt_cpus_mask |= hyperthreading_cpus_mask;
		}
	}
}
SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
1493
1494 int
1495 mp_grab_cpu_hlt(void)
1496 {
1497 u_int mask = PCPU_GET(cpumask);
1498 #ifdef MP_WATCHDOG
1499 u_int cpuid = PCPU_GET(cpuid);
1500 #endif
1501 int retval;
1502
1503 #ifdef MP_WATCHDOG
1504 ap_watchdog(cpuid);
1505 #endif
1506
1507 retval = mask & hlt_cpus_mask;
1508 while (mask & hlt_cpus_mask)
1509 __asm __volatile("sti; hlt" : : : "memory");
1510 return (retval);
1511 }
1512
1513 #ifdef COUNT_IPIS
1514 /*
1515 * Setup interrupt counters for IPI handlers.
1516 */
1517 static void
1518 mp_ipi_intrcnt(void *dummy)
1519 {
1520 char buf[64];
1521 int i;
1522
1523 for (i = 0; i < mp_maxid; i++) {
1524 if (CPU_ABSENT(i))
1525 continue;
1526 snprintf(buf, sizeof(buf), "cpu%d: invltlb", i);
1527 intrcnt_add(buf, &ipi_invltlb_counts[i]);
1528 snprintf(buf, sizeof(buf), "cpu%d: invlrng", i);
1529 intrcnt_add(buf, &ipi_invlrng_counts[i]);
1530 snprintf(buf, sizeof(buf), "cpu%d: invlpg", i);
1531 intrcnt_add(buf, &ipi_invlpg_counts[i]);
1532 snprintf(buf, sizeof(buf), "cpu%d: preempt", i);
1533 intrcnt_add(buf, &ipi_preempt_counts[i]);
1534 snprintf(buf, sizeof(buf), "cpu%d: ast", i);
1535 intrcnt_add(buf, &ipi_ast_counts[i]);
1536 snprintf(buf, sizeof(buf), "cpu%d: rendezvous", i);
1537 intrcnt_add(buf, &ipi_rendezvous_counts[i]);
1538 snprintf(buf, sizeof(buf), "cpu%d: lazypmap", i);
1539 intrcnt_add(buf, &ipi_lazypmap_counts[i]);
1540 }
1541 }
1542 SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL)
1543 #endif
Cache object: 2acbc5133982a9af0d59fbb69c7a08aa
|