/*-
 * Copyright (c) 1996, by Steve Passe
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
#include "opt_pmap.h"
#include "opt_sched.h"
#include "opt_smp.h"

#if !defined(lint)
#if !defined(SMP)
#error How did you get here?
#endif

#ifndef DEV_APIC
#error The apic device is required for SMP, add "device apic" to your config file.
#endif
#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
#error SMP not supported with CPU_DISABLE_CMPXCHG
#endif
#endif /* not lint */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cons.h>	/* cngetc() */
#include <sys/cpuset.h>
#ifdef GPROF
#include <sys/gmon.h>
#endif
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#include <x86/apicreg.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/cpu.h>

#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)
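
/*
 * Background for the constants above: writing BIOS_WARM (0x0a) into the
 * CMOS shutdown-status register (index 0x0f) asks the BIOS, after a CPU
 * receives INIT, to skip the full POST and instead jump through the
 * warm-boot vector kept at real-mode address 40:67 (hence WARMBOOT_OFF
 * and WARMBOOT_SEG at KERNBASE + 0x0467/0x0469).  start_all_aps() below
 * points that vector at the AP trampoline.
 */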

/*
 * This code MUST be enabled here and in mpboot.s.  It follows the very
 * early stages of AP boot by placing values in CMOS RAM.  It is NORMALLY
 * never needed, hence the primitive method of enabling it.
 *
#define CHECK_POINTS
 */

#if defined(CHECK_POINTS) && !defined(PC98)
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

#define CHECK_INIT(D)				\
	CHECK_WRITE(0x34, (D));			\
	CHECK_WRITE(0x35, (D));			\
	CHECK_WRITE(0x36, (D));			\
	CHECK_WRITE(0x37, (D));			\
	CHECK_WRITE(0x38, (D));			\
	CHECK_WRITE(0x39, (D))

#define CHECK_PRINT(S)				\
	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
	    (S),				\
	    CHECK_READ(0x34),			\
	    CHECK_READ(0x35),			\
	    CHECK_READ(0x36),			\
	    CHECK_READ(0x37),			\
	    CHECK_READ(0x38),			\
	    CHECK_READ(0x39))

#else /* CHECK_POINTS */

#define CHECK_INIT(D)
#define CHECK_PRINT(S)
#define CHECK_WRITE(A, D)

#endif /* CHECK_POINTS */

/* lock region used by kernel profiling */
int	mcount_lock;

int	mp_naps;		/* # of Application Processors */
int	boot_cpu_id = -1;	/* designated BSP */

extern struct pcpu __pcpu[];

/* AP uses this during bootstrap.  Do not staticize. */
char *bootSTK;
static int bootAP;

/* Free these after use */
void *bootstacks[MAXCPU];
static void *dpcpu;

struct pcb stoppcbs[MAXCPU];
struct susppcb **susppcbs;

/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;

#ifdef COUNT_IPIS
/* Interrupt counts. */
static u_long *ipi_preempt_counts[MAXCPU];
static u_long *ipi_ast_counts[MAXCPU];
u_long *ipi_invltlb_counts[MAXCPU];
u_long *ipi_invlrng_counts[MAXCPU];
u_long *ipi_invlpg_counts[MAXCPU];
u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
u_long *ipi_lazypmap_counts[MAXCPU];
static u_long *ipi_hardclock_counts[MAXCPU];
#endif

/* Default cpu_ops implementation. */
struct cpu_ops cpu_ops = {
	.ipi_vectored = lapic_ipi_vectored
};

/*
 * Local data and functions.
 */

static volatile cpuset_t ipi_nmi_pending;

volatile cpuset_t resuming_cpus;
volatile cpuset_t toresume_cpus;

/* used to hold the APs until we are ready to release them */
static struct mtx ap_boot_mtx;

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;

/*
 * Store data from cpu_add() until later in the boot when we actually set up
 * the APs.
 */
struct cpu_info {
	int	cpu_present:1;
	int	cpu_bsp:1;
	int	cpu_disabled:1;
	int	cpu_hyperthread:1;
} static cpu_info[MAX_APIC_ID + 1];
int cpu_apic_ids[MAXCPU];
int apic_cpuids[MAX_APIC_ID + 1];

/* Holds pending bitmap based IPIs per CPU */
volatile u_int cpu_ipi_pending[MAXCPU];

static u_int boot_address;
static int cpu_logical;			/* logical cpus per core */
static int cpu_cores;			/* cores per package */

static void	assign_cpu_ids(void);
static void	install_ap_tramp(void);
static void	set_interrupt_apic_ids(void);
static int	start_all_aps(void);
static int	start_ap(int apic_id);
static void	release_aps(void *dummy);

static u_int	hyperthreading_cpus;	/* logical cpus sharing L1 cache */
static int	hyperthreading_allowed = 1;

static void
mem_range_AP_init(void)
{
	if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
		mem_range_softc.mr_op->initAP(&mem_range_softc);
}

static void
topo_probe_amd(void)
{
	int core_id_bits;
	int id;

	/* AMD processors do not support HTT. */
	cpu_logical = 1;

	if ((amd_feature2 & AMDID2_CMP) == 0) {
		cpu_cores = 1;
		return;
	}

	core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
	    AMDID_COREID_SIZE_SHIFT;
	if (core_id_bits == 0) {
		cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1;
		return;
	}

	/* Fam 10h and newer should get here. */
	for (id = 0; id <= MAX_APIC_ID; id++) {
		/* Check logical CPU availability. */
		if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
			continue;
		/* Check if logical CPU has the same package ID. */
		if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits))
			continue;
		cpu_cores++;
	}
}

/*
 * Round up to the next power of two, if necessary, and then
 * take log2.
 * Returns -1 if argument is zero.
 */
static __inline int
mask_width(u_int x)
{

	return (fls(x << (1 - powerof2(x))) - 1);
}
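
/*
 * Illustrative values for mask_width(), derived by hand from the
 * expression above: mask_width(0) == -1, mask_width(1) == 0,
 * mask_width(2) == 1, mask_width(3) == 2 (3 rounds up to 4), and
 * mask_width(8) == 3.  In other words, it yields the number of APIC ID
 * bits needed to enumerate x items at a topology level.
 */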

static void
topo_probe_0x4(void)
{
	u_int p[4];
	int pkg_id_bits;
	int core_id_bits;
	int max_cores;
	int max_logical;
	int id;

	/* Both zero and one here mean one logical processor per package. */
	max_logical = (cpu_feature & CPUID_HTT) != 0 ?
	    (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1;
	if (max_logical <= 1)
		return;

	/*
	 * Because of the uniformity assumption we examine only those
	 * logical processors that belong to the same package as the BSP.
	 * Further, we count the number of logical processors that belong
	 * to the same core as the BSP, thus deducing the number of
	 * threads per core.
	 */
	if (cpu_high >= 0x4) {
		cpuid_count(0x04, 0, p);
		max_cores = ((p[0] >> 26) & 0x3f) + 1;
	} else
		max_cores = 1;
	core_id_bits = mask_width(max_logical/max_cores);
	if (core_id_bits < 0)
		return;
	pkg_id_bits = core_id_bits + mask_width(max_cores);

	for (id = 0; id <= MAX_APIC_ID; id++) {
		/* Check logical CPU availability. */
		if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
			continue;
		/* Check if logical CPU has the same package ID. */
		if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits))
			continue;
		cpu_cores++;
		/* Check if logical CPU has the same package and core IDs. */
		if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits))
			cpu_logical++;
	}

	KASSERT(cpu_cores >= 1 && cpu_logical >= 1,
	    ("topo_probe_0x4 couldn't find BSP"));

	cpu_cores /= cpu_logical;
	hyperthreading_cpus = cpu_logical;
}
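
/*
 * Worked example for topo_probe_0x4() on a hypothetical dual-core CPU
 * with two threads per core: CPUID.1:EBX[23:16] reports max_logical = 4
 * and CPUID.4 reports max_cores = 2, so core_id_bits = mask_width(4/2)
 * = 1 and pkg_id_bits = 1 + mask_width(2) = 2.  APIC IDs matching the
 * BSP in bits [31:2] share its package (the loop counts all 4 as
 * cpu_cores); those also matching bit 1 share its core (cpu_logical =
 * 2), and the final division leaves cpu_cores = 2.
 */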

static void
topo_probe_0xb(void)
{
	u_int p[4];
	int bits;
	int cnt;
	int i;
	int logical;
	int type;
	int x;

	/* We only support three levels for now. */
	for (i = 0; i < 3; i++) {
		cpuid_count(0x0b, i, p);

		/* Fall back if CPUID leaf 0x0b doesn't really exist. */
		if (i == 0 && p[1] == 0) {
			topo_probe_0x4();
			return;
		}

		bits = p[0] & 0x1f;
		logical = p[1] &= 0xffff;
		type = (p[2] >> 8) & 0xff;
		if (type == 0 || logical == 0)
			break;
		/*
		 * Because of the uniformity assumption we examine only
		 * those logical processors that belong to the same
		 * package as the BSP.
		 */
		for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) {
			if (!cpu_info[x].cpu_present ||
			    cpu_info[x].cpu_disabled)
				continue;
			if (x >> bits == boot_cpu_id >> bits)
				cnt++;
		}
		if (type == CPUID_TYPE_SMT)
			cpu_logical = cnt;
		else if (type == CPUID_TYPE_CORE)
			cpu_cores = cnt;
	}
	if (cpu_logical == 0)
		cpu_logical = 1;
	cpu_cores /= cpu_logical;
}
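
/*
 * For reference, the CPUID leaf 0x0b fields consumed above are:
 * EAX[4:0] is the number of APIC ID bits to shift right to reach the
 * next topology level, EBX[15:0] is the number of logical processors
 * at this level, and ECX[15:8] is the level type (1 = SMT, 2 = core).
 * A zero EBX on sub-leaf 0 means the leaf is not actually implemented,
 * hence the fallback to topo_probe_0x4() above.
 */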

/*
 * Both the topology discovery code and the code that consumes topology
 * information assume top-down uniformity of the topology.
 * That is, all physical packages must be identical and each
 * core in a package must have the same number of threads.
 * Topology information is queried only on the BSP, on which this
 * code runs and for which it can query CPUID information.
 * The topology is then extrapolated to all packages using the
 * uniformity assumption.
 */
static void
topo_probe(void)
{
	static int cpu_topo_probed = 0;

	if (cpu_topo_probed)
		return;

	CPU_ZERO(&logical_cpus_mask);
	if (mp_ncpus <= 1)
		cpu_cores = cpu_logical = 1;
	else if (cpu_vendor_id == CPU_VENDOR_AMD)
		topo_probe_amd();
	else if (cpu_vendor_id == CPU_VENDOR_INTEL) {
		/*
		 * See the Intel(R) 64 Architecture Processor
		 * Topology Enumeration article for details.
		 *
		 * Note that the 0x1 <= cpu_high < 4 case should be
		 * compatible with topo_probe_0x4() logic when
		 * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
		 * or it should trigger the fallback otherwise.
		 */
		if (cpu_high >= 0xb)
			topo_probe_0xb();
		else if (cpu_high >= 0x1)
			topo_probe_0x4();
	}

	/*
	 * Fallback: assume each logical CPU is in a separate
	 * physical package.  That is, no multi-core, no SMT.
	 */
	if (cpu_cores == 0 || cpu_logical == 0)
		cpu_cores = cpu_logical = 1;
	cpu_topo_probed = 1;
}
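
/*
 * Example of the uniformity assumption at work: if the BSP probes as
 * 2 cores x 2 threads and mp_ncpus is 8, cpu_topo() below infers
 * 8 / (2 * 2) = 2 identical packages without ever examining the other
 * package's CPUID data.
 */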

struct cpu_group *
cpu_topo(void)
{
	int cg_flags;

	/*
	 * Determine whether any threading flags are
	 * necessary.
	 */
	topo_probe();
	if (cpu_logical > 1 && hyperthreading_cpus)
		cg_flags = CG_FLAG_HTT;
	else if (cpu_logical > 1)
		cg_flags = CG_FLAG_SMT;
	else
		cg_flags = 0;
	if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
		printf("WARNING: Non-uniform processors.\n");
		printf("WARNING: Using suboptimal topology.\n");
		return (smp_topo_none());
	}
	/*
	 * Neither multi-core nor hyper-threaded.
	 */
	if (cpu_logical * cpu_cores == 1)
		return (smp_topo_none());
	/*
	 * Only HTT, no multi-core.
	 */
	if (cpu_logical > 1 && cpu_cores == 1)
		return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags));
	/*
	 * Only multi-core, no HTT.
	 */
	if (cpu_cores > 1 && cpu_logical == 1)
		return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags));
	/*
	 * Both HTT and multi-core.
	 */
	return (smp_topo_2level(CG_SHARE_L2, cpu_cores,
	    CG_SHARE_L1, cpu_logical, cg_flags));
}

/*
 * Calculate usable address in base memory for AP trampoline code.
 */
u_int
mp_bootaddress(u_int basemem)
{

	boot_address = trunc_page(basemem);	/* round down to 4k boundary */
	if ((basemem - boot_address) < bootMP_size)
		boot_address -= PAGE_SIZE;	/* not enough, lower by 4k */

	return boot_address;
}

void
cpu_add(u_int apic_id, char boot_cpu)
{

	if (apic_id > MAX_APIC_ID) {
		panic("SMP: APIC ID %d too high", apic_id);
		return;
	}
	KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
	    apic_id));
	cpu_info[apic_id].cpu_present = 1;
	if (boot_cpu) {
		KASSERT(boot_cpu_id == -1,
		    ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
		    boot_cpu_id));
		boot_cpu_id = apic_id;
		cpu_info[apic_id].cpu_bsp = 1;
	}
	if (mp_ncpus < MAXCPU) {
		mp_ncpus++;
		mp_maxid = mp_ncpus - 1;
	}
	if (bootverbose)
		printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
		    "AP");
}

void
cpu_mp_setmaxid(void)
{

	/*
	 * mp_maxid should already be set by calls to cpu_add().
	 * Just sanity check its value here.
	 */
	if (mp_ncpus == 0)
		KASSERT(mp_maxid == 0,
		    ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
	else if (mp_ncpus == 1)
		mp_maxid = 0;
	else
		KASSERT(mp_maxid >= mp_ncpus - 1,
		    ("%s: counters out of sync: max %d, count %d", __func__,
		    mp_maxid, mp_ncpus));
}

int
cpu_mp_probe(void)
{

	/*
	 * Always record the BSP in the CPU map so that the mbuf init code
	 * works correctly.
	 */
	CPU_SETOF(0, &all_cpus);
	if (mp_ncpus == 0) {
		/*
		 * No CPUs were found, so this must be a UP system.  Set up
		 * the variables to represent a system with a single CPU
		 * with an id of 0.
		 */
		mp_ncpus = 1;
		return (0);
	}

	/* At least one CPU was found. */
	if (mp_ncpus == 1) {
		/*
		 * One CPU was found, so this must be a UP system with
		 * an I/O APIC.
		 */
		mp_maxid = 0;
		return (0);
	}

	/* At least two CPUs were found. */
	return (1);
}

/*
 * Initialize the IPI handlers and start up the APs.
 */
void
cpu_mp_start(void)
{
	int i;

	/* Initialize the logical ID to APIC ID table. */
	for (i = 0; i < MAXCPU; i++) {
		cpu_apic_ids[i] = -1;
		cpu_ipi_pending[i] = 0;
	}

	/* Install an inter-CPU IPI for TLB invalidation */
	setidt(IPI_INVLTLB, IDTVEC(invltlb),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(IPI_INVLPG, IDTVEC(invlpg),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(IPI_INVLRNG, IDTVEC(invlrng),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for cache invalidation. */
	setidt(IPI_INVLCACHE, IDTVEC(invlcache),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for lazy pmap release */
	setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for all-CPU rendezvous */
	setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install generic inter-CPU IPI handler */
	setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for CPU stop/restart */
	setidt(IPI_STOP, IDTVEC(cpustop),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for CPU suspend/resume */
	setidt(IPI_SUSPEND, IDTVEC(cpususpend),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Set boot_cpu_id if needed. */
	if (boot_cpu_id == -1) {
		boot_cpu_id = PCPU_GET(apic_id);
		cpu_info[boot_cpu_id].cpu_bsp = 1;
	} else
		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
		    ("BSP's APIC ID doesn't match boot_cpu_id"));

	/* Probe logical/physical core configuration. */
	topo_probe();

	assign_cpu_ids();

	/* Start each Application Processor */
	start_all_aps();

	set_interrupt_apic_ids();
}

/*
 * Print various information about the SMP system hardware and setup.
 */
void
cpu_mp_announce(void)
{
	const char *hyperthread;
	int i;

	printf("FreeBSD/SMP: %d package(s) x %d core(s)",
	    mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
	if (hyperthreading_cpus > 1)
		printf(" x %d HTT threads", cpu_logical);
	else if (cpu_logical > 1)
		printf(" x %d SMT threads", cpu_logical);
	printf("\n");

	/* List active CPUs first. */
	printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
	for (i = 1; i < mp_ncpus; i++) {
		if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread)
			hyperthread = "/HT";
		else
			hyperthread = "";
		printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread,
		    cpu_apic_ids[i]);
	}

	/* List disabled CPUs last. */
	for (i = 0; i <= MAX_APIC_ID; i++) {
		if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled)
			continue;
		if (cpu_info[i].cpu_hyperthread)
			hyperthread = "/HT";
		else
			hyperthread = "";
		printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread,
		    i);
	}
}

/*
 * AP CPUs call this to initialize themselves.
 */
void
init_secondary(void)
{
	struct pcpu *pc;
	vm_offset_t addr;
	int	gsel_tss;
	int	x, myid;
	u_int	cpuid, cr0;

	/* bootAP is set in start_ap() to our ID. */
	myid = bootAP;

	/* Get per-cpu data */
	pc = &__pcpu[myid];

	/* prime data page for it to use */
	pcpu_init(pc, myid, sizeof(struct pcpu));
	dpcpu_init(dpcpu, myid);
	pc->pc_apic_id = cpu_apic_ids[myid];
	pc->pc_prvspace = pc;
	pc->pc_curthread = 0;

	fix_cpuid();

	gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
	gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;

	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt);			/* does magic intra-segment return */

	lidt(&r_idt);

	lldt(_default_ldt);
	PCPU_SET(currentldt, _default_ldt);

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
	PCPU_SET(common_tss.tss_esp0, 0);	/* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
	ltr(gsel_tss);

	PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);
	CHECK_WRITE(0x38, 5);

	/* Disable local APIC just to be sure. */
	lapic_disable();

	/* signal our startup to the BSP. */
	mp_naps++;
	CHECK_WRITE(0x39, 6);

	/* Spin until the BSP releases the APs. */
	while (!aps_ready)
		ia32_pause();

	/* BSP may have changed PTD while we were waiting */
	invltlb();
	for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
		invlpg(addr);

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
	lidt(&r_idt);
#endif

	/* Initialize the PAT MSR if present. */
	pmap_init_pat();

	/* set up CPU registers and state */
	cpu_setregs();

	/* set up SSE/NX */
	initializecpu();

	/* set up FPU state on the AP */
	npxinit(false);

	if (cpu_ops.cpu_init)
		cpu_ops.cpu_init();

	/* A quick check from sanity claus */
	cpuid = PCPU_GET(cpuid);
	if (PCPU_GET(apic_id) != lapic_id()) {
		printf("SMP: cpuid = %d\n", cpuid);
		printf("SMP: actual apic_id = %d\n", lapic_id());
		printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
		panic("cpuid mismatch! boom!!");
	}

	/* Initialize curthread. */
	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
	PCPU_SET(curthread, PCPU_GET(idlethread));

	mca_init();

	mtx_lock_spin(&ap_boot_mtx);

	/* Init local apic for irq's */
	lapic_setup(1);

	/* Set memory range attributes for this CPU to match the BSP */
	mem_range_AP_init();

	smp_cpus++;

	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
	printf("SMP: AP CPU #%d Launched!\n", cpuid);

	/* Determine if we are a logical CPU. */
	/* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
	if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
		CPU_SET(cpuid, &logical_cpus_mask);

	if (bootverbose)
		lapic_dump("AP");

	if (smp_cpus == mp_ncpus) {
		/* enable IPIs, tlb shootdown, freezes etc */
		atomic_store_rel_int(&smp_started, 1);
	}

	mtx_unlock_spin(&ap_boot_mtx);

	/* Wait until all the APs are up. */
	while (smp_started == 0)
		ia32_pause();

	/* Start per-CPU event timers. */
	cpu_initclocks_ap();

	/* Enter the scheduler. */
	sched_throw(NULL);

	panic("scheduler returned us to %s", __func__);
	/* NOTREACHED */
}

/*******************************************************************
 * local functions and data
 */

/*
 * We tell the I/O APIC code about all the CPUs that we want to receive
 * interrupts.  If we don't want certain CPUs to receive IRQs, we
 * can simply not tell the I/O APIC code about them in this function.
 * We also do not tell it about the BSP since it tells itself about
 * the BSP internally to work with UP kernels and on UP machines.
 */
static void
set_interrupt_apic_ids(void)
{
	u_int i, apic_id;

	for (i = 0; i < MAXCPU; i++) {
		apic_id = cpu_apic_ids[i];
		if (apic_id == -1)
			continue;
		if (cpu_info[apic_id].cpu_bsp)
			continue;
		if (cpu_info[apic_id].cpu_disabled)
			continue;

		/* Don't let hyperthreads service interrupts. */
		if (hyperthreading_cpus > 1 &&
		    apic_id % hyperthreading_cpus != 0)
			continue;

		intr_add_cpu(i);
	}
}

/*
 * Assign logical CPU IDs to local APICs.
 */
static void
assign_cpu_ids(void)
{
	u_int i;

	TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
	    &hyperthreading_allowed);

	/* Check for explicitly disabled CPUs. */
	for (i = 0; i <= MAX_APIC_ID; i++) {
		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
			continue;

		if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
			cpu_info[i].cpu_hyperthread = 1;

			/*
			 * Don't use HT CPU if it has been disabled by a
			 * tunable.
			 */
			if (hyperthreading_allowed == 0) {
				cpu_info[i].cpu_disabled = 1;
				continue;
			}
		}

		/* Don't use this CPU if it has been disabled by a tunable. */
		if (resource_disabled("lapic", i)) {
			cpu_info[i].cpu_disabled = 1;
			continue;
		}
	}

	if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
		hyperthreading_cpus = 0;
		cpu_logical = 1;
	}

	/*
	 * Assign CPU IDs to local APIC IDs and disable any CPUs
	 * beyond MAXCPU.  CPU 0 is always assigned to the BSP.
	 *
	 * To minimize confusion for userland, we attempt to number
	 * CPUs such that all threads and cores in a package are
	 * grouped together.  For now we assume that the BSP is always
	 * the first thread in a package and just start adding APs
	 * starting with the BSP's APIC ID.
	 */
	mp_ncpus = 1;
	cpu_apic_ids[0] = boot_cpu_id;
	apic_cpuids[boot_cpu_id] = 0;
	for (i = boot_cpu_id + 1; i != boot_cpu_id;
	    i == MAX_APIC_ID ? i = 0 : i++) {
		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
		    cpu_info[i].cpu_disabled)
			continue;

		if (mp_ncpus < MAXCPU) {
			cpu_apic_ids[mp_ncpus] = i;
			apic_cpuids[i] = mp_ncpus;
			mp_ncpus++;
		} else
			cpu_info[i].cpu_disabled = 1;
	}
	KASSERT(mp_maxid >= mp_ncpus - 1,
	    ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
	    mp_ncpus));
}

/*
 * Start each AP in our list.
 */
/* Lowest 1MB is already mapped: don't touch */
#define TMPMAP_START 1
static int
start_all_aps(void)
{
#ifndef PC98
	u_char mpbiosreason;
#endif
	u_int32_t mpbioswarmvec;
	int apic_id, cpu, i;

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	/* install the AP 1st level boot code */
	install_ap_tramp();

	/* save the current value of the warm-start vector */
	mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
#ifndef PC98
	outb(CMOS_REG, BIOS_RESET);
	mpbiosreason = inb(CMOS_DATA);
#endif

	/* set up temporary P==V mapping for AP boot */
	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
	for (i = TMPMAP_START; i < NKPT; i++)
		PTD[i] = PTD[KPTDI + i];
	invltlb();

	/* start each AP */
	for (cpu = 1; cpu < mp_ncpus; cpu++) {
		apic_id = cpu_apic_ids[cpu];

		/* allocate and set up a boot stack data page */
		bootstacks[cpu] =
		    (char *)kmem_malloc(kernel_arena, KSTACK_PAGES * PAGE_SIZE,
		    M_WAITOK | M_ZERO);
		dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
		    M_WAITOK | M_ZERO);
		/* setup a vector to our boot code */
		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
#ifndef PC98
		outb(CMOS_REG, BIOS_RESET);
		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
#endif

		bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 4;
		bootAP = cpu;

		/* attempt to start the Application Processor */
		CHECK_INIT(99);		/* setup checkpoints */
		if (!start_ap(apic_id)) {
			printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
			CHECK_PRINT("trace");	/* show checkpoints */
			/* better panic as the AP may be running loose */
			printf("panic y/n? [y] ");
			if (cngetc() != 'n')
				panic("bye-bye");
		}
		CHECK_PRINT("trace");		/* show checkpoints */

		CPU_SET(cpu, &all_cpus);	/* record AP in CPU map */
	}

	/* restore the warmstart vector */
	*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;

#ifndef PC98
	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, mpbiosreason);
#endif

	/* Undo V==P hack from above */
	for (i = TMPMAP_START; i < NKPT; i++)
		PTD[i] = 0;
	pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);

	/* number of APs actually started */
	return mp_naps;
}

/*
 * Load the 1st level AP boot code into base memory.
 */

/* targets for relocation */
extern void bigJump(void);
extern void bootCodeSeg(void);
extern void bootDataSeg(void);
extern void MPentry(void);
extern u_int MP_GDT;
extern u_int mp_gdtbase;

static void
install_ap_tramp(void)
{
	int x;
	int size = *(int *) ((u_long) & bootMP_size);
	vm_offset_t va = boot_address + KERNBASE;
	u_char *src = (u_char *) ((u_long) bootMP);
	u_char *dst = (u_char *) va;
	u_int boot_base = (u_int) bootMP;
	u_int8_t *dst8;
	u_int16_t *dst16;
	u_int32_t *dst32;

	KASSERT(size <= PAGE_SIZE,
	    ("'size' does not fit into PAGE_SIZE, as expected."));
	pmap_kenter(va, boot_address);
	pmap_invalidate_page(kernel_pmap, va);
	for (x = 0; x < size; ++x)
		*dst++ = *src++;

	/*
	 * Modify addresses in the code we just moved to basemem.
	 * Unfortunately we need fairly detailed info about mpboot.s
	 * for this to work.  Changes to mpboot.s might require changes
	 * here.
	 */

	/* boot code is located in KERNEL space */
	dst = (u_char *) va;

	/* modify the lgdt arg */
	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
	*dst32 = boot_address + ((u_int) & MP_GDT - boot_base);

	/* modify the ljmp target for MPentry() */
	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
	*dst32 = ((u_int) MPentry - KERNBASE);

	/* modify the target for boot code segment */
	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_address & 0xffff;
	*dst8 = ((u_int) boot_address >> 16) & 0xff;

	/* modify the target for boot data segment */
	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_address & 0xffff;
	*dst8 = ((u_int) boot_address >> 16) & 0xff;
}

/*
 * This function starts the AP (application processor) identified by the
 * APIC ID 'apic_id'.  It does quite a "song and dance" to accomplish
 * this.  This is necessary because of the nuances of the different
 * hardware we might encounter.  It isn't pretty, but it seems to work.
 */
static int
start_ap(int apic_id)
{
	int vector, ms;
	int cpus;

	/* calculate the vector */
	vector = (boot_address >> 12) & 0xff;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_naps;

	ipi_startup(apic_id, vector);

	/* Wait up to 5 seconds for it to start. */
	for (ms = 0; ms < 5000; ms++) {
		if (mp_naps > cpus)
			return 1;	/* return SUCCESS */
		DELAY(1000);
	}
	return 0;		/* return FAILURE */
}
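
/*
 * Note on the vector computation above: the STARTUP IPI vector selects
 * the physical page at which the target begins real-mode execution
 * (address = vector << 12), so boot_address must be page-aligned and
 * below 1MB.  For example, a trampoline installed at 0x9f000 yields
 * vector 0x9f.
 */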

#ifdef COUNT_XINVLTLB_HITS
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
    sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
    sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
    sizeof(xhits_rng), "IU", "");

u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
    0, "");

u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
    &ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
    &ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
    &ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
    &ipi_masked_range_size, 0, "");
#endif /* COUNT_XINVLTLB_HITS */

/*
 * Init and startup IPI.
 */
void
ipi_startup(int apic_id, int vector)
{

	/*
	 * This attempts to follow the algorithm described in the
	 * Intel Multiprocessor Specification v1.4 in section B.4.
	 * For each IPI, we allow the local APIC ~20us to deliver the
	 * IPI.  If that times out, we panic.
	 */

	/*
	 * First we do an INIT IPI: this INIT IPI might be run, resetting
	 * and running the target CPU.  Or this INIT IPI might be latched
	 * (P5 bug), with the CPU waiting for a STARTUP IPI.  Or this INIT
	 * IPI might be ignored.
	 */
	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
	lapic_ipi_wait(100);

	/* Explicitly deassert the INIT IPI. */
	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
	    apic_id);

	DELAY(10000);		/* wait ~10ms */

	/*
	 * Next we do a STARTUP IPI: the previous INIT IPI might still be
	 * latched (P5 bug), in which case this first STARTUP would
	 * terminate immediately and the previously started INIT IPI would
	 * continue.  Or the previous INIT IPI has already run, and this
	 * STARTUP IPI will run.  Or the previous INIT IPI was ignored,
	 * and this STARTUP IPI will run.
	 */
	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
	    vector, apic_id);
	if (!lapic_ipi_wait(100))
		panic("Failed to deliver first STARTUP IPI to APIC %d",
		    apic_id);
	DELAY(200);		/* wait ~200us */

	/*
	 * Finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should
	 * run IF the previous STARTUP IPI was cancelled by a latched INIT
	 * IPI.  Otherwise this STARTUP IPI will be ignored, as only ONE
	 * STARTUP IPI is recognized after hardware RESET or an INIT IPI.
	 */
	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
	    vector, apic_id);
	if (!lapic_ipi_wait(100))
		panic("Failed to deliver second STARTUP IPI to APIC %d",
		    apic_id);

	DELAY(200);		/* wait ~200us */
}

/*
 * Send an IPI to the specified CPU, handling the bitmap logic.
 */
static void
ipi_send_cpu(int cpu, u_int ipi)
{
	u_int bitmap, old_pending, new_pending;

	KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));

	if (IPI_IS_BITMAPED(ipi)) {
		bitmap = 1 << ipi;
		ipi = IPI_BITMAP_VECTOR;
		do {
			old_pending = cpu_ipi_pending[cpu];
			new_pending = old_pending | bitmap;
		} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
		    old_pending, new_pending));
		if (old_pending)
			return;
	}
	cpu_ops.ipi_vectored(ipi, cpu_apic_ids[cpu]);
}
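
/*
 * How the bitmap logic above coalesces IPIs: "bitmapped" IPIs (here
 * IPI_PREEMPT, IPI_AST and IPI_HARDCLOCK) all share the single hardware
 * vector IPI_BITMAP_VECTOR and instead set a bit in the target's
 * cpu_ipi_pending word.  If bits were already pending (old_pending !=
 * 0), an interrupt is known to be on its way and no second one is sent;
 * ipi_bitmap_handler() below drains the whole word atomically on
 * arrival.
 */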

/*
 * Flush the TLB on all other CPUs.
 */
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	u_int ncpu;

	ncpu = mp_ncpus - 1;	/* does not shoot down self */
	if (ncpu < 1)
		return;		/* no other cpus */
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	ipi_all_but_self(vector);
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	mtx_unlock_spin(&smp_ipi_mtx);
}

static void
smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	int cpu, ncpu, othercpus;

	othercpus = mp_ncpus - 1;
	if (CPU_ISFULLSET(&mask)) {
		if (othercpus < 1)
			return;
	} else {
		CPU_CLR(PCPU_GET(cpuid), &mask);
		if (CPU_EMPTY(&mask))
			return;
	}
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	if (CPU_ISFULLSET(&mask)) {
		ncpu = othercpus;
		ipi_all_but_self(vector);
	} else {
		ncpu = 0;
		while ((cpu = CPU_FFS(&mask)) != 0) {
			cpu--;
			CPU_CLR(cpu, &mask);
			CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
			    vector);
			ipi_send_cpu(cpu, vector);
			ncpu++;
		}
	}
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	mtx_unlock_spin(&smp_ipi_mtx);
}

void
smp_cache_flush(void)
{

	if (smp_started)
		smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
}

void
smp_invltlb(void)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_global++;
#endif
	}
}

void
smp_invlpg(vm_offset_t addr)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_page++;
#endif
	}
}

void
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
		ipi_range++;
		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
	}
}

void
smp_masked_invltlb(cpuset_t mask)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_masked_global++;
#endif
	}
}

void
smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_masked_page++;
#endif
	}
}

void
smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
		ipi_masked_range++;
		ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
	}
}

void
ipi_bitmap_handler(struct trapframe frame)
{
	struct trapframe *oldframe;
	struct thread *td;
	int cpu = PCPU_GET(cpuid);
	u_int ipi_bitmap;

	critical_enter();
	td = curthread;
	td->td_intr_nesting_level++;
	oldframe = td->td_intr_frame;
	td->td_intr_frame = &frame;
	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
		(*ipi_preempt_counts[cpu])++;
#endif
		sched_preempt(td);
	}
	if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
		(*ipi_ast_counts[cpu])++;
#endif
		/* Nothing to do for AST */
	}
	if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
#ifdef COUNT_IPIS
		(*ipi_hardclock_counts[cpu])++;
#endif
		hardclockintr();
	}
	td->td_intr_frame = oldframe;
	td->td_intr_nesting_level--;
	critical_exit();
}

/*
 * Send an IPI to a set of CPUs.
 */
void
ipi_selected(cpuset_t cpus, u_int ipi)
{
	int cpu;

	/*
	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
	 * of help in order to understand what the source is.
	 * Set the mask of receiving CPUs for this purpose.
	 */
	if (ipi == IPI_STOP_HARD)
		CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);

	while ((cpu = CPU_FFS(&cpus)) != 0) {
		cpu--;
		CPU_CLR(cpu, &cpus);
		CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
		ipi_send_cpu(cpu, ipi);
	}
}

/*
 * Send an IPI to a specific CPU.
 */
void
ipi_cpu(int cpu, u_int ipi)
{

	/*
	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
	 * of help in order to understand what the source is.
	 * Set the mask of receiving CPUs for this purpose.
	 */
	if (ipi == IPI_STOP_HARD)
		CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);

	CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
	ipi_send_cpu(cpu, ipi);
}

/*
 * Send an IPI to all CPUs EXCEPT myself.
 */
void
ipi_all_but_self(u_int ipi)
{
	cpuset_t other_cpus;

	other_cpus = all_cpus;
	CPU_CLR(PCPU_GET(cpuid), &other_cpus);
	if (IPI_IS_BITMAPED(ipi)) {
		ipi_selected(other_cpus, ipi);
		return;
	}

	/*
	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a bit
	 * of help in order to understand what the source is.
	 * Set the mask of receiving CPUs for this purpose.
	 */
	if (ipi == IPI_STOP_HARD)
		CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);

	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
	cpu_ops.ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}

int
ipi_nmi_handler(void)
{
	u_int cpuid;

	/*
	 * As long as there is not a simple way to know about an NMI's
	 * source, if the bitmask for the current CPU is present in
	 * the global pending bitword an IPI_STOP_HARD has been issued
	 * and should be handled.
	 */
	cpuid = PCPU_GET(cpuid);
	if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
		return (1);

	CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
	cpustop_handler();
	return (0);
}

/*
 * Handle an IPI_STOP by saving our current context and spinning until we
 * are resumed.
 */
void
cpustop_handler(void)
{
	u_int cpu;

	cpu = PCPU_GET(cpuid);

	savectx(&stoppcbs[cpu]);

	/* Indicate that we are stopped */
	CPU_SET_ATOMIC(cpu, &stopped_cpus);

	/* Wait for restart */
	while (!CPU_ISSET(cpu, &started_cpus))
		ia32_pause();

	CPU_CLR_ATOMIC(cpu, &started_cpus);
	CPU_CLR_ATOMIC(cpu, &stopped_cpus);

	if (cpu == 0 && cpustop_restartfunc != NULL) {
		cpustop_restartfunc();
		cpustop_restartfunc = NULL;
	}
}

/*
 * Handle an IPI_SUSPEND by saving our current context and spinning until we
 * are resumed.
 */
void
cpususpend_handler(void)
{
	u_int cpu;

	mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);

	cpu = PCPU_GET(cpuid);
	if (savectx(&susppcbs[cpu]->sp_pcb)) {
		npxsuspend(susppcbs[cpu]->sp_fpususpend);
		wbinvd();
		CPU_SET_ATOMIC(cpu, &suspended_cpus);
		/*
		 * Hack for xen, which does not use resumectx() so never
		 * uses the next clause: set resuming_cpus early so that
		 * resume_cpus() can wait on the same bitmap for acpi and
		 * xen.  resuming_cpus now means eventually_resumable_cpus.
		 */
		CPU_SET_ATOMIC(cpu, &resuming_cpus);
	} else {
		npxresume(susppcbs[cpu]->sp_fpususpend);
		pmap_init_pat();
		initializecpu();
		PCPU_SET(switchtime, 0);
		PCPU_SET(switchticks, ticks);

		/* Indicate that we are resuming */
		CPU_CLR_ATOMIC(cpu, &suspended_cpus);
	}

	/* Wait for resume directive */
	while (!CPU_ISSET(cpu, &toresume_cpus))
		ia32_pause();

	if (cpu_ops.cpu_resume)
		cpu_ops.cpu_resume();

	/* Resume MCA and local APIC */
	mca_resume();
	lapic_setup(0);

	/* Indicate that we are resumed */
	CPU_CLR_ATOMIC(cpu, &resuming_cpus);
	CPU_CLR_ATOMIC(cpu, &suspended_cpus);
	CPU_CLR_ATOMIC(cpu, &toresume_cpus);
}

/*
 * Handlers for TLB related IPIs
 */
void
invltlb_handler(void)
{
	uint64_t cr3;
#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	cr3 = rcr3();
	load_cr3(cr3);
	atomic_add_int(&smp_tlb_wait, 1);
}

void
invlpg_handler(void)
{
#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	invlpg(smp_tlb_addr1);

	atomic_add_int(&smp_tlb_wait, 1);
}

void
invlrng_handler(void)
{
	vm_offset_t addr;
#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	addr = smp_tlb_addr1;
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < smp_tlb_addr2);

	atomic_add_int(&smp_tlb_wait, 1);
}

void
invlcache_handler(void)
{
#ifdef COUNT_IPIS
	(*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	wbinvd();
	atomic_add_int(&smp_tlb_wait, 1);
}

/*
 * This is called once the rest of the system is up and running and we're
 * ready to let the APs out of the pen.
 */
static void
release_aps(void *dummy __unused)
{

	if (mp_ncpus == 1)
		return;
	atomic_store_rel_int(&aps_ready, 1);
	while (smp_started == 0)
		ia32_pause();
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);

#ifdef COUNT_IPIS
/*
 * Set up interrupt counters for IPI handlers.
 */
static void
mp_ipi_intrcnt(void *dummy)
{
	char buf[64];
	int i;

	CPU_FOREACH(i) {
		snprintf(buf, sizeof(buf), "cpu%d:invltlb", i);
		intrcnt_add(buf, &ipi_invltlb_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:invlrng", i);
		intrcnt_add(buf, &ipi_invlrng_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:invlpg", i);
		intrcnt_add(buf, &ipi_invlpg_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:invlcache", i);
		intrcnt_add(buf, &ipi_invlcache_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:preempt", i);
		intrcnt_add(buf, &ipi_preempt_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:ast", i);
		intrcnt_add(buf, &ipi_ast_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
		intrcnt_add(buf, &ipi_rendezvous_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:lazypmap", i);
		intrcnt_add(buf, &ipi_lazypmap_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
		intrcnt_add(buf, &ipi_hardclock_counts[i]);
	}
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
#endif