/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 1996, by Steve Passe
 * Copyright (c) 2003, by Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_acpi.h"
#include "opt_cpu.h"
#include "opt_ddb.h"
#include "opt_kstack_pages.h"
#include "opt_sched.h"
#include "opt_smp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpuset.h>
#include <sys/domainset.h>
#ifdef GPROF
#include <sys/gmon.h>
#endif
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

#include <x86/apicreg.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <machine/cpufunc.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/tss.h>
#include <x86/ucode.h>
#include <machine/cpu.h>
#include <x86/init.h>

#ifdef DEV_ACPI
#include <contrib/dev/acpica/include/acpi.h>
#include <dev/acpica/acpivar.h>
#endif

#define WARMBOOT_TARGET         0
#define WARMBOOT_OFF            (KERNBASE + 0x0467)
#define WARMBOOT_SEG            (KERNBASE + 0x0469)

#define CMOS_REG                (0x70)
#define CMOS_DATA               (0x71)
#define BIOS_RESET              (0x0f)
#define BIOS_WARM               (0x0a)
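
/*
 * The BIOS warm-start hook:  writing BIOS_WARM (0x0a) into the CMOS
 * shutdown status byte (index BIOS_RESET) makes the BIOS skip POST on
 * the next reset and jump through the real-mode far pointer stored at
 * 0x40:0x67 (offset word at physical 0x467, segment word at 0x469),
 * which native_start_all_aps() points at the AP trampoline below.
 */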

#define GiB(v)                  (v ## ULL << 30)

#define AP_BOOTPT_SZ            (PAGE_SIZE * 3)
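
/*
 * The AP boot page tables occupy three pages:  one page each for the
 * PML4, the PDP, and the PD that native_start_all_aps() fills in below.
 */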

/*
 * Temporary variables for init_secondary():  the BSP points these at
 * freshly allocated stacks for each AP in turn in native_start_all_aps().
 */
char *doublefault_stack;
char *mce_stack;
char *nmi_stack;
char *dbg_stack;

/*
 * Local data and functions.
 */

static int      start_ap(int apic_id);

static bool
is_kernel_paddr(vm_paddr_t pa)
{

        return (pa >= trunc_2mpage(btext - KERNBASE) &&
            pa < round_page(_end - KERNBASE));
}

static bool
is_mpboot_good(vm_paddr_t start, vm_paddr_t end)
{

        return (start + AP_BOOTPT_SZ <= GiB(4) && atop(end) < Maxmem);
}

/*
 * Calculate usable address in base memory for AP trampoline code.
 */
void
mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx)
{
        vm_paddr_t start, end;
        unsigned int i;
        bool allocated;

        alloc_ap_trampoline(physmap, physmap_idx);

        /*
         * Find a memory region big enough below the 4GB boundary to
         * store the initial page tables.  Region must be mapped by
         * the direct map.
         *
         * Note that it needs to be aligned to a page boundary.
         */
        allocated = false;
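        /*
         * Walk the physmap regions from the last entry towards the
         * first; entries come in (start, end) pairs, so the index
         * steps down by two, and the loop terminates when the unsigned
         * index wraps past zero.
         */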
        for (i = *physmap_idx; i <= *physmap_idx; i -= 2) {
                /*
                 * First, try to chomp at the start of the physmap region.
                 * The kernel binary might already claim it.
                 */
                start = round_page(physmap[i]);
                end = start + AP_BOOTPT_SZ;
                if (start < end && end <= physmap[i + 1] &&
                    is_mpboot_good(start, end) &&
                    !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
                        allocated = true;
                        physmap[i] = end;
                        break;
                }

                /*
                 * Second, try to chomp at the end.  Again, check
                 * against the kernel.
                 */
                end = trunc_page(physmap[i + 1]);
                start = end - AP_BOOTPT_SZ;
                if (start < end && start >= physmap[i] &&
                    is_mpboot_good(start, end) &&
                    !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
                        allocated = true;
                        physmap[i + 1] = start;
                        break;
                }
        }
        if (allocated) {
                mptramp_pagetables = start;
                if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) {
                        memmove(&physmap[i], &physmap[i + 2],
                            sizeof(*physmap) * (*physmap_idx - i + 2));
                        *physmap_idx -= 2;
                }
        } else {
                mptramp_pagetables = trunc_page(boot_address) - AP_BOOTPT_SZ;
                if (bootverbose)
                        printf(
"Cannot find enough space for the initial AP page tables, placing them at %#x\n",
                            mptramp_pagetables);
        }
}

/*
 * Initialize the IPI handlers and start up the APs.
 */
void
cpu_mp_start(void)
{
        int i;

        /* Initialize the logical ID to APIC ID table. */
        for (i = 0; i < MAXCPU; i++) {
                cpu_apic_ids[i] = -1;
                cpu_ipi_pending[i] = 0;
        }

        /* Install an inter-CPU IPI for TLB invalidation */
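        /*
         * Three handler families exist:  PCID with the INVPCID
         * instruction, PCID via CR3 reloads, and plain non-PCID
         * invalidation.  Each also has a "_pti" variant that is used
         * when page table isolation is active.
         */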
        if (pmap_pcid_enabled) {
                if (invpcid_works) {
                        setidt(IPI_INVLTLB, pti ?
                            IDTVEC(invltlb_invpcid_pti_pti) :
                            IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT,
                            SEL_KPL, 0);
                        setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) :
                            IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0);
                        setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) :
                            IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0);
                } else {
                        setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) :
                            IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0);
                        setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) :
                            IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0);
                        setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) :
                            IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0);
                }
        } else {
                setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb),
                    SDT_SYSIGT, SEL_KPL, 0);
                setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
                    SDT_SYSIGT, SEL_KPL, 0);
                setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
                    SDT_SYSIGT, SEL_KPL, 0);
        }

        /* Install an inter-CPU IPI for cache invalidation. */
        setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache),
            SDT_SYSIGT, SEL_KPL, 0);

        /* Install an inter-CPU IPI for all-CPU rendezvous */
        setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) :
            IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);

        /* Install generic inter-CPU IPI handler */
        setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) :
            IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0);

        /* Install an inter-CPU IPI for CPU stop/restart */
        setidt(IPI_STOP, pti ? IDTVEC(cpustop_pti) : IDTVEC(cpustop),
            SDT_SYSIGT, SEL_KPL, 0);

        /* Install an inter-CPU IPI for CPU suspend/resume */
        setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend),
            SDT_SYSIGT, SEL_KPL, 0);

        /* Install an IPI for calling delayed SWI */
        setidt(IPI_SWI, pti ? IDTVEC(ipi_swi_pti) : IDTVEC(ipi_swi),
            SDT_SYSIGT, SEL_KPL, 0);

        /* Set boot_cpu_id if needed. */
        if (boot_cpu_id == -1) {
                boot_cpu_id = PCPU_GET(apic_id);
                cpu_info[boot_cpu_id].cpu_bsp = 1;
        } else
                KASSERT(boot_cpu_id == PCPU_GET(apic_id),
                    ("BSP's APIC ID doesn't match boot_cpu_id"));

        /* Probe logical/physical core configuration. */
        topo_probe();

        assign_cpu_ids();

        /* Start each Application Processor */
        init_ops.start_all_aps();

        set_interrupt_apic_ids();

#if defined(DEV_ACPI) && MAXMEMDOM > 1
        acpi_pxm_set_cpu_locality();
#endif
}

/*
 * AP CPUs call this to initialize themselves.
 */
void
init_secondary(void)
{
        struct pcpu *pc;
        struct nmi_pcpu *np;
        u_int64_t cr0;
        int cpu, gsel_tss, x;
        struct region_descriptor ap_gdt;

        /* Set by the startup code for us to use */
        cpu = bootAP;

        /* Update microcode before doing anything else. */
        ucode_load_ap(cpu);

        /* Init tss */
        common_tss[cpu] = common_tss[0];
        common_tss[cpu].tss_iobase = sizeof(struct amd64tss) +
            IOPERM_BITMAP_SIZE;
        common_tss[cpu].tss_ist1 =
            (long)&doublefault_stack[DBLFAULT_STACK_SIZE];

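        /*
         * IST stacks for the NMI, machine check, and debug exceptions.
         * Each stack reserves a struct nmi_pcpu at its top so the
         * handler can recover the per-CPU data pointer without
         * trusting %gs, which may still hold the user value on entry;
         * the pointers are stored below once the pcpu area is set up.
         */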
        /* The NMI stack runs on IST2. */
        np = ((struct nmi_pcpu *)&nmi_stack[NMI_STACK_SIZE]) - 1;
        common_tss[cpu].tss_ist2 = (long) np;

        /* The MC# stack runs on IST3. */
        np = ((struct nmi_pcpu *)&mce_stack[MCE_STACK_SIZE]) - 1;
        common_tss[cpu].tss_ist3 = (long) np;

        /* The DB# stack runs on IST4. */
        np = ((struct nmi_pcpu *)&dbg_stack[DBG_STACK_SIZE]) - 1;
        common_tss[cpu].tss_ist4 = (long) np;

        /* Prepare private GDT */
        gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
        for (x = 0; x < NGDT; x++) {
                if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
                    x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1))
                        ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
        }
        ssdtosyssd(&gdt_segs[GPROC0_SEL],
            (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
        ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
        ap_gdt.rd_base = (long) &gdt[NGDT * cpu];
        lgdt(&ap_gdt);                  /* does magic intra-segment return */

        /* Get per-cpu data */
        pc = &__pcpu[cpu];

        /* prime data page for it to use */
        pcpu_init(pc, cpu, sizeof(struct pcpu));
        dpcpu_init(dpcpu, cpu);
        pc->pc_apic_id = cpu_apic_ids[cpu];
        pc->pc_prvspace = pc;
        pc->pc_curthread = 0;
        pc->pc_tssp = &common_tss[cpu];
        pc->pc_commontssp = &common_tss[cpu];
        pc->pc_rsp0 = 0;
        pc->pc_pti_rsp0 = (((vm_offset_t)&pc->pc_pti_stack +
            PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
        pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
            GPROC0_SEL];
        pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL];
        pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
        pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
            GUSERLDT_SEL];
        /* See comment in pmap_bootstrap(). */
        pc->pc_pcid_next = PMAP_PCID_KERN + 2;
        pc->pc_pcid_gen = 1;
        common_tss[cpu].tss_rsp0 = 0;

        /* Save the per-cpu pointer for use by the NMI handler. */
        np = ((struct nmi_pcpu *)&nmi_stack[NMI_STACK_SIZE]) - 1;
        np->np_pcpu = (register_t) pc;

        /* Save the per-cpu pointer for use by the MC# handler. */
        np = ((struct nmi_pcpu *)&mce_stack[MCE_STACK_SIZE]) - 1;
        np->np_pcpu = (register_t) pc;

        /* Save the per-cpu pointer for use by the DB# handler. */
        np = ((struct nmi_pcpu *)&dbg_stack[DBG_STACK_SIZE]) - 1;
        np->np_pcpu = (register_t) pc;

        wrmsr(MSR_FSBASE, 0);           /* User value */
        wrmsr(MSR_GSBASE, (u_int64_t)pc);
        wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */
        fix_cpuid();

        lidt(&r_idt);

        gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
        ltr(gsel_tss);

        /*
         * Set to a known state:
         * Set by mpboot.s: CR0_PG, CR0_PE
         * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
         */
        cr0 = rcr0();
        cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
        load_cr0(cr0);

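        /*
         * Enable SYSCALL (EFER.SCE) and program the STAR/LSTAR/CSTAR/
         * SF_MASK MSRs on this AP.
         */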
        amd64_conf_fast_syscall();

        /* signal our startup to the BSP. */
        mp_naps++;

        /* Spin until the BSP releases the APs. */
        while (atomic_load_acq_int(&aps_ready) == 0)
                ia32_pause();

        init_secondary_tail();
}

/*******************************************************************
 * local functions and data
 */

#ifdef NUMA
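/*
 * Move an AP's pcpu page into the AP's own NUMA domain:  allocate a page
 * from the target domain, copy the current contents, and remap the pcpu
 * virtual address onto the new page.  The old physical page is not freed
 * (see the XXX below).
 */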
static void
mp_realloc_pcpu(int cpuid, int domain)
{
        vm_page_t m;
        vm_offset_t oa, na;

        oa = (vm_offset_t)&__pcpu[cpuid];
        if (_vm_phys_domain(pmap_kextract(oa)) == domain)
                return;
        m = vm_page_alloc_domain(NULL, 0, domain,
            VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
        if (m == NULL)
                return;
        na = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
        pagecopy((void *)oa, (void *)na);
        pmap_qenter((vm_offset_t)&__pcpu[cpuid], &m, 1);
        /* XXX old pcpu page leaked. */
}
#endif

/*
 * start each AP in our list
 */
int
native_start_all_aps(void)
{
        u_int64_t *pt4, *pt3, *pt2;
        u_int32_t mpbioswarmvec;
        int apic_id, cpu, domain, i;
        u_char mpbiosreason;

        mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

        /* copy the AP 1st level boot code */
        bcopy(mptramp_start, (void *)PHYS_TO_DMAP(boot_address), bootMP_size);

        /* Locate the initial AP page tables reserved by mp_bootaddress(). */
        pt4 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables);
        pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
        pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);

        /* Create the initial 1GB replicated page tables */
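        /*
         * Every PML4 slot points at the same PDP and every PDP slot at
         * the same PD, so the low 1GB of physical memory is mapped 1:1
         * at every 1GB-aligned virtual address.  This lets the
         * trampoline enable paging while still running at its low
         * physical address and then jump to the kernel's high address
         * before the real page tables are loaded.
         */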
        for (i = 0; i < 512; i++) {
                /* Each slot of the level 4 pages points to the same level 3 page */
                pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
                pt4[i] |= PG_V | PG_RW | PG_U;

                /* Each slot of the level 3 pages points to the same level 2 page */
                pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
                pt3[i] |= PG_V | PG_RW | PG_U;

                /* The level 2 page slots are mapped with 2MB pages for 1GB. */
                pt2[i] = i * (2 * 1024 * 1024);
                pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
        }

        /* save the current value of the warm-start vector */
        mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
        outb(CMOS_REG, BIOS_RESET);
        mpbiosreason = inb(CMOS_DATA);

        /* setup a vector to our boot code */
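        /*
         * The warm-reset vector is a real-mode far pointer:  the offset
         * word at 0x467 is set to 0 and the segment word at 0x469 to
         * boot_address >> 4, so a warm-started AP begins executing the
         * trampoline copied above.
         */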
        *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
        *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
        outb(CMOS_REG, BIOS_RESET);
        outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */

        /* Relocate pcpu areas to the correct domain. */
#ifdef NUMA
        if (vm_ndomains > 1)
                for (cpu = 1; cpu < mp_ncpus; cpu++) {
                        apic_id = cpu_apic_ids[cpu];
                        domain = acpi_pxm_get_cpu_locality(apic_id);
                        mp_realloc_pcpu(cpu, domain);
                }
#endif

        /* start each AP */
        domain = 0;
        for (cpu = 1; cpu < mp_ncpus; cpu++) {
                apic_id = cpu_apic_ids[cpu];
#ifdef NUMA
                if (vm_ndomains > 1)
                        domain = acpi_pxm_get_cpu_locality(apic_id);
#endif
                /* allocate and set up an idle stack data page */
                bootstacks[cpu] = (void *)kmem_malloc(kstack_pages * PAGE_SIZE,
                    M_WAITOK | M_ZERO);
                doublefault_stack = (char *)kmem_malloc(DBLFAULT_STACK_SIZE,
                    M_WAITOK | M_ZERO);
                mce_stack = (char *)kmem_malloc(MCE_STACK_SIZE,
                    M_WAITOK | M_ZERO);
                nmi_stack = (char *)kmem_malloc_domainset(
                    DOMAINSET_PREF(domain), NMI_STACK_SIZE, M_WAITOK | M_ZERO);
                dbg_stack = (char *)kmem_malloc_domainset(
                    DOMAINSET_PREF(domain), DBG_STACK_SIZE, M_WAITOK | M_ZERO);
                dpcpu = (void *)kmem_malloc_domainset(DOMAINSET_PREF(domain),
                    DPCPU_SIZE, M_WAITOK | M_ZERO);

                bootSTK = (char *)bootstacks[cpu] +
                    kstack_pages * PAGE_SIZE - 8;
                bootAP = cpu;

                /* attempt to start the Application Processor */
                if (!start_ap(apic_id)) {
                        /* restore the warmstart vector */
                        *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
                        panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
                }

                CPU_SET(cpu, &all_cpus);        /* record AP in CPU map */
        }

        /* restore the warmstart vector */
        *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;

        outb(CMOS_REG, BIOS_RESET);
        outb(CMOS_DATA, mpbiosreason);

        /* number of APs actually started */
        return mp_naps;
}

/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'apic_id'.  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It isn't pretty,
 * but it seems to work.
 */
static int
start_ap(int apic_id)
{
        int vector, ms;
        int cpus;

        /* The SIPI vector is the 4KB page number of the trampoline. */
        vector = (boot_address >> 12) & 0xff;

        /* used as a watchpoint to signal AP startup */
        cpus = mp_naps;

        ipi_startup(apic_id, vector);

        /* Wait up to 5 seconds for it to start. */
        for (ms = 0; ms < 5000; ms++) {
                if (mp_naps > cpus)
                        return 1;       /* return SUCCESS */
                DELAY(1000);
        }
        return 0;               /* return FAILURE */
}

void
invltlb_invpcid_handler(void)
{
        struct invpcid_descr d;
        uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
        xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
        (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

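        /*
         * The initiator (smp_targeted_tlb_shootdown() in x86/mp_x86.c)
         * bumps smp_tlb_generation and spins until every target CPU
         * stores that generation into its smp_tlb_done per-CPU field,
         * which each handler below does after invalidating.
         */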
        generation = smp_tlb_generation;
        d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
        d.pad = 0;
        d.addr = 0;
        invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB :
            INVPCID_CTX);
        PCPU_SET(smp_tlb_done, generation);
}

void
invltlb_invpcid_pti_handler(void)
{
        struct invpcid_descr d;
        uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
        xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
        (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

        generation = smp_tlb_generation;
        d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
        d.pad = 0;
        d.addr = 0;
        if (smp_tlb_pmap == kernel_pmap) {
                /*
                 * This invalidation actually needs to clear kernel
                 * mappings from the TLB in the current pmap, but
                 * since we were asked for the flush in the kernel
                 * pmap, achieve it by performing global flush.
                 */
                invpcid(&d, INVPCID_CTXGLOB);
        } else {
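                /*
                 * Invalidate both the kernel-mode and the PTI
                 * user-mode PCID of the pmap.
                 */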
                invpcid(&d, INVPCID_CTX);
                d.pcid |= PMAP_PCID_USER_PT;
                invpcid(&d, INVPCID_CTX);
        }
        PCPU_SET(smp_tlb_done, generation);
}

void
invltlb_pcid_handler(void)
{
        uint64_t kcr3, ucr3;
        uint32_t generation, pcid;

#ifdef COUNT_XINVLTLB_HITS
        xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
        (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

        generation = smp_tlb_generation;        /* Overlap with serialization */
        if (smp_tlb_pmap == kernel_pmap) {
                invltlb_glob();
        } else {
                /*
                 * The current pmap might not be equal to
                 * smp_tlb_pmap.  The clearing of the pm_gen in
                 * pmap_invalidate_all() takes care of TLB
                 * invalidation when switching to the pmap on this
                 * CPU.
                 */
                if (PCPU_GET(curpmap) == smp_tlb_pmap) {
                        pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
                        kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
                        ucr3 = smp_tlb_pmap->pm_ucr3;
                        if (ucr3 != PMAP_NO_CR3) {
                                ucr3 |= PMAP_PCID_USER_PT | pcid;
                                pmap_pti_pcid_invalidate(ucr3, kcr3);
                        } else
                                load_cr3(kcr3);
                }
        }
        PCPU_SET(smp_tlb_done, generation);
}

void
invlpg_invpcid_handler(void)
{
        struct invpcid_descr d;
        uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
        xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
        (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

        generation = smp_tlb_generation;        /* Overlap with serialization */
        invlpg(smp_tlb_addr1);
        if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
                d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
                    PMAP_PCID_USER_PT;
                d.pad = 0;
                d.addr = smp_tlb_addr1;
                invpcid(&d, INVPCID_ADDR);
        }
        PCPU_SET(smp_tlb_done, generation);
}

void
invlpg_pcid_handler(void)
{
        uint64_t kcr3, ucr3;
        uint32_t generation;
        uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
        xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
        (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

        generation = smp_tlb_generation;        /* Overlap with serialization */
        invlpg(smp_tlb_addr1);
        if (smp_tlb_pmap == PCPU_GET(curpmap) &&
            (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
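                /*
                 * pmap_pti_pcid_invlpg() switches to the user page
                 * table, invalidates the page there, and returns to
                 * the kernel page table; CR3_PCID_SAVE keeps the rest
                 * of the TLB intact across the CR3 reloads.
                 */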
                pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
                kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
                ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
                pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1);
        }
        PCPU_SET(smp_tlb_done, generation);
}

void
invlrng_invpcid_handler(void)
{
        struct invpcid_descr d;
        vm_offset_t addr, addr2;
        uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
        xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
        (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

        addr = smp_tlb_addr1;
        addr2 = smp_tlb_addr2;
        generation = smp_tlb_generation;        /* Overlap with serialization */
        do {
                invlpg(addr);
                addr += PAGE_SIZE;
        } while (addr < addr2);
        if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
                d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
                    PMAP_PCID_USER_PT;
                d.pad = 0;
                d.addr = smp_tlb_addr1;
                do {
                        invpcid(&d, INVPCID_ADDR);
                        d.addr += PAGE_SIZE;
                } while (d.addr < addr2);
        }
        PCPU_SET(smp_tlb_done, generation);
}

void
invlrng_pcid_handler(void)
{
        vm_offset_t addr, addr2;
        uint64_t kcr3, ucr3;
        uint32_t generation;
        uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
        xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
        (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

        addr = smp_tlb_addr1;
        addr2 = smp_tlb_addr2;
        generation = smp_tlb_generation;        /* Overlap with serialization */
        do {
                invlpg(addr);
                addr += PAGE_SIZE;
        } while (addr < addr2);
        if (smp_tlb_pmap == PCPU_GET(curpmap) &&
            (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
                pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
                kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
                ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
                pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2);
        }
        PCPU_SET(smp_tlb_done, generation);
}