FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/mp_machdep.c

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 1996, by Steve Passe
    5  * Copyright (c) 2003, by Peter Wemm
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. The name of the developer may NOT be used to endorse or promote products
   14  *    derived from this software without specific prior written permission.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD$");
   31 
   32 #include "opt_acpi.h"
   33 #include "opt_cpu.h"
   34 #include "opt_ddb.h"
   35 #include "opt_kstack_pages.h"
   36 #include "opt_sched.h"
   37 #include "opt_smp.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/systm.h>
   41 #include <sys/bus.h>
   42 #include <sys/cpuset.h>
   43 #include <sys/domainset.h>
   44 #ifdef GPROF 
   45 #include <sys/gmon.h>
   46 #endif
   47 #include <sys/kernel.h>
   48 #include <sys/ktr.h>
   49 #include <sys/lock.h>
   50 #include <sys/malloc.h>
   51 #include <sys/memrange.h>
   52 #include <sys/mutex.h>
   53 #include <sys/pcpu.h>
   54 #include <sys/proc.h>
   55 #include <sys/sched.h>
   56 #include <sys/smp.h>
   57 #include <sys/sysctl.h>
   58 
   59 #include <vm/vm.h>
   60 #include <vm/vm_param.h>
   61 #include <vm/pmap.h>
   62 #include <vm/vm_kern.h>
   63 #include <vm/vm_extern.h>
   64 #include <vm/vm_page.h>
   65 #include <vm/vm_phys.h>
   66 
   67 #include <x86/apicreg.h>
   68 #include <machine/clock.h>
   69 #include <machine/cputypes.h>
   70 #include <machine/cpufunc.h>
   71 #include <x86/mca.h>
   72 #include <machine/md_var.h>
   73 #include <machine/pcb.h>
   74 #include <machine/psl.h>
   75 #include <machine/smp.h>
   76 #include <machine/specialreg.h>
   77 #include <machine/tss.h>
   78 #include <x86/ucode.h>
   79 #include <machine/cpu.h>
   80 #include <x86/init.h>
   81 
   82 #ifdef DEV_ACPI
   83 #include <contrib/dev/acpica/include/acpi.h>
   84 #include <dev/acpica/acpivar.h>
   85 #endif
   86 
   87 #define WARMBOOT_TARGET         0
   88 #define WARMBOOT_OFF            (KERNBASE + 0x0467)
   89 #define WARMBOOT_SEG            (KERNBASE + 0x0469)
   90 
   91 #define CMOS_REG                (0x70)
   92 #define CMOS_DATA               (0x71)
   93 #define BIOS_RESET              (0x0f)
   94 #define BIOS_WARM               (0x0a)
   95 
   96 #define GiB(v)                  (v ## ULL << 30)
   97 
   98 #define AP_BOOTPT_SZ            (PAGE_SIZE * 3)
   99 
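A note on the macros above: in GiB(), the preprocessor's ## operator pastes the ULL suffix onto the literal before the shift, so GiB(4) is computed as 4ULL << 30 in 64-bit arithmetic rather than overflowing a 32-bit int. AP_BOOTPT_SZ reserves three pages because the AP trampoline needs one page each for a PML4, a PDP, and a PD (built in native_start_all_aps() below). A minimal standalone sketch, with a PAGE_SIZE value assumed for illustration:

#include <stdio.h>

#define GiB(v)          (v ## ULL << 30)
#define PAGE_SIZE       4096                    /* assumed for the sketch */
#define AP_BOOTPT_SZ    (PAGE_SIZE * 3)         /* PML4 + PDP + PD */

int
main(void)
{
        /* "4" pastes into "4ULL", so the shift happens in 64 bits. */
        printf("GiB(4)       = %llu bytes\n", GiB(4));
        printf("AP_BOOTPT_SZ = %d bytes\n", AP_BOOTPT_SZ);
        return (0);
}
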
  100 /* Temporary variables for init_secondary()  */
  101 char *doublefault_stack;
  102 char *mce_stack;
  103 char *nmi_stack;
  104 char *dbg_stack;
  105 
  106 /*
  107  * Local data and functions.
  108  */
  109 
  110 static int      start_ap(int apic_id);
  111 
  112 static bool
  113 is_kernel_paddr(vm_paddr_t pa)
  114 {
  115 
  116         return (pa >= trunc_2mpage(btext - KERNBASE) &&
  117            pa < round_page(_end - KERNBASE));
  118 }
  119 
  120 static bool
  121 is_mpboot_good(vm_paddr_t start, vm_paddr_t end)
  122 {
  123 
  124         return (start + AP_BOOTPT_SZ <= GiB(4) && atop(end) < Maxmem);
  125 }
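
These two helpers gate candidate regions for the AP page tables: is_kernel_paddr() rejects physical ranges overlapping the loaded kernel image (the kernel text is mapped with 2MB superpages, hence the 2MB truncation of btext), and is_mpboot_good() keeps the region below 4GB and within the direct map. A userspace sketch of the alignment arithmetic, with mask values assumed to match amd64's 4KB pages and 2MB superpages:

#include <stdio.h>

#define PAGE_MASK       (4096UL - 1)
#define PDRMASK         ((1UL << 21) - 1)       /* 2MB superpage mask */
#define trunc_2mpage(x) ((unsigned long)(x) & ~PDRMASK)
#define round_page(x)   (((unsigned long)(x) + PAGE_MASK) & ~PAGE_MASK)

int
main(void)
{
        printf("trunc_2mpage(0x345678) = %#lx\n", trunc_2mpage(0x345678));
        printf("round_page(0x1234)     = %#lx\n", round_page(0x1234));
        return (0);
}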
  126 
  127 /*
  128  * Calculate usable address in base memory for AP trampoline code.
  129  */
  130 void
  131 mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx)
  132 {
  133         vm_paddr_t start, end;
  134         unsigned int i;
  135         bool allocated;
  136 
  137         alloc_ap_trampoline(physmap, physmap_idx);
  138 
  139         /*
  140          * Find a memory region big enough below the 4GB boundary to
  141          * store the initial page tables.  Region must be mapped by
  142          * the direct map.
  143          *
  144          * Note that it needs to be aligned to a page boundary.
  145          */
  146         allocated = false;
  147         for (i = *physmap_idx; i <= *physmap_idx; i -= 2) {
  148                 /*
  149                  * First, try to chomp at the start of the physmap region.
  150                  * Kernel binary might claim it already.
  151                  */
  152                 start = round_page(physmap[i]);
  153                 end = start + AP_BOOTPT_SZ;
  154                 if (start < end && end <= physmap[i + 1] &&
  155                     is_mpboot_good(start, end) &&
  156                     !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
  157                         allocated = true;
  158                         physmap[i] = end;
  159                         break;
  160                 }
  161 
  162                 /*
  163                  * Second, try to chomp at the end.  Again, check
  164                  * against kernel.
  165                  */
  166                 end = trunc_page(physmap[i + 1]);
  167                 start = end - AP_BOOTPT_SZ;
  168                 if (start < end && start >= physmap[i] &&
  169                     is_mpboot_good(start, end) &&
  170                     !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
  171                         allocated = true;
  172                         physmap[i + 1] = start;
  173                         break;
  174                 }
  175         }
  176         if (allocated) {
  177                 mptramp_pagetables = start;
  178                 if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) {
  179                         memmove(&physmap[i], &physmap[i + 2],
  180                             sizeof(*physmap) * (*physmap_idx - i + 2));
  181                         *physmap_idx -= 2;
  182                 }
  183         } else {
  184                 mptramp_pagetables = trunc_page(boot_address) - AP_BOOTPT_SZ;
  185                 if (bootverbose)
  186                         printf(
  187 "Cannot find enough space for the initial AP page tables, placing them at %#x\n",
  188                             mptramp_pagetables);
  189         }
  190 }
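
mp_bootaddress() scans physmap, an array of [start, end) physical address pairs, from the highest pair downward. The unusual loop condition works because i is unsigned: after the i == 0 iteration, i -= 2 wraps around to a huge value and i <= *physmap_idx becomes false. A userspace sketch of that scan order with toy values:

#include <stdio.h>

/* Toy physmap: [start, end) pairs, as in the kernel. */
static unsigned long long physmap[] = {
        0x1000,   0x9f000,              /* low memory */
        0x100000, 0xc0000000,           /* main memory below 3GB */
};

int
main(void)
{
        unsigned int physmap_idx = 2;   /* index of the last pair */
        unsigned int i;

        /* i wraps past zero, terminating the loop, as above. */
        for (i = physmap_idx; i <= physmap_idx; i -= 2)
                printf("pair at %u: %#llx-%#llx\n", i,
                    physmap[i], physmap[i + 1]);
        return (0);
}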
  191 
  192 /*
  193  * Initialize the IPI handlers and start up the APs.
  194  */
  195 void
  196 cpu_mp_start(void)
  197 {
  198         int i;
  199 
  200         /* Initialize the logical ID to APIC ID table. */
  201         for (i = 0; i < MAXCPU; i++) {
  202                 cpu_apic_ids[i] = -1;
  203                 cpu_ipi_pending[i] = 0;
  204         }
  205 
  206         /* Install an inter-CPU IPI for TLB invalidation */
  207         if (pmap_pcid_enabled) {
  208                 if (invpcid_works) {
  209                         setidt(IPI_INVLTLB, pti ?
  210                             IDTVEC(invltlb_invpcid_pti_pti) :
  211                             IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT,
  212                             SEL_KPL, 0);
  213                         setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) :
  214                             IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0);
  215                         setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) :
  216                             IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0);
  217                 } else {
  218                         setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) :
  219                             IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0);
  220                         setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) :
  221                             IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0);
  222                         setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) :
  223                             IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0);
  224                 }
  225         } else {
  226                 setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb),
  227                     SDT_SYSIGT, SEL_KPL, 0);
  228                 setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
  229                     SDT_SYSIGT, SEL_KPL, 0);
  230                 setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
  231                     SDT_SYSIGT, SEL_KPL, 0);
  232         }
  233 
  234         /* Install an inter-CPU IPI for cache invalidation. */
  235         setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache),
  236             SDT_SYSIGT, SEL_KPL, 0);
  237 
  238         /* Install an inter-CPU IPI for all-CPU rendezvous */
  239         setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) :
  240             IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);
  241 
  242         /* Install generic inter-CPU IPI handler */
  243         setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) :
  244             IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0);
  245 
  246         /* Install an inter-CPU IPI for CPU stop/restart */
  247         setidt(IPI_STOP, pti ? IDTVEC(cpustop_pti) : IDTVEC(cpustop),
  248             SDT_SYSIGT, SEL_KPL, 0);
  249 
  250         /* Install an inter-CPU IPI for CPU suspend/resume */
  251         setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend),
  252             SDT_SYSIGT, SEL_KPL, 0);
  253 
  254         /* Install an IPI for calling delayed SWI */
  255         setidt(IPI_SWI, pti ? IDTVEC(ipi_swi_pti) : IDTVEC(ipi_swi),
  256             SDT_SYSIGT, SEL_KPL, 0);
  257 
  258         /* Set boot_cpu_id if needed. */
  259         if (boot_cpu_id == -1) {
  260                 boot_cpu_id = PCPU_GET(apic_id);
  261                 cpu_info[boot_cpu_id].cpu_bsp = 1;
  262         } else
  263                 KASSERT(boot_cpu_id == PCPU_GET(apic_id),
  264                     ("BSP's APIC ID doesn't match boot_cpu_id"));
  265 
  266         /* Probe logical/physical core configuration. */
  267         topo_probe();
  268 
  269         assign_cpu_ids();
  270 
  271         /* Start each Application Processor */
  272         init_ops.start_all_aps();
  273 
  274         set_interrupt_apic_ids();
  275 
  276 #if defined(DEV_ACPI) && MAXMEMDOM > 1
  277         acpi_pxm_set_cpu_locality();
  278 #endif
  279 }
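
Every setidt() call in cpu_mp_start() follows the same pattern: each IPI has a plain entry point and a PTI variant that switches from the user page-table root to the kernel one before doing any work, and the boot-time pti flag selects between them once, when the IDT entry is installed. A sketch of that selection pattern, with hypothetical stand-ins for the IDTVEC() trampolines:

#include <stdbool.h>
#include <stdio.h>

static void invltlb(void)       { puts("invltlb"); }
static void invltlb_pti(void)   { puts("invltlb_pti"); }
static void invlpg(void)        { puts("invlpg"); }
static void invlpg_pti(void)    { puts("invlpg_pti"); }

struct ipi_entry {
        int     vector;
        void    (*normal)(void);
        void    (*pti_variant)(void);
};

static const struct ipi_entry ipis[] = {
        { 0xf0, invltlb, invltlb_pti },
        { 0xf1, invlpg,  invlpg_pti },
};

int
main(void)
{
        bool pti = true;        /* would come from the PTI tunable */
        unsigned int i;

        /* Mirrors the setidt() calls: choose the entry point once. */
        for (i = 0; i < sizeof(ipis) / sizeof(ipis[0]); i++)
                (pti ? ipis[i].pti_variant : ipis[i].normal)();
        return (0);
}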
  280 
  281 /*
  282  * AP CPUs call this to initialize themselves.
  283  */
  284 void
  285 init_secondary(void)
  286 {
  287         struct pcpu *pc;
  288         struct nmi_pcpu *np;
  289         u_int64_t cr0;
  290         int cpu, gsel_tss, x;
  291         struct region_descriptor ap_gdt;
  292 
  293         /* Set by the startup code for us to use */
  294         cpu = bootAP;
  295 
  296         /* Update microcode before doing anything else. */
  297         ucode_load_ap(cpu);
  298 
  299         /* Init tss */
  300         common_tss[cpu] = common_tss[0];
  301         common_tss[cpu].tss_iobase = sizeof(struct amd64tss) +
  302             IOPERM_BITMAP_SIZE;
  303         common_tss[cpu].tss_ist1 =
  304             (long)&doublefault_stack[DBLFAULT_STACK_SIZE];
  305 
  306         /* The NMI stack runs on IST2. */
  307         np = ((struct nmi_pcpu *)&nmi_stack[NMI_STACK_SIZE]) - 1;
  308         common_tss[cpu].tss_ist2 = (long) np;
  309 
  310         /* The MC# stack runs on IST3. */
  311         np = ((struct nmi_pcpu *)&mce_stack[MCE_STACK_SIZE]) - 1;
  312         common_tss[cpu].tss_ist3 = (long) np;
  313 
  314         /* The DB# stack runs on IST4. */
  315         np = ((struct nmi_pcpu *)&dbg_stack[DBG_STACK_SIZE]) - 1;
  316         common_tss[cpu].tss_ist4 = (long) np;
  317 
  318         /* Prepare private GDT */
  319         gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
  320         for (x = 0; x < NGDT; x++) {
  321                 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
  322                     x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1))
  323                         ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
  324         }
  325         ssdtosyssd(&gdt_segs[GPROC0_SEL],
  326             (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
  327         ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
  328         ap_gdt.rd_base =  (long) &gdt[NGDT * cpu];
  329         lgdt(&ap_gdt);                  /* does magic intra-segment return */
  330 
  331         /* Get per-cpu data */
  332         pc = &__pcpu[cpu];
  333 
  334         /* prime data page for it to use */
  335         pcpu_init(pc, cpu, sizeof(struct pcpu));
  336         dpcpu_init(dpcpu, cpu);
  337         pc->pc_apic_id = cpu_apic_ids[cpu];
  338         pc->pc_prvspace = pc;
  339         pc->pc_curthread = 0;
  340         pc->pc_tssp = &common_tss[cpu];
  341         pc->pc_commontssp = &common_tss[cpu];
  342         pc->pc_rsp0 = 0;
  343         pc->pc_pti_rsp0 = (((vm_offset_t)&pc->pc_pti_stack +
  344             PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
  345         pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
  346             GPROC0_SEL];
  347         pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL];
  348         pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
  349         pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
  350             GUSERLDT_SEL];
  351         /* See comment in pmap_bootstrap(). */
  352         pc->pc_pcid_next = PMAP_PCID_KERN + 2;
  353         pc->pc_pcid_gen = 1;
  354         common_tss[cpu].tss_rsp0 = 0;
  355 
  356         /* Save the per-cpu pointer for use by the NMI handler. */
  357         np = ((struct nmi_pcpu *)&nmi_stack[NMI_STACK_SIZE]) - 1;
  358         np->np_pcpu = (register_t) pc;
  359 
  360         /* Save the per-cpu pointer for use by the MC# handler. */
  361         np = ((struct nmi_pcpu *)&mce_stack[MCE_STACK_SIZE]) - 1;
  362         np->np_pcpu = (register_t) pc;
  363 
  364         /* Save the per-cpu pointer for use by the DB# handler. */
  365         np = ((struct nmi_pcpu *)&dbg_stack[DBG_STACK_SIZE]) - 1;
  366         np->np_pcpu = (register_t) pc;
  367 
  368         wrmsr(MSR_FSBASE, 0);           /* User value */
  369         wrmsr(MSR_GSBASE, (u_int64_t)pc);
  370         wrmsr(MSR_KGSBASE, (u_int64_t)pc);      /* XXX User value while we're in the kernel */
  371         fix_cpuid();
  372 
  373         lidt(&r_idt);
  374 
  375         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
  376         ltr(gsel_tss);
  377 
  378         /*
  379          * Set to a known state:
  380          * Set by mpboot.s: CR0_PG, CR0_PE
  381          * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
  382          */
  383         cr0 = rcr0();
  384         cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
  385         load_cr0(cr0);
  386 
  387         amd64_conf_fast_syscall();
  388 
  389         /* signal our startup to the BSP. */
  390         mp_naps++;
  391 
  392         /* Spin until the BSP releases the APs. */
  393         while (atomic_load_acq_int(&aps_ready) == 0)
  394                 ia32_pause();
  395 
  396         init_secondary_tail();
  397 }
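
The ((struct nmi_pcpu *)&stack[SIZE]) - 1 idiom used for IST2-IST4 above reserves a struct nmi_pcpu at the very top of each special stack. NMI, MC#, and DB# events can arrive while %gs does not point at the kernel per-CPU area, so those handlers recover the pcpu pointer from this fixed slot instead; the usable stack grows down from just below it. A userspace sketch of the carving, with the stack size and struct layout assumed:

#include <stdint.h>
#include <stdio.h>

#define NMI_STACK_SIZE  (4 * 4096)      /* assumed for the sketch */

struct nmi_pcpu {
        uint64_t        np_pcpu;
        uint64_t        __padding;      /* keep the slot 16 bytes */
};

static char nmi_stack[NMI_STACK_SIZE] __attribute__((aligned(16)));

int
main(void)
{
        /* Carve the struct out of the top of the stack. */
        struct nmi_pcpu *np =
            ((struct nmi_pcpu *)&nmi_stack[NMI_STACK_SIZE]) - 1;

        np->np_pcpu = 0x1234;   /* stand-in for the real pcpu pointer */
        printf("stack top %p, np slot %p\n",
            (void *)&nmi_stack[NMI_STACK_SIZE], (void *)np);
        return (0);
}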
  398 
  399 /*******************************************************************
  400  * local functions and data
  401  */
  402 
  403 #ifdef NUMA
  404 static void
  405 mp_realloc_pcpu(int cpuid, int domain)
  406 {
  407         vm_page_t m;
  408         vm_offset_t oa, na;
  409 
  410         oa = (vm_offset_t)&__pcpu[cpuid];
  411         if (_vm_phys_domain(pmap_kextract(oa)) == domain)
  412                 return;
  413         m = vm_page_alloc_domain(NULL, 0, domain,
  414             VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
  415         if (m == NULL)
  416                 return;
  417         na = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
  418         pagecopy((void *)oa, (void *)na);
  419         pmap_qenter((vm_offset_t)&__pcpu[cpuid], &m, 1);
  420         /* XXX old pcpu page leaked. */
  421 }
  422 #endif
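
mp_realloc_pcpu() migrates a boot-time pcpu page to the CPU's home NUMA domain by allocating a fresh page there, copying the contents, and remapping the per-CPU virtual address onto the new page; as the XXX notes, the old physical page is simply abandoned. A deliberately simplified userspace caricature of that allocate-copy-repoint sequence:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
        char *old, *new;

        old = calloc(1, 4096);
        strcpy(old, "per-CPU data");
        new = calloc(1, 4096);          /* vm_page_alloc_domain() above */
        memcpy(new, old, 4096);         /* pagecopy() above */
        /* pmap_qenter() would now repoint the VA; old is leaked. */
        printf("pcpu now at %p: %s\n", (void *)new, new);
        return (0);
}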
  423 
  424 /*
  425  * start each AP in our list
  426  */
  427 int
  428 native_start_all_aps(void)
  429 {
  430         u_int64_t *pt4, *pt3, *pt2;
  431         u_int32_t mpbioswarmvec;
  432         int apic_id, cpu, domain, i;
  433         u_char mpbiosreason;
  434 
  435         mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
  436 
  437         /* copy the AP 1st level boot code */
  438         bcopy(mptramp_start, (void *)PHYS_TO_DMAP(boot_address), bootMP_size);
  439 
  440         /* Locate the page tables, they'll be below the trampoline */
  441         pt4 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables);
  442         pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
  443         pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
  444 
  445         /* Create the initial 1GB replicated page tables */
  446         for (i = 0; i < 512; i++) {
  447                 /* Each slot of the level 4 pages points to the same level 3 page */
  448                 pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
  449                 pt4[i] |= PG_V | PG_RW | PG_U;
  450 
  451                 /* Each slot of the level 3 pages points to the same level 2 page */
  452                 pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
  453                 pt3[i] |= PG_V | PG_RW | PG_U;
  454 
  455                 /* The level 2 page slots are mapped with 2MB pages for 1GB. */
  456                 pt2[i] = i * (2 * 1024 * 1024);
  457                 pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
  458         }
  459 
  460         /* save the current value of the warm-start vector */
  461         mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
  462         outb(CMOS_REG, BIOS_RESET);
  463         mpbiosreason = inb(CMOS_DATA);
  464 
  465         /* setup a vector to our boot code */
  466         *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
  467         *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
  468         outb(CMOS_REG, BIOS_RESET);
  469         outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
  470 
  471         /* Relocate pcpu areas to the correct domain. */
  472 #ifdef NUMA
  473         if (vm_ndomains > 1)
  474                 for (cpu = 1; cpu < mp_ncpus; cpu++) {
  475                         apic_id = cpu_apic_ids[cpu];
  476                         domain = acpi_pxm_get_cpu_locality(apic_id);
  477                         mp_realloc_pcpu(cpu, domain);
  478                 }
  479 #endif
  480 
  481         /* start each AP */
  482         domain = 0;
  483         for (cpu = 1; cpu < mp_ncpus; cpu++) {
  484                 apic_id = cpu_apic_ids[cpu];
  485 #ifdef NUMA
  486                 if (vm_ndomains > 1)
  487                         domain = acpi_pxm_get_cpu_locality(apic_id);
  488 #endif
  489                 /* allocate and set up an idle stack data page */
  490                 bootstacks[cpu] = (void *)kmem_malloc(kstack_pages * PAGE_SIZE,
  491                     M_WAITOK | M_ZERO);
  492                 doublefault_stack = (char *)kmem_malloc(DBLFAULT_STACK_SIZE,
  493                     M_WAITOK | M_ZERO);
  494                 mce_stack = (char *)kmem_malloc(MCE_STACK_SIZE,
  495                     M_WAITOK | M_ZERO);
  496                 nmi_stack = (char *)kmem_malloc_domainset(
  497                     DOMAINSET_PREF(domain), NMI_STACK_SIZE, M_WAITOK | M_ZERO);
  498                 dbg_stack = (char *)kmem_malloc_domainset(
  499                     DOMAINSET_PREF(domain), DBG_STACK_SIZE, M_WAITOK | M_ZERO);
  500                 dpcpu = (void *)kmem_malloc_domainset(DOMAINSET_PREF(domain),
  501                     DPCPU_SIZE, M_WAITOK | M_ZERO);
  502 
  503                 bootSTK = (char *)bootstacks[cpu] +
  504                     kstack_pages * PAGE_SIZE - 8;
  505                 bootAP = cpu;
  506 
  507                 /* attempt to start the Application Processor */
  508                 if (!start_ap(apic_id)) {
  509                         /* restore the warmstart vector */
  510                         *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
  511                         panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
  512                 }
  513 
  514                 CPU_SET(cpu, &all_cpus);        /* record AP in CPU map */
  515         }
  516 
  517         /* restore the warmstart vector */
  518         *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
  519 
  520         outb(CMOS_REG, BIOS_RESET);
  521         outb(CMOS_DATA, mpbiosreason);
  522 
  523         /* number of APs actually started */
  524         return mp_naps;
  525 }
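
The page-table loop in native_start_all_aps() builds a deliberately degenerate hierarchy: all 512 PML4 slots alias one PDP page, all 512 PDP slots alias one PD page, and the PD identity-maps the first 1GB with 2MB pages. The low 1GB of physical memory therefore appears at every 1GB-aligned virtual address, so the trampoline keeps executing correctly while it enables paging and long mode, whatever the upper bits of its instruction pointer. A userspace sketch computing the same entries (the base address is assumed; the flag values are the architectural ones):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE       4096ULL
#define PG_V            0x001
#define PG_RW           0x002
#define PG_U            0x004
#define PG_PS           0x080           /* 2MB page in the PD */

static uint64_t pt4[512], pt3[512], pt2[512];

int
main(void)
{
        uint64_t pagetables = 0x9000;   /* assumed physical base */
        int i;

        for (i = 0; i < 512; i++) {
                /* Every PML4/PDP slot aliases the same next level. */
                pt4[i] = (pagetables + PAGE_SIZE) | PG_V | PG_RW | PG_U;
                pt3[i] = (pagetables + 2 * PAGE_SIZE) | PG_V | PG_RW | PG_U;
                /* The PD identity-maps 1GB with 2MB pages. */
                pt2[i] = (uint64_t)i * (2 * 1024 * 1024) |
                    PG_V | PG_RW | PG_PS | PG_U;
        }
        printf("pt2[1] maps the 2MB page at %#llx\n",
            (unsigned long long)(pt2[1] & ~0xfffULL));
        return (0);
}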
  526 
  527 
  528 /*
  529  * This function starts the AP (application processor) identified
  530  * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
  531  * to accomplish this.  This is necessary because of the nuances
  532  * of the different hardware we might encounter.  It isn't pretty,
  533  * but it seems to work.
  534  */
  535 static int
  536 start_ap(int apic_id)
  537 {
  538         int vector, ms;
  539         int cpus;
  540 
  541         /* calculate the vector */
  542         vector = (boot_address >> 12) & 0xff;
  543 
  544         /* used as a watchpoint to signal AP startup */
  545         cpus = mp_naps;
  546 
  547         ipi_startup(apic_id, vector);
  548 
  549         /* Wait up to 5 seconds for it to start. */
  550         for (ms = 0; ms < 5000; ms++) {
  551                 if (mp_naps > cpus)
  552                         return 1;       /* return SUCCESS */
  553                 DELAY(1000);
  554         }
  555         return 0;               /* return FAILURE */
  556 }
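
The vector arithmetic at the top of start_ap() is the heart of the INIT/SIPI protocol: the Startup IPI carries an 8-bit vector that names a 4KB page below 1MB, and the AP begins executing in real mode at vector << 12, which is why the trampoline must sit in low memory. Startup is confirmed purely by polling: init_secondary() increments mp_naps, and start_ap() watches for it to move. A sketch of the vector encoding, with the boot address assumed:

#include <stdio.h>

int
main(void)
{
        unsigned int boot_address = 0x7000;     /* assumed trampoline base */
        unsigned int vector = (boot_address >> 12) & 0xff;

        printf("SIPI vector %#x -> AP starts at %#x in real mode\n",
            vector, vector << 12);
        return (0);
}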
  557 
  558 void
  559 invltlb_invpcid_handler(void)
  560 {
  561         struct invpcid_descr d;
  562         uint32_t generation;
  563 
  564 #ifdef COUNT_XINVLTLB_HITS
  565         xhits_gbl[PCPU_GET(cpuid)]++;
  566 #endif /* COUNT_XINVLTLB_HITS */
  567 #ifdef COUNT_IPIS
  568         (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
  569 #endif /* COUNT_IPIS */
  570 
  571         generation = smp_tlb_generation;
  572         d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
  573         d.pad = 0;
  574         d.addr = 0;
  575         invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB :
  576             INVPCID_CTX);
  577         PCPU_SET(smp_tlb_done, generation);
  578 }
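
This handler shows the acknowledgement protocol shared by all the TLB shootdown handlers below: the initiating CPU bumps smp_tlb_generation before sending the IPI, and each target snapshots the generation, performs its invalidation, and only then stores the value to its per-CPU smp_tlb_done, letting the initiator wait until every target has caught up. A minimal single-responder sketch of that handshake, with C11 atomics standing in for the kernel's atomic_load_acq_int()/PCPU_SET():

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Same layout as the INVPCID descriptor built above. */
struct invpcid_descr {
        uint64_t        pcid:12;
        uint64_t        pad:52;
        uint64_t        addr;
};

static _Atomic uint32_t smp_tlb_generation;
static _Atomic uint32_t smp_tlb_done;   /* per-CPU in the kernel */

static void
handler(void)
{
        uint32_t generation = atomic_load(&smp_tlb_generation);
        struct invpcid_descr d = { .pcid = 1, .pad = 0, .addr = 0 };

        (void)d;                /* the real handler passes &d to invpcid */
        atomic_store(&smp_tlb_done, generation);
}

int
main(void)
{
        atomic_store(&smp_tlb_generation, 42);  /* initiator side */
        handler();                              /* target side */
        printf("acked generation %u\n", atomic_load(&smp_tlb_done));
        return (0);
}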
  579 
  580 void
  581 invltlb_invpcid_pti_handler(void)
  582 {
  583         struct invpcid_descr d;
  584         uint32_t generation;
  585 
  586 #ifdef COUNT_XINVLTLB_HITS
  587         xhits_gbl[PCPU_GET(cpuid)]++;
  588 #endif /* COUNT_XINVLTLB_HITS */
  589 #ifdef COUNT_IPIS
  590         (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
  591 #endif /* COUNT_IPIS */
  592 
  593         generation = smp_tlb_generation;
  594         d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
  595         d.pad = 0;
  596         d.addr = 0;
  597         if (smp_tlb_pmap == kernel_pmap) {
  598                 /*
  599                  * This invalidation actually needs to clear kernel
  600                  * mappings from the TLB in the current pmap, but
  601                  * since we were asked for the flush in the kernel
  602                  * pmap, achieve it by performing global flush.
  603                  */
  604                 invpcid(&d, INVPCID_CTXGLOB);
  605         } else {
  606                 invpcid(&d, INVPCID_CTX);
  607                 d.pcid |= PMAP_PCID_USER_PT;
  608                 invpcid(&d, INVPCID_CTX);
  609         }
  610         PCPU_SET(smp_tlb_done, generation);
  611 }
  612 
  613 void
  614 invltlb_pcid_handler(void)
  615 {
  616         uint64_t kcr3, ucr3;
  617         uint32_t generation, pcid;
  618   
  619 #ifdef COUNT_XINVLTLB_HITS
  620         xhits_gbl[PCPU_GET(cpuid)]++;
  621 #endif /* COUNT_XINVLTLB_HITS */
  622 #ifdef COUNT_IPIS
  623         (*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
  624 #endif /* COUNT_IPIS */
  625 
  626         generation = smp_tlb_generation;        /* Overlap with serialization */
  627         if (smp_tlb_pmap == kernel_pmap) {
  628                 invltlb_glob();
  629         } else {
  630                 /*
  631                  * The current pmap might not be equal to
  632                  * smp_tlb_pmap.  The clearing of the pm_gen in
  633                  * pmap_invalidate_all() takes care of TLB
  634                  * invalidation when switching to the pmap on this
  635                  * CPU.
  636                  */
  637                 if (PCPU_GET(curpmap) == smp_tlb_pmap) {
  638                         pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
  639                         kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
  640                         ucr3 = smp_tlb_pmap->pm_ucr3;
  641                         if (ucr3 != PMAP_NO_CR3) {
  642                                 ucr3 |= PMAP_PCID_USER_PT | pcid;
  643                                 pmap_pti_pcid_invalidate(ucr3, kcr3);
  644                         } else
  645                                 load_cr3(kcr3);
  646                 }
  647         }
  648         PCPU_SET(smp_tlb_done, generation);
  649 }
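
invltlb_pcid_handler() is the fallback for CPUs with PCID but no INVPCID instruction. It relies on the architectural meaning of bit 63 of %cr3 when PCIDs are enabled: writing %cr3 with the bit clear flushes the non-global TLB entries tagged with the new PCID, while writing it with the bit set (CR3_PCID_SAVE) switches page tables and preserves them. That is why the kcr3 built here omits CR3_PCID_SAVE, while the targeted invlpg/invlrng handlers below include it. A sketch of the two compositions, with the root address assumed:

#include <stdint.h>
#include <stdio.h>

#define CR3_PCID_SAVE   0x8000000000000000ULL   /* bit 63 */

int
main(void)
{
        uint64_t pm_cr3 = 0x123000;     /* assumed page-table root */
        uint64_t pcid = 5;
        uint64_t flush = pm_cr3 | pcid;
        uint64_t keep = pm_cr3 | pcid | CR3_PCID_SAVE;

        printf("flushing reload:   %#llx\n", (unsigned long long)flush);
        printf("preserving reload: %#llx\n", (unsigned long long)keep);
        return (0);
}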
  650 
  651 void
  652 invlpg_invpcid_handler(void)
  653 {
  654         struct invpcid_descr d;
  655         uint32_t generation;
  656 
  657 #ifdef COUNT_XINVLTLB_HITS
  658         xhits_pg[PCPU_GET(cpuid)]++;
  659 #endif /* COUNT_XINVLTLB_HITS */
  660 #ifdef COUNT_IPIS
  661         (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
  662 #endif /* COUNT_IPIS */
  663 
  664         generation = smp_tlb_generation;        /* Overlap with serialization */
  665         invlpg(smp_tlb_addr1);
  666         if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
  667                 d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
  668                     PMAP_PCID_USER_PT;
  669                 d.pad = 0;
  670                 d.addr = smp_tlb_addr1;
  671                 invpcid(&d, INVPCID_ADDR);
  672         }
  673         PCPU_SET(smp_tlb_done, generation);
  674 }
  675 
  676 void
  677 invlpg_pcid_handler(void)
  678 {
  679         uint64_t kcr3, ucr3;
  680         uint32_t generation;
  681         uint32_t pcid;
  682 
  683 #ifdef COUNT_XINVLTLB_HITS
  684         xhits_pg[PCPU_GET(cpuid)]++;
  685 #endif /* COUNT_XINVLTLB_HITS */
  686 #ifdef COUNT_IPIS
  687         (*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
  688 #endif /* COUNT_IPIS */
  689 
  690         generation = smp_tlb_generation;        /* Overlap with serialization */
  691         invlpg(smp_tlb_addr1);
  692         if (smp_tlb_pmap == PCPU_GET(curpmap) &&
  693             (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
  694                 pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
  695                 kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
  696                 ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
  697                 pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1);
  698         }
  699         PCPU_SET(smp_tlb_done, generation);
  700 }
  701 
  702 void
  703 invlrng_invpcid_handler(void)
  704 {
  705         struct invpcid_descr d;
  706         vm_offset_t addr, addr2;
  707         uint32_t generation;
  708 
  709 #ifdef COUNT_XINVLTLB_HITS
  710         xhits_rng[PCPU_GET(cpuid)]++;
  711 #endif /* COUNT_XINVLTLB_HITS */
  712 #ifdef COUNT_IPIS
  713         (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
  714 #endif /* COUNT_IPIS */
  715 
  716         addr = smp_tlb_addr1;
  717         addr2 = smp_tlb_addr2;
  718         generation = smp_tlb_generation;        /* Overlap with serialization */
  719         do {
  720                 invlpg(addr);
  721                 addr += PAGE_SIZE;
  722         } while (addr < addr2);
  723         if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
  724                 d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
  725                     PMAP_PCID_USER_PT;
  726                 d.pad = 0;
  727                 d.addr = smp_tlb_addr1;
  728                 do {
  729                         invpcid(&d, INVPCID_ADDR);
  730                         d.addr += PAGE_SIZE;
  731                 } while (d.addr < addr2);
  732         }
  733         PCPU_SET(smp_tlb_done, generation);
  734 }
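
The range handlers invalidate one page at a time between smp_tlb_addr1 and smp_tlb_addr2; callers only request a range shootdown when the page count is small enough that this loop beats a full flush, falling back to the full-TLB IPI otherwise. The do/while below mirrors the handler's shape, with a stub standing in for the privileged invlpg instruction:

#include <stdio.h>

#define PAGE_SIZE       4096UL

static void
invlpg_stub(unsigned long va)
{
        printf("invlpg %#lx\n", va);
}

int
main(void)
{
        unsigned long addr = 0x10000, addr2 = 0x14000;

        do {
                invlpg_stub(addr);
                addr += PAGE_SIZE;
        } while (addr < addr2);
        return (0);
}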
  735 
  736 void
  737 invlrng_pcid_handler(void)
  738 {
  739         vm_offset_t addr, addr2;
  740         uint64_t kcr3, ucr3;
  741         uint32_t generation;
  742         uint32_t pcid;
  743 
  744 #ifdef COUNT_XINVLTLB_HITS
  745         xhits_rng[PCPU_GET(cpuid)]++;
  746 #endif /* COUNT_XINVLTLB_HITS */
  747 #ifdef COUNT_IPIS
  748         (*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
  749 #endif /* COUNT_IPIS */
  750 
  751         addr = smp_tlb_addr1;
  752         addr2 = smp_tlb_addr2;
  753         generation = smp_tlb_generation;        /* Overlap with serialization */
  754         do {
  755                 invlpg(addr);
  756                 addr += PAGE_SIZE;
  757         } while (addr < addr2);
  758         if (smp_tlb_pmap == PCPU_GET(curpmap) &&
  759             (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
  760                 pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
  761                 kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
  762                 ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
  763                 pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2);
  764         }
  765         PCPU_SET(smp_tlb_done, generation);
  766 }
