The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/mp_machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1996, by Steve Passe
    3  * Copyright (c) 2003, by Peter Wemm
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. The name of the developer may NOT be used to endorse or promote products
   12  *    derived from this software without specific prior written permission.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD: releng/5.2/sys/amd64/amd64/mp_machdep.c 144700 2005-04-06 01:44:36Z peter $");
   29 
   30 #include "opt_cpu.h"
   31 #include "opt_kstack_pages.h"
   32 
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/bus.h>
   36 #ifdef GPROF 
   37 #include <sys/gmon.h>
   38 #endif
   39 #include <sys/kernel.h>
   40 #include <sys/ktr.h>
   41 #include <sys/lock.h>
   42 #include <sys/malloc.h>
   43 #include <sys/memrange.h>
   44 #include <sys/mutex.h>
   45 #include <sys/pcpu.h>
   46 #include <sys/proc.h>
   47 #include <sys/smp.h>
   48 #include <sys/sysctl.h>
   49 
   50 #include <vm/vm.h>
   51 #include <vm/vm_param.h>
   52 #include <vm/pmap.h>
   53 #include <vm/vm_kern.h>
   54 #include <vm/vm_extern.h>
   55 
   56 #include <machine/apicreg.h>
   57 #include <machine/clock.h>
   58 #include <machine/md_var.h>
   59 #include <machine/pcb.h>
   60 #include <machine/smp.h>
   61 #include <machine/specialreg.h>
   62 #include <machine/tss.h>
   63 
/*
 * Warm-boot plumbing: the BIOS hands a restarting AP off through the
 * warm-reset vector at physical 0x467 (IP) / 0x469 (CS) when the CMOS
 * shutdown status byte says "warm start".  Accessed through KERNBASE.
 */
#define WARMBOOT_TARGET         0
#define WARMBOOT_OFF            (KERNBASE + 0x0467)
#define WARMBOOT_SEG            (KERNBASE + 0x0469)

/* CMOS RTC index/data I/O ports and the shutdown-status register values. */
#define CMOS_REG                (0x70)
#define CMOS_DATA               (0x71)
#define BIOS_RESET              (0x0f)
#define BIOS_WARM               (0x0a)

/* lock region used by kernel profiling */
int     mcount_lock;

int     mp_naps;                /* # of Application Processors started */
int     boot_cpu_id = -1;       /* designated BSP */
extern  int nkpt;

/*
 * CPU topology map datastructures for HTT. (XXX)
 */
struct cpu_group mp_groups[MAXCPU];
struct cpu_top mp_top;
struct cpu_top *smp_topology;

/* AP uses this during bootstrap.  Do not staticize.  */
char *bootSTK;                  /* initial stack pointer for the AP being started */
static int bootAP;              /* logical cpu id of the AP being started */

/* Free these after use */
void *bootstacks[MAXCPU];

/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;

/* SMP page table page */
extern pt_entry_t *SMPpt;

/* Per-CPU saved context used by the CPU stop/restart IPIs. */
struct pcb stoppcbs[MAXCPU];

/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;      /* single address, or start of a range */
vm_offset_t smp_tlb_addr2;      /* end of the range for IPI_INVLRNG */
volatile int smp_tlb_wait;      /* # of CPUs that have acknowledged the IPI */
struct mtx smp_tlb_mtx;         /* serializes shootdown requests */

extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);

/*
 * Local data and functions.
 */

static u_int logical_cpus;      /* logical CPUs per physical package (HTT) */
static u_int logical_cpus_mask; /* mask of CPUs that are HTT siblings */

/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;

/*
 * Store data from cpu_add() until later in the boot when we actually setup
 * the APs.
 */
struct cpu_info {
        int     cpu_present:1;  /* CPU was enumerated by firmware */
        int     cpu_bsp:1;      /* CPU is the bootstrap processor */
} static cpu_info[MAXCPU];
static int cpu_apic_ids[MAXCPU];        /* logical cpu id -> APIC ID */

static u_int boot_address;      /* physical address of the AP trampoline */

static void     set_logical_apic_ids(void);
static int      start_all_aps(void);
static int      start_ap(int apic_id);
static void     release_aps(void *dummy);

static int      hlt_cpus_mask;  /* CPUs parked in hlt, excluded from IPIs */
static int      hlt_logical_cpus;
static struct   sysctl_ctx_list logical_cpu_clist;
static u_int    bootMP_size;    /* size of the AP trampoline code */
  145 /*
  146  * Calculate usable address in base memory for AP trampoline code.
  147  */
  148 u_int
  149 mp_bootaddress(u_int basemem)
  150 {
  151 
  152         bootMP_size = mptramp_end - mptramp_start;
  153         boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */
  154         if ((basemem - boot_address) < bootMP_size)
  155                 boot_address -= PAGE_SIZE;      /* not enough, lower by 4k */
  156         /* 3 levels of page table pages */
  157         mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
  158 
  159         return mptramp_pagetables;
  160 }
  161 
  162 void
  163 cpu_add(u_int apic_id, char boot_cpu)
  164 {
  165 
  166         if (apic_id > MAXCPU) {
  167                 printf("SMP: CPU %d exceeds maximum CPU %d, ignoring\n",
  168                     apic_id, MAXCPU);
  169                 return;
  170         }
  171         KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
  172             apic_id));
  173         cpu_info[apic_id].cpu_present = 1;
  174         if (boot_cpu) {
  175                 KASSERT(boot_cpu_id == -1,
  176                     ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
  177                     boot_cpu_id));
  178                 boot_cpu_id = apic_id;
  179                 cpu_info[apic_id].cpu_bsp = 1;
  180         }
  181         mp_ncpus++;
  182         if (apic_id > mp_maxid)
  183                 mp_maxid = apic_id;
  184         if (bootverbose)
  185                 printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
  186                     "AP");
  187         
  188 }
  189 
  190 void
  191 cpu_mp_setmaxid(void)
  192 {
  193 
  194         /*
  195          * mp_maxid should be already set by calls to cpu_add().
  196          * Just sanity check its value here.
  197          */
  198         if (mp_ncpus == 0)
  199                 KASSERT(mp_maxid == 0,
  200                     ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
  201         else if (mp_ncpus == 1)
  202                 mp_maxid = 0;
  203         else
  204                 KASSERT(mp_maxid >= mp_ncpus - 1,
  205                     ("%s: counters out of sync: max %d, count %d", __func__,
  206                         mp_maxid, mp_ncpus));
  207                 
  208 }
  209 
  210 int
  211 cpu_mp_probe(void)
  212 {
  213 
  214         /*
  215          * Always record BSP in CPU map so that the mbuf init code works
  216          * correctly.
  217          */
  218         all_cpus = 1;
  219         if (mp_ncpus == 0) {
  220                 /*
  221                  * No CPUs were found, so this must be a UP system.  Setup
  222                  * the variables to represent a system with a single CPU
  223                  * with an id of 0.
  224                  */
  225                 mp_ncpus = 1;
  226                 return (0);
  227         }
  228 
  229         /* At least one CPU was found. */
  230         if (mp_ncpus == 1) {
  231                 /*
  232                  * One CPU was found, so this must be a UP system with
  233                  * an I/O APIC.
  234                  */
  235                 mp_maxid = 0;
  236                 return (0);
  237         }
  238 
  239         /* At least two CPUs were found. */
  240         return (1);
  241 }
  242 
/*
 * Initialize the IPI handlers and start up the AP's.
 *
 * Runs on the BSP once the IDT can be populated: installs a gate for
 * every inter-CPU interrupt vector, resolves the BSP's APIC ID, starts
 * each enumerated AP, and finally records the hyperthreading (logical
 * CPU) topology.
 */
void
cpu_mp_start(void)
{
        int i;

        /* Initialize the logical ID to APIC ID table. */
        for (i = 0; i < MAXCPU; i++)
                cpu_apic_ids[i] = -1;

        /* Install an inter-CPU IPI for TLB invalidation */
        setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);

        /* Install an inter-CPU IPI for forwarding hardclock() */
        setidt(IPI_HARDCLOCK, IDTVEC(hardclock), SDT_SYSIGT, SEL_KPL, 0);

        /* Install an inter-CPU IPI for forwarding statclock() */
        setidt(IPI_STATCLOCK, IDTVEC(statclock), SDT_SYSIGT, SEL_KPL, 0);

#ifdef LAZY_SWITCH
        /* Install an inter-CPU IPI for lazy pmap release */
        setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), SDT_SYSIGT, SEL_KPL, 0);
#endif

        /* Install an inter-CPU IPI for all-CPU rendezvous */
        setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);

        /* Install an inter-CPU IPI for forcing an additional software trap */
        setidt(IPI_AST, IDTVEC(cpuast), SDT_SYSIGT, SEL_KPL, 0);

        /* Install an inter-CPU IPI for CPU stop/restart */
        setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);

        mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);

        /* Set boot_cpu_id if needed (i.e. cpu_add() never saw a BSP). */
        if (boot_cpu_id == -1) {
                boot_cpu_id = PCPU_GET(apic_id);
                cpu_info[boot_cpu_id].cpu_bsp = 1;
        } else
                KASSERT(boot_cpu_id == PCPU_GET(apic_id),
                    ("BSP's APIC ID doesn't match boot_cpu_id"));
        /* The BSP is always logical cpu 0. */
        cpu_apic_ids[0] = boot_cpu_id;

        /* Start each Application Processor */
        start_all_aps();

        /*
         * Setup the initial logical CPUs info.  With HTT, CPUID reports
         * the number of logical CPUs per physical package in bits 23:16
         * of cpu_procinfo.
         */
        logical_cpus = logical_cpus_mask = 0;
        if (cpu_feature & CPUID_HTT)
                logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;

        set_logical_apic_ids();
}
  301 
  302 
  303 /*
  304  * Print various information about the SMP system hardware and setup.
  305  */
  306 void
  307 cpu_mp_announce(void)
  308 {
  309         int i, x;
  310 
  311         /* List CPUs */
  312         printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
  313         for (i = 1, x = 0; x < MAXCPU; x++) {
  314                 if (cpu_info[x].cpu_present && !cpu_info[x].cpu_bsp) {
  315                         KASSERT(i < mp_ncpus,
  316                             ("mp_ncpus and actual cpus are out of whack"));
  317                         printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
  318                 }
  319         }
  320 }
  321 
/*
 * AP CPU's call this to initialize themselves.
 *
 * Entered from the trampoline on the boot stack prepared by
 * start_all_aps() (bootAP/bootSTK).  Builds this CPU's TSS, GDT entry
 * and per-cpu data, programs the fast-syscall MSRs, announces itself
 * via mp_naps, then spins until the BSP sets aps_ready.  Finally it
 * joins the scheduler via cpu_throw() and never returns.
 */
void
init_secondary(void)
{
        struct pcpu *pc;
        u_int64_t msr, cr0;
        int cpu, gsel_tss;

        /* Set by the startup code for us to use */
        cpu = bootAP;

        /* Init tss: clone the BSP's template; rsp0 is set at first switch. */
        common_tss[cpu] = common_tss[0];
        common_tss[cpu].tss_rsp0 = 0;   /* not used until after switch */
        common_tss[cpu].tss_iobase = sizeof(struct amd64tss);

        /* Point this CPU's GDT TSS descriptor at its own TSS. */
        gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
        ssdtosyssd(&gdt_segs[GPROC0_SEL],
           (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

        lgdt(&r_gdt);                   /* does magic intra-segment return */

        /* Get per-cpu data */
        pc = &__pcpu[cpu];

        /* prime data page for it to use */
        pcpu_init(pc, cpu, sizeof(struct pcpu));
        pc->pc_apic_id = cpu_apic_ids[cpu];
        pc->pc_prvspace = pc;
        pc->pc_curthread = 0;
        pc->pc_tssp = &common_tss[cpu];
        pc->pc_rsp0 = 0;

        /* GSBASE must point at the pcpu area before any PCPU_GET/SET. */
        wrmsr(MSR_FSBASE, 0);           /* User value */
        wrmsr(MSR_GSBASE, (u_int64_t)pc);
        wrmsr(MSR_KGSBASE, (u_int64_t)pc);      /* XXX User value while we're in the kernel */

        lidt(&r_idt);

        gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
        ltr(gsel_tss);

        /*
         * Set to a known state:
         * Set by mpboot.s: CR0_PG, CR0_PE
         * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
         */
        cr0 = rcr0();
        cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
        load_cr0(cr0);

        /* Set up the fast syscall stuff (SYSCALL/SYSRET entry points) */
        msr = rdmsr(MSR_EFER) | EFER_SCE;
        wrmsr(MSR_EFER, msr);
        wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
        wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
        /* Kernel and 32-bit user code selectors for SYSCALL/SYSRET. */
        msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
              ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
        wrmsr(MSR_STAR, msr);
        wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);

        /* Disable local apic just to be sure. */
        lapic_disable();

        /* signal our startup to the BSP. */
        mp_naps++;

        /* Spin until the BSP releases the AP's. */
        while (!aps_ready)
                ia32_pause();

        /* set up CPU registers and state */
        cpu_setregs();

        /* set up FPU state on the AP */
        fpuinit();

        /* set up SSE registers */
        enable_sse();

        /* A quick check from sanity claus */
        if (PCPU_GET(apic_id) != lapic_id()) {
                printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
                printf("SMP: actual apic_id = %d\n", lapic_id());
                printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
                panic("cpuid mismatch! boom!!");
        }

        /* Serialize the rest of the per-CPU bring-up against other APs. */
        mtx_lock_spin(&ap_boot_mtx);

        /* Init local apic for irq's */
        lapic_setup();

        /* Set memory range attributes for this CPU to match the BSP */
        mem_range_AP_init();

        smp_cpus++;

        CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
        printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));

        /*
         * Determine if we are a logical CPU.  NOTE(review): this assumes
         * APIC IDs are allocated so that HTT siblings share a package and
         * the first sibling's ID is a multiple of logical_cpus — confirm
         * against the firmware enumeration.
         */
        if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
                logical_cpus_mask |= PCPU_GET(cpumask);

        /* Build our map of 'other' CPUs. */
        PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

        if (bootverbose)
                lapic_dump("AP");

        /* The last AP to arrive flips the switch for everyone. */
        if (smp_cpus == mp_ncpus) {
                /* enable IPI's, tlb shootdown, freezes etc */
                atomic_store_rel_int(&smp_started, 1);
                smp_active = 1;  /* historic */
        }

        mtx_unlock_spin(&ap_boot_mtx);

        /* wait until all the AP's are up */
        while (smp_started == 0)
                ia32_pause();

        /* ok, now grab sched_lock and enter the scheduler */
        mtx_lock_spin(&sched_lock);

        binuptime(PCPU_PTR(switchtime));
        PCPU_SET(switchticks, ticks);

        cpu_throw(NULL, choosethread());        /* doesn't return */

        panic("scheduler returned us to %s", __func__);
        /* NOTREACHED */
}
  458 
  459 /*******************************************************************
  460  * local functions and data
  461  */
  462 
  463 /*
  464  * Set the APIC logical IDs.
  465  *
  466  * We want to cluster logical CPU's within the same APIC ID cluster.
  467  * Since logical CPU's are aligned simply filling in the clusters in
  468  * APIC ID order works fine.  Note that this does not try to balance
  469  * the number of CPU's in each cluster. (XXX?)
  470  */
  471 static void
  472 set_logical_apic_ids(void)
  473 {
  474         u_int apic_id, cluster, cluster_id;
  475 
  476         /* Force us to allocate cluster 0 at the start. */
  477         cluster = -1;
  478         cluster_id = APIC_MAX_INTRACLUSTER_ID;
  479         for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
  480                 if (!cpu_info[apic_id].cpu_present)
  481                         continue;
  482                 if (cluster_id == APIC_MAX_INTRACLUSTER_ID) {
  483                         cluster = ioapic_next_logical_cluster();
  484                         cluster_id = 0;
  485                 } else
  486                         cluster_id++;
  487                 if (bootverbose)
  488                         printf("APIC ID: physical %u, logical %u:%u\n",
  489                             apic_id, cluster, cluster_id);
  490                 lapic_set_logical_id(apic_id, cluster, cluster_id);
  491         }
  492 }
  493 
/*
 * start each AP in our list
 *
 * Copies the trampoline into low memory, builds the three AP bootstrap
 * page table pages below it, points the BIOS warm-reset vector at the
 * trampoline, then starts each enumerated AP in turn with its own boot
 * stack.  The warm-reset vector and CMOS reset reason are restored
 * afterwards.  Returns the number of APs that announced themselves.
 */
static int
start_all_aps(void)
{
        u_char mpbiosreason;
        u_int32_t mpbioswarmvec;
        int apic_id, cpu, i;
        u_int64_t *pt4, *pt3, *pt2;

        mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

        /* install the AP 1st level boot code */
        pmap_kenter(boot_address + KERNBASE, boot_address);
        bcopy(mptramp_start, (void *)((uintptr_t)boot_address + KERNBASE), bootMP_size);

        /* Locate the page tables, they'll be below the trampoline */
        pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
        pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
        pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);

        /* Create the initial 1GB replicated page tables */
        for (i = 0; i < 512; i++) {
                /* Each slot of the level 4 pages points to the same level 3 page */
                pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
                pt4[i] |= PG_V | PG_RW | PG_U;

                /* Each slot of the level 3 pages points to the same level 2 page */
                pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
                pt3[i] |= PG_V | PG_RW | PG_U;

                /* The level 2 page slots are mapped with 2MB pages for 1GB. */
                pt2[i] = i * (2 * 1024 * 1024);
                pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
        }

        /* save the current value of the warm-start vector */
        mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
        outb(CMOS_REG, BIOS_RESET);
        mpbiosreason = inb(CMOS_DATA);

        /* setup a vector to our boot code */
        *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
        *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
        outb(CMOS_REG, BIOS_RESET);
        outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */

        /* start each AP (logical ids 1..n; 0 is the BSP) */
        cpu = 0;
        for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
                if (!cpu_info[apic_id].cpu_present ||
                    cpu_info[apic_id].cpu_bsp)
                        continue;
                cpu++;

                /* save APIC ID for this logical ID */
                cpu_apic_ids[cpu] = apic_id;

                /* allocate and set up an idle stack data page */
                bootstacks[cpu] = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);

                /*
                 * Top of the stack, less 8 bytes of slop — presumably
                 * for the trampoline's initial frame; confirm against
                 * the trampoline code.
                 */
                bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
                bootAP = cpu;

                /* attempt to start the Application Processor */
                if (!start_ap(apic_id)) {
                        /* restore the warmstart vector */
                        *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
                        panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
                }

                all_cpus |= (1 << cpu);         /* record AP in CPU map */
        }

        /* build our map of 'other' CPUs */
        PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

        /* restore the warmstart vector */
        *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;

        /* restore the original CMOS reset reason */
        outb(CMOS_REG, BIOS_RESET);
        outb(CMOS_DATA, mpbiosreason);

        /* number of APs actually started */
        return mp_naps;
}
  581 
  582 
/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It isn't pretty,
 * but it seems to work.
 *
 * Returns 1 once the AP has bumped mp_naps (i.e. reached
 * init_secondary()), or 0 if it never shows up within ~5 seconds.
 */
static int
start_ap(int apic_id)
{
        int vector, ms;
        int cpus;

        /* calculate the vector (page number of the trampoline code) */
        vector = (boot_address >> 12) & 0xff;

        /* used as a watchpoint to signal AP startup */
        cpus = mp_naps;

        /*
         * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
         * and running the target CPU. OR this INIT IPI might be latched (P5
         * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
         * ignored.
         */

        /* do an INIT IPI: assert RESET */
        lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
            APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);

        /* wait for pending status end */
        lapic_ipi_wait(-1);

        /* do an INIT IPI: deassert RESET */
        lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
            APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);

        /* wait for pending status end */
        DELAY(10000);           /* wait ~10mS */
        lapic_ipi_wait(-1);

        /*
         * next we do a STARTUP IPI: the previous INIT IPI might still be
         * latched, (P5 bug) this 1st STARTUP would then terminate
         * immediately, and the previously started INIT IPI would continue. OR
         * the previous INIT IPI has already run. and this STARTUP IPI will
         * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
         * will run.
         */

        /* do a STARTUP IPI */
        lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
            APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
            vector, apic_id);
        lapic_ipi_wait(-1);
        DELAY(200);             /* wait ~200uS */

        /*
         * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
         * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
         * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
         * recognized after hardware RESET or INIT IPI.
         */

        lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
            APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
            vector, apic_id);
        lapic_ipi_wait(-1);
        DELAY(200);             /* wait ~200uS */

        /* Wait up to 5 seconds (50 x 100ms) for it to start. */
        for (ms = 0; ms < 50; ms++) {
                if (mp_naps > cpus)
                        return 1;       /* return SUCCESS */
                DELAY(100000);
        }
        return 0;               /* return FAILURE */
}
  661 
  662 /*
  663  * Flush the TLB on all other CPU's
  664  */
  665 static void
  666 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  667 {
  668         u_int ncpu;
  669 
  670         ncpu = mp_ncpus - 1;    /* does not shootdown self */
  671         if (ncpu < 1)
  672                 return;         /* no other cpus */
  673         mtx_assert(&smp_tlb_mtx, MA_OWNED);
  674         smp_tlb_addr1 = addr1;
  675         smp_tlb_addr2 = addr2;
  676         atomic_store_rel_int(&smp_tlb_wait, 0);
  677         ipi_all_but_self(vector);
  678         while (smp_tlb_wait < ncpu)
  679                 ia32_pause();
  680 }
  681 
  682 /*
  683  * This is about as magic as it gets.  fortune(1) has got similar code
  684  * for reversing bits in a word.  Who thinks up this stuff??
  685  *
  686  * Yes, it does appear to be consistently faster than:
  687  * while (i = ffs(m)) {
  688  *      m >>= i;
  689  *      bits++;
  690  * }
  691  * and
  692  * while (lsb = (m & -m)) {     // This is magic too
  693  *      m &= ~lsb;              // or: m ^= lsb
  694  *      bits++;
  695  * }
  696  * Both of these latter forms do some very strange things on gcc-3.1 with
  697  * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
  698  * There is probably an SSE or MMX popcnt instruction.
  699  *
  700  * I wonder if this should be in libkern?
  701  *
  702  * XXX Stop the presses!  Another one:
  703  * static __inline u_int32_t
  704  * popcnt1(u_int32_t v)
  705  * {
  706  *      v -= ((v >> 1) & 0x55555555);
  707  *      v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
  708  *      v = (v + (v >> 4)) & 0x0F0F0F0F;
  709  *      return (v * 0x01010101) >> 24;
  710  * }
  711  * The downside is that it has a multiply.  With a pentium3 with
  712  * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
  713  * an imull, and in that case it is faster.  In most other cases
  714  * it appears slightly slower.
  715  *
  716  * Another variant (also from fortune):
  717  * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
  718  * #define  BX_(x)     ((x) - (((x)>>1)&0x77777777)            \
  719  *                          - (((x)>>2)&0x33333333)            \
  720  *                          - (((x)>>3)&0x11111111))
  721  */
  722 static __inline u_int32_t
  723 popcnt(u_int32_t m)
  724 {
  725 
  726         m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
  727         m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
  728         m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
  729         m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
  730         m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
  731         return m;
  732 }
  733 
/*
 * Flush TLB entries on the CPUs named by 'mask'; (u_int)-1 means all
 * other CPUs.  Caller must hold smp_tlb_mtx.  Addresses are handed to
 * the IPI handlers via the smp_tlb_addr* globals; spins until every
 * targeted CPU has bumped smp_tlb_wait.
 */
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
        int ncpu, othercpus;

        othercpus = mp_ncpus - 1;
        if (mask == (u_int)-1) {
                /* Broadcast case: target every other CPU, if any exist. */
                ncpu = othercpus;
                if (ncpu < 1)
                        return;
        } else {
                /* Never IPI ourselves; our own TLB is the caller's job. */
                mask &= ~PCPU_GET(cpumask);
                if (mask == 0)
                        return;
                ncpu = popcnt(mask);
                if (ncpu > othercpus) {
                        /* XXX this should be a panic offence */
                        printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
                            ncpu, othercpus);
                        ncpu = othercpus;
                }
                /* XXX should be a panic, implied by mask == 0 above */
                if (ncpu < 1)
                        return;
        }
        mtx_assert(&smp_tlb_mtx, MA_OWNED);
        smp_tlb_addr1 = addr1;
        smp_tlb_addr2 = addr2;
        atomic_store_rel_int(&smp_tlb_wait, 0);
        if (mask == (u_int)-1)
                ipi_all_but_self(vector);
        else
                ipi_selected(mask, vector);
        /* Wait for every targeted CPU to acknowledge. */
        while (smp_tlb_wait < ncpu)
                ia32_pause();
}
  770 
  771 void
  772 smp_invltlb(void)
  773 {
  774 
  775         if (smp_started)
  776                 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
  777 }
  778 
  779 void
  780 smp_invlpg(vm_offset_t addr)
  781 {
  782 
  783         if (smp_started)
  784                 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
  785 }
  786 
  787 void
  788 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
  789 {
  790 
  791         if (smp_started)
  792                 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
  793 }
  794 
  795 void
  796 smp_masked_invltlb(u_int mask)
  797 {
  798 
  799         if (smp_started)
  800                 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
  801 }
  802 
  803 void
  804 smp_masked_invlpg(u_int mask, vm_offset_t addr)
  805 {
  806 
  807         if (smp_started)
  808                 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
  809 }
  810 
  811 void
  812 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
  813 {
  814 
  815         if (smp_started)
  816                 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
  817 }
  818 
  819 
/*
 * For statclock, we send an IPI to all CPU's to have them call this
 * function.
 *
 * Runs on the IPI_STATCLOCK vector.  The interrupt nesting level is
 * bumped around the clock processing so the profiling/statistics code
 * observes interrupt context.
 */
void
forwarded_statclock(struct clockframe frame)
{
        struct thread *td;

        CTR0(KTR_SMP, "forwarded_statclock");
        td = curthread;
        td->td_intr_nesting_level++;
        /* Only charge profiling ticks while some process is profiling. */
        if (profprocs != 0)
                profclock(&frame);
        /* statclock() runs only on every psdiv'th tick. */
        if (pscnt == psdiv)
                statclock(&frame);
        td->td_intr_nesting_level--;
}
  838 
  839 void
  840 forward_statclock(void)
  841 {
  842         int map;
  843 
  844         CTR0(KTR_SMP, "forward_statclock");
  845 
  846         if (!smp_started || cold || panicstr)
  847                 return;
  848 
  849         map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
  850         if (map != 0)
  851                 ipi_selected(map, IPI_STATCLOCK);
  852 }
  853 
  854 /*
  855  * For each hardclock(), we send an IPI to all other CPU's to have them
  856  * execute this function.  It would be nice to reduce contention on
  857  * sched_lock if we could simply peek at the CPU to determine the user/kernel
  858  * state and call hardclock_process() on the CPU receiving the clock interrupt
  859  * and then just use a simple IPI to handle any ast's if needed.
  860  */
  861 void
  862 forwarded_hardclock(struct clockframe frame)
  863 {
  864         struct thread *td;
  865 
  866         CTR0(KTR_SMP, "forwarded_hardclock");
  867         td = curthread;
  868         td->td_intr_nesting_level++;
  869         hardclock_process(&frame);
  870         td->td_intr_nesting_level--;
  871 }
  872 
  873 void 
  874 forward_hardclock(void)
  875 {
  876         u_int map;
  877 
  878         CTR0(KTR_SMP, "forward_hardclock");
  879 
  880         if (!smp_started || cold || panicstr)
  881                 return;
  882 
  883         map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
  884         if (map != 0)
  885                 ipi_selected(map, IPI_HARDCLOCK);
  886 }
  887 
  888 /*
  889  * send an IPI to a set of cpus.
  890  */
  891 void
  892 ipi_selected(u_int32_t cpus, u_int ipi)
  893 {
  894         int cpu;
  895 
  896         CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
  897         while ((cpu = ffs(cpus)) != 0) {
  898                 cpu--;
  899                 KASSERT(cpu_apic_ids[cpu] != -1,
  900                     ("IPI to non-existent CPU %d", cpu));
  901                 lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
  902                 cpus &= ~(1 << cpu);
  903         }
  904 }
  905 
  906 /*
  907  * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
  908  */
  909 void
  910 ipi_all(u_int ipi)
  911 {
  912 
  913         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
  914         lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
  915 }
  916 
  917 /*
  918  * send an IPI to all CPUs EXCEPT myself
  919  */
  920 void
  921 ipi_all_but_self(u_int ipi)
  922 {
  923 
  924         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
  925         lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
  926 }
  927 
  928 /*
  929  * send an IPI to myself
  930  */
  931 void
  932 ipi_self(u_int ipi)
  933 {
  934 
  935         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
  936         lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
  937 }
  938 
  939 /*
  940  * This is called once the rest of the system is up and running and we're
  941  * ready to let the AP's out of the pen.
  942  */
  943 static void
  944 release_aps(void *dummy __unused)
  945 {
  946 
  947         if (mp_ncpus == 1) 
  948                 return;
  949         mtx_lock_spin(&sched_lock);
  950         atomic_store_rel_int(&aps_ready, 1);
  951         while (smp_started == 0)
  952                 ia32_pause();
  953         mtx_unlock_spin(&sched_lock);
  954 }
  955 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
  956 
  957 static int
  958 sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
  959 {
  960         u_int mask;
  961         int error;
  962 
  963         mask = hlt_cpus_mask;
  964         error = sysctl_handle_int(oidp, &mask, 0, req);
  965         if (error || !req->newptr)
  966                 return (error);
  967 
  968         if (logical_cpus_mask != 0 &&
  969             (mask & logical_cpus_mask) == logical_cpus_mask)
  970                 hlt_logical_cpus = 1;
  971         else
  972                 hlt_logical_cpus = 0;
  973 
  974         if ((mask & all_cpus) == all_cpus)
  975                 mask &= ~(1<<0);
  976         hlt_cpus_mask = mask;
  977         return (error);
  978 }
  979 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
  980     0, 0, sysctl_hlt_cpus, "IU", "");
  981 
  982 static int
  983 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
  984 {
  985         int disable, error;
  986 
  987         disable = hlt_logical_cpus;
  988         error = sysctl_handle_int(oidp, &disable, 0, req);
  989         if (error || !req->newptr)
  990                 return (error);
  991 
  992         if (disable)
  993                 hlt_cpus_mask |= logical_cpus_mask;
  994         else
  995                 hlt_cpus_mask &= ~logical_cpus_mask;
  996 
  997         if ((hlt_cpus_mask & all_cpus) == all_cpus)
  998                 hlt_cpus_mask &= ~(1<<0);
  999 
 1000         hlt_logical_cpus = disable;
 1001         return (error);
 1002 }
 1003 
/*
 * If this machine has logical CPUs (HTT), pick up the boot-time tunable
 * and register the machdep.hlt_logical_cpus / machdep.logical_cpus_mask
 * sysctls.  Does nothing on machines without logical CPUs.
 */
static void
cpu_hlt_setup(void *dummy __unused)
{

        if (logical_cpus_mask != 0) {
                /* Honor the loader tunable before exposing the sysctl. */
                TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
                    &hlt_logical_cpus);
                sysctl_ctx_init(&logical_cpu_clist);
                /* Read-write knob to halt/unhalt all logical CPUs. */
                SYSCTL_ADD_PROC(&logical_cpu_clist,
                    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
                    "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
                    sysctl_hlt_logical_cpus, "IU", "");
                /* Read-only view of which CPUs are logical. */
                SYSCTL_ADD_UINT(&logical_cpu_clist,
                    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
                    "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
                    &logical_cpus_mask, 0, "");

                /* Apply the tunable's initial setting, if enabled. */
                if (hlt_logical_cpus)
                        hlt_cpus_mask |= logical_cpus_mask;
        }
}
/* Run after the rest of SMP startup so logical_cpus_mask is final. */
SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
 1026 
 1027 int
 1028 mp_grab_cpu_hlt(void)
 1029 {
 1030         u_int mask = PCPU_GET(cpumask);
 1031         int retval;
 1032 
 1033         retval = mask & hlt_cpus_mask;
 1034         while (mask & hlt_cpus_mask)
 1035                 __asm __volatile("sti; hlt" : : : "memory");
 1036         return (retval);
 1037 }

Cache object: 9e4f8c5538e8ae38612214300879685f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.