FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/mp_machdep.c

    1 /*-
    2  * Copyright (c) 1996, by Steve Passe
    3  * Copyright (c) 2003, by Peter Wemm
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. The name of the developer may NOT be used to endorse or promote products
   12  *    derived from this software without specific prior written permission.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD: releng/7.4/sys/amd64/amd64/mp_machdep.c 204910 2010-03-09 13:08:57Z jhb $");
   29 
   30 #include "opt_cpu.h"
   31 #include "opt_kstack_pages.h"
   32 #include "opt_mp_watchdog.h"
   33 #include "opt_sched.h"
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/bus.h>
   38 #ifdef GPROF 
   39 #include <sys/gmon.h>
   40 #endif
   41 #include <sys/kernel.h>
   42 #include <sys/ktr.h>
   43 #include <sys/lock.h>
   44 #include <sys/malloc.h>
   45 #include <sys/memrange.h>
   46 #include <sys/mutex.h>
   47 #include <sys/pcpu.h>
   48 #include <sys/proc.h>
   49 #include <sys/sched.h>
   50 #include <sys/smp.h>
   51 #include <sys/sysctl.h>
   52 
   53 #include <vm/vm.h>
   54 #include <vm/vm_param.h>
   55 #include <vm/pmap.h>
   56 #include <vm/vm_kern.h>
   57 #include <vm/vm_extern.h>
   58 
   59 #include <machine/apicreg.h>
   60 #include <machine/cputypes.h>
   61 #include <machine/mca.h>
   62 #include <machine/md_var.h>
   63 #include <machine/mp_watchdog.h>
   64 #include <machine/pcb.h>
   65 #include <machine/psl.h>
   66 #include <machine/smp.h>
   67 #include <machine/specialreg.h>
   68 #include <machine/tss.h>
   69 
   70 #define WARMBOOT_TARGET         0
   71 #define WARMBOOT_OFF            (KERNBASE + 0x0467)
   72 #define WARMBOOT_SEG            (KERNBASE + 0x0469)
   73 
   74 #define CMOS_REG                (0x70)
   75 #define CMOS_DATA               (0x71)
   76 #define BIOS_RESET              (0x0f)
   77 #define BIOS_WARM               (0x0a)
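      /*
       * Writing BIOS_WARM to the CMOS shutdown status byte (index
       * BIOS_RESET) asks the BIOS to do a warm start on the next reset
       * and jump through the real-mode vector stored at 0x467/0x469.
       * start_all_aps() points that vector at the AP trampoline before
       * sending the startup IPIs and restores it afterwards.
       */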
   78 
   79 /* lock region used by kernel profiling */
   80 int     mcount_lock;
   81 
   82 int     mp_naps;                /* # of Application Processors */
   83 int     boot_cpu_id = -1;       /* designated BSP */
   84 
   85 extern  struct pcpu __pcpu[];
   86 
   87 /*
   88  * CPU topology map datastructures for HTT.
   89  */
   90 static struct cpu_group mp_groups[MAXCPU];
   91 static struct cpu_top mp_top;
   92 
   93 /* AP uses this during bootstrap.  Do not staticize.  */
   94 char *bootSTK;
   95 static int bootAP;
   96 
   97 /* Free these after use */
   98 void *bootstacks[MAXCPU];
   99 
  100 /* Temporary holder for double fault stack */
  101 char *doublefault_stack;
  102 char *nmi_stack;
  103 
  104 /* Hotwire a 0->4MB V==P mapping */
  105 extern pt_entry_t *KPTphys;
  106 
  107 /* SMP page table page */
  108 extern pt_entry_t *SMPpt;
  109 
  110 struct pcb stoppcbs[MAXCPU];
  111 
  112 /* Variables needed for SMP tlb shootdown. */
  113 vm_offset_t smp_tlb_addr1;
  114 vm_offset_t smp_tlb_addr2;
  115 volatile int smp_tlb_wait;
  116 
  117 extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
  118 
  119 #ifdef STOP_NMI
  120 volatile cpumask_t ipi_nmi_pending;
  121 
  122 static void     ipi_nmi_selected(u_int32_t cpus);
  123 #endif 
  124 
  125 /*
  126  * Local data and functions.
  127  */
  128 
  129 #ifdef STOP_NMI
  130 /* 
  131  * Provide an alternate method of stopping other CPUs. If another CPU has
  132  * disabled interrupts the conventional STOP IPI will be blocked. This 
  133  * NMI-based stop should get through in that case.
  134  */
  135 static int stop_cpus_with_nmi = 1;
  136 SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW,
  137     &stop_cpus_with_nmi, 0, "");
  138 TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi);
  139 #else
  140 #define stop_cpus_with_nmi      0
  141 #endif
  142 
  143 static u_int logical_cpus;
  144 
  145 /* used to hold the APs until we are ready to release them */
  146 static struct mtx ap_boot_mtx;
  147 
  148 /* Set to 1 once we're ready to let the APs out of the pen. */
  149 static volatile int aps_ready = 0;
  150 
  151 /*
  152  * Store data from cpu_add() until later in the boot when we actually set up
  153  * the APs.
  154  */
  155 struct cpu_info {
  156         int     cpu_present:1;
  157         int     cpu_bsp:1;
  158         int     cpu_disabled:1;
  159         int     cpu_hyperthread:1;
  160 } static cpu_info[MAX_APIC_ID + 1];
  161 int cpu_apic_ids[MAXCPU];
  162 
  163 /* Holds pending bitmap based IPIs per CPU */
  164 static volatile u_int cpu_ipi_pending[MAXCPU];
  165 
  166 static u_int boot_address;
  167 
  168 static void     assign_cpu_ids(void);
  169 static void     set_interrupt_apic_ids(void);
  170 static int      start_all_aps(void);
  171 static int      start_ap(int apic_id);
  172 static void     release_aps(void *dummy);
  173 
  174 static int      hlt_logical_cpus;
  175 static u_int    hyperthreading_cpus;
  176 static cpumask_t        hyperthreading_cpus_mask;
  177 static int      hyperthreading_allowed = 1;
  178 static struct   sysctl_ctx_list logical_cpu_clist;
  179 static u_int    bootMP_size;
  180 
  181 static void
  182 mem_range_AP_init(void)
  183 {
  184         if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
  185                 mem_range_softc.mr_op->initAP(&mem_range_softc);
  186 }
  187 
  188 void
  189 mp_topology(void)
  190 {
  191         struct cpu_group *group;
  192         int apic_id;
  193         int groups;
  194         int cpu;
  195 
  196         /* Build the smp_topology map. */
  197         /* Nothing to do if there is no HTT support. */
  198         if (hyperthreading_cpus <= 1)
  199                 return;
  200         group = &mp_groups[0];
  201         groups = 1;
  202         for (cpu = 0, apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) {
  203                 if (!cpu_info[apic_id].cpu_present ||
  204                     cpu_info[apic_id].cpu_disabled)
  205                         continue;
  206                 /*
  207                  * If the current group has members and we're not a logical
  208                  * cpu, create a new group.
  209                  */
  210                 if (group->cg_count != 0 &&
  211                     (apic_id % hyperthreading_cpus) == 0) {
  212                         group++;
  213                         groups++;
  214                 }
  215                 group->cg_count++;
  216                 group->cg_mask |= 1 << cpu;
  217                 cpu++;
  218         }
  219 
  220         mp_top.ct_count = groups;
  221         mp_top.ct_group = mp_groups;
  222         smp_topology = &mp_top;
  223 }
  224 
  225 /*
  226  * Calculate usable address in base memory for AP trampoline code.
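       *
       * The trampoline code is copied to boot_address, a page-aligned
       * spot at the top of base memory; the three page-table pages it
       * uses sit immediately below it, and the start of that region is
       * what this function returns.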
  227  */
  228 u_int
  229 mp_bootaddress(u_int basemem)
  230 {
  231 
  232         bootMP_size = mptramp_end - mptramp_start;
  233         boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */
  234         if (((basemem * 1024) - boot_address) < bootMP_size)
  235                 boot_address -= PAGE_SIZE;      /* not enough, lower by 4k */
  236         /* 3 levels of page table pages */
  237         mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
  238 
  239         return mptramp_pagetables;
  240 }
  241 
  242 void
  243 cpu_add(u_int apic_id, char boot_cpu)
  244 {
  245 
  246         if (apic_id > MAX_APIC_ID) {
  247                 panic("SMP: APIC ID %d too high", apic_id);
  248                 return;
  249         }
  250         KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
  251             apic_id));
  252         cpu_info[apic_id].cpu_present = 1;
  253         if (boot_cpu) {
  254                 KASSERT(boot_cpu_id == -1,
  255                     ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
  256                     boot_cpu_id));
  257                 boot_cpu_id = apic_id;
  258                 cpu_info[apic_id].cpu_bsp = 1;
  259         }
  260         if (mp_ncpus < MAXCPU) {
  261                 mp_ncpus++;
  262                 mp_maxid = mp_ncpus - 1;
  263         }
  264         if (bootverbose)
  265                 printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
  266                     "AP");
  267 }
  268 
  269 void
  270 cpu_mp_setmaxid(void)
  271 {
  272 
  273         /*
  274          * mp_maxid should already be set by calls to cpu_add().
  275          * Just sanity check its value here.
  276          */
  277         if (mp_ncpus == 0)
  278                 KASSERT(mp_maxid == 0,
  279                     ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
  280         else if (mp_ncpus == 1)
  281                 mp_maxid = 0;
  282         else
  283                 KASSERT(mp_maxid >= mp_ncpus - 1,
  284                     ("%s: counters out of sync: max %d, count %d", __func__,
  285                         mp_maxid, mp_ncpus));           
  286 }
  287 
  288 int
  289 cpu_mp_probe(void)
  290 {
  291 
  292         /*
  293          * Always record BSP in CPU map so that the mbuf init code works
  294          * correctly.
  295          */
  296         all_cpus = 1;
  297         if (mp_ncpus == 0) {
  298                 /*
  299                  * No CPUs were found, so this must be a UP system.  Set up
  300                  * the variables to represent a system with a single CPU
  301                  * with an id of 0.
  302                  */
  303                 mp_ncpus = 1;
  304                 return (0);
  305         }
  306 
  307         /* At least one CPU was found. */
  308         if (mp_ncpus == 1) {
  309                 /*
  310                  * One CPU was found, so this must be a UP system with
  311                  * an I/O APIC.
  312                  */
  313                 mp_maxid = 0;
  314                 return (0);
  315         }
  316 
  317         /* At least two CPUs were found. */
  318         return (1);
  319 }
  320 
  321 /*
  322  * Initialize the IPI handlers and start up the APs.
  323  */
  324 void
  325 cpu_mp_start(void)
  326 {
  327         int i;
  328         u_int threads_per_cache, p[4];
  329 
  330         /* Initialize the logical ID to APIC ID table. */
  331         for (i = 0; i < MAXCPU; i++) {
  332                 cpu_apic_ids[i] = -1;
  333                 cpu_ipi_pending[i] = 0;
  334         }
  335 
  336         /* Install an inter-CPU IPI for TLB invalidation */
  337         setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
  338         setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
  339         setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
  340 
  341         /* Install an inter-CPU IPI for cache invalidation. */
  342         setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0);
  343 
  344         /* Install an inter-CPU IPI for all-CPU rendezvous */
  345         setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);
  346 
  347         /* Install generic inter-CPU IPI handler */
  348         setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
  349                SDT_SYSIGT, SEL_KPL, 0);
  350 
  351         /* Install an inter-CPU IPI for CPU stop/restart */
  352         setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);
  353 
  354         /* Set boot_cpu_id if needed. */
  355         if (boot_cpu_id == -1) {
  356                 boot_cpu_id = PCPU_GET(apic_id);
  357                 cpu_info[boot_cpu_id].cpu_bsp = 1;
  358         } else
  359                 KASSERT(boot_cpu_id == PCPU_GET(apic_id),
  360                     ("BSP's APIC ID doesn't match boot_cpu_id"));
  361 
  362         /* Setup the initial logical CPUs info. */
  363         logical_cpus = logical_cpus_mask = 0;
  364         if (cpu_feature & CPUID_HTT)
  365                 logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
  366 
  367         /*
  368          * Work out if hyperthreading is *really* enabled.  This
  369          * is made really ugly by the fact that processors lie: Dual
  370          * core processors claim to be hyperthreaded even when they're
  371          * not, presumably because they want to be treated the same
  372          * way as HTT with respect to per-cpu software licensing.
  373          * At the time of writing (May 12, 2005) the only hyperthreaded
  374          * cpus are from Intel, and Intel's dual-core processors can be
  375          * identified via the "deterministic cache parameters" cpuid
  376          * calls.
  377          */
  378         /*
  379          * First determine if this is an Intel processor which claims
  380          * to have hyperthreading support.
  381          */
  382         if ((cpu_feature & CPUID_HTT) && cpu_vendor_id == CPU_VENDOR_INTEL) {
  383                 /*
  384                  * If the "deterministic cache parameters" cpuid calls
  385                  * are available, use them.
  386                  */
  387                 if (cpu_high >= 4) {
  388                         /* Ask the processor about the L1 cache. */
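                              /*
                               * CPUID leaf 4: EAX bits 25:14 hold the maximum
                               * number of logical processors sharing this
                               * cache, minus one, and EAX bits 4:0 give the
                               * cache type, where 0 means no further cache
                               * levels are reported.
                               */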
  389                         for (i = 0; i < 1; i++) {
  390                                 cpuid_count(4, i, p);
  391                                 threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
  392                                 if (hyperthreading_cpus < threads_per_cache)
  393                                         hyperthreading_cpus = threads_per_cache;
  394                                 if ((p[0] & 0x1f) == 0)
  395                                         break;
  396                         }
  397                 }
  398 
  399                 /*
  400                  * If the deterministic cache parameters are not
  401                  * available, or if no caches were reported to exist,
  402                  * just accept what the HTT flag indicated.
  403                  */
  404                 if (hyperthreading_cpus == 0)
  405                         hyperthreading_cpus = logical_cpus;
  406         }
  407 
  408         assign_cpu_ids();
  409 
  410         /* Start each Application Processor */
  411         start_all_aps();
  412 
  413         set_interrupt_apic_ids();
  414 
  415         /* Last, setup the cpu topology now that we have probed CPUs */
  416         mp_topology();
  417 }
  418 
  419 
  420 /*
  421  * Print various information about the SMP system hardware and setup.
  422  */
  423 void
  424 cpu_mp_announce(void)
  425 {
  426         const char *hyperthread;
  427         int i;
  428 
  429         /* List active CPUs first. */
  430         printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
  431         for (i = 1; i < mp_ncpus; i++) {
  432                 if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread)
  433                         hyperthread = "/HT";
  434                 else
  435                         hyperthread = "";
  436                 printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread,
  437                     cpu_apic_ids[i]);
  438         }
  439 
  440         /* List disabled CPUs last. */
  441         for (i = 0; i <= MAX_APIC_ID; i++) {
  442                 if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled)
  443                         continue;
  444                 if (cpu_info[i].cpu_hyperthread)
  445                         hyperthread = "/HT";
  446                 else
  447                         hyperthread = "";
  448                 printf("  cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread,
  449                     i);
  450         }
  451 }
  452 
  453 /*
  454  * AP CPUs call this to initialize themselves.
  455  */
  456 void
  457 init_secondary(void)
  458 {
  459         struct pcpu *pc;
  460         struct nmi_pcpu *np;
  461         u_int64_t msr, cr0;
  462         int cpu, gsel_tss, x;
  463         struct region_descriptor ap_gdt;
  464 
  465         /* Set by the startup code for us to use */
  466         cpu = bootAP;
  467 
  468         /* Init tss */
  469         common_tss[cpu] = common_tss[0];
  470         common_tss[cpu].tss_rsp0 = 0;   /* not used until after switch */
  471         common_tss[cpu].tss_iobase = sizeof(struct amd64tss);
  472         common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];
  473 
  474         /* The NMI stack runs on IST2. */
  475         np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
  476         common_tss[cpu].tss_ist2 = (long) np;
  477 
  478         /* Prepare private GDT */
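              /*
               * Each CPU gets its own NGDT-entry slice of the gdt[] array and
               * its own TSS, so the TSS descriptor written below can point at
               * common_tss[cpu]; the remaining descriptors are copied from
               * the gdt_segs template.
               */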
  479         gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
  480         ssdtosyssd(&gdt_segs[GPROC0_SEL],
  481            (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
  482         for (x = 0; x < NGDT; x++) {
  483                 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
  484                         ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
  485         }
  486         ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
  487         ap_gdt.rd_base =  (long) &gdt[NGDT * cpu];
  488         lgdt(&ap_gdt);                  /* does magic intra-segment return */
  489 
  490         /* Get per-cpu data */
  491         pc = &__pcpu[cpu];
  492 
  493         /* prime data page for it to use */
  494         pcpu_init(pc, cpu, sizeof(struct pcpu));
  495         pc->pc_apic_id = cpu_apic_ids[cpu];
  496         pc->pc_prvspace = pc;
  497         pc->pc_curthread = 0;
  498         pc->pc_tssp = &common_tss[cpu];
  499         pc->pc_rsp0 = 0;
  500         pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
  501 
  502         /* Save the per-cpu pointer for use by the NMI handler. */
  503         np->np_pcpu = (register_t) pc;
  504 
  505         wrmsr(MSR_FSBASE, 0);           /* User value */
  506         wrmsr(MSR_GSBASE, (u_int64_t)pc);
  507         wrmsr(MSR_KGSBASE, (u_int64_t)pc);      /* XXX User value while we're in the kernel */
  508 
  509         lidt(&r_idt);
  510 
  511         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
  512         ltr(gsel_tss);
  513 
  514         /*
  515          * Set to a known state:
  516          * Set by mpboot.s: CR0_PG, CR0_PE
  517          * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
  518          */
  519         cr0 = rcr0();
  520         cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
  521         load_cr0(cr0);
  522 
  523         /* Set up the fast syscall stuff */
  524         msr = rdmsr(MSR_EFER) | EFER_SCE;
  525         wrmsr(MSR_EFER, msr);
  526         wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
  527         wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
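              /*
               * MSR_STAR supplies the selector bases for fast syscalls: bits
               * 47:32 give the kernel CS (and, offset by 8, SS) loaded by
               * SYSCALL, while bits 63:48 give the base from which SYSRET
               * derives the user CS/SS, starting at the 32-bit user code
               * selector.
               */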
  528         msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
  529               ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
  530         wrmsr(MSR_STAR, msr);
  531         wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
  532 
  533         /* Disable local APIC just to be sure. */
  534         lapic_disable();
  535 
  536         /* signal our startup to the BSP. */
  537         mp_naps++;
  538 
  539         /* Spin until the BSP releases the APs. */
  540         while (!aps_ready)
  541                 ia32_pause();
  542 
  543         /* Initialize the PAT MSR. */
  544         pmap_init_pat();
  545 
  546         /* set up CPU registers and state */
  547         cpu_setregs();
  548 
  549         /* set up SSE/NX registers */
  550         initializecpu();
  551 
  552         /* set up FPU state on the AP */
  553         fpuinit();
  554 
  555         /* A quick check from sanity claus */
  556         if (PCPU_GET(apic_id) != lapic_id()) {
  557                 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
  558                 printf("SMP: actual apic_id = %d\n", lapic_id());
  559                 printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
  560                 panic("cpuid mismatch! boom!!");
  561         }
  562 
  563         /* Initialize curthread. */
  564         KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
  565         PCPU_SET(curthread, PCPU_GET(idlethread));
  566 
  567         mca_init();
  568 
  569         mtx_lock_spin(&ap_boot_mtx);
  570 
  571         /* Init local apic for irq's */
  572         lapic_setup(1);
  573 
  574         /* Set memory range attributes for this CPU to match the BSP */
  575         mem_range_AP_init();
  576 
  577         smp_cpus++;
  578 
  579         CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
  580         printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
  581 
  582         /* Determine if we are a logical CPU. */
  583         if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
  584                 logical_cpus_mask |= PCPU_GET(cpumask);
  585         
  586         /* Determine if we are a hyperthread. */
  587         if (hyperthreading_cpus > 1 &&
  588             PCPU_GET(apic_id) % hyperthreading_cpus != 0)
  589                 hyperthreading_cpus_mask |= PCPU_GET(cpumask);
  590 
  591         /* Build our map of 'other' CPUs. */
  592         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  593 
  594         if (bootverbose)
  595                 lapic_dump("AP");
  596 
  597         if (smp_cpus == mp_ncpus) {
  598                 /* enable IPI's, tlb shootdown, freezes etc */
  599                 atomic_store_rel_int(&smp_started, 1);
  600                 smp_active = 1;  /* historic */
  601         }
  602 
  603         /*
  604          * Enable global pages TLB extension
  605          * This also implicitly flushes the TLB 
  606          */
  607 
  608         load_cr4(rcr4() | CR4_PGE);
  609 
  610         mtx_unlock_spin(&ap_boot_mtx);
  611 
  612         /* wait until all the APs are up */
  613         while (smp_started == 0)
  614                 ia32_pause();
  615 
  616         sched_throw(NULL);
  617 
  618         panic("scheduler returned us to %s", __func__);
  619         /* NOTREACHED */
  620 }
  621 
  622 /*******************************************************************
  623  * local functions and data
  624  */
  625 
  626 /*
  627  * We tell the I/O APIC code about all the CPUs we want to receive
  628  * interrupts.  If we don't want certain CPUs to receive IRQs we
  629  * can simply not tell the I/O APIC code about them in this function.
  630  * We also do not tell it about the BSP since it tells itself about
  631  * the BSP internally to work with UP kernels and on UP machines.
  632  */
  633 static void
  634 set_interrupt_apic_ids(void)
  635 {
  636         u_int i, apic_id;
  637 
  638         for (i = 0; i < MAXCPU; i++) {
  639                 apic_id = cpu_apic_ids[i];
  640                 if (apic_id == -1)
  641                         continue;
  642                 if (cpu_info[apic_id].cpu_bsp)
  643                         continue;
  644                 if (cpu_info[apic_id].cpu_disabled)
  645                         continue;
  646 
  647                 /* Don't let hyperthreads service interrupts. */
  648                 if (hyperthreading_cpus > 1 &&
  649                     apic_id % hyperthreading_cpus != 0)
  650                         continue;
  651 
  652                 intr_add_cpu(i);
  653         }
  654 }
  655 
  656 /*
  657  * Assign logical CPU IDs to local APICs.
  658  */
  659 static void
  660 assign_cpu_ids(void)
  661 {
  662         u_int i;
  663 
  664         TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
  665             &hyperthreading_allowed);
  666 
  667         /* Check for explicitly disabled CPUs. */
  668         for (i = 0; i <= MAX_APIC_ID; i++) {
  669                 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
  670                         continue;
  671 
  672                 if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
  673                         cpu_info[i].cpu_hyperthread = 1;
  674 #if defined(SCHED_ULE)
  675                         /*
  676                          * Don't use HT CPU if it has been disabled by a
  677                          * tunable.
  678                          */
  679                         if (hyperthreading_allowed == 0) {
  680                                 cpu_info[i].cpu_disabled = 1;
  681                                 continue;
  682                         }
  683 #endif
  684                 }
  685 
  686                 /* Don't use this CPU if it has been disabled by a tunable. */
  687                 if (resource_disabled("lapic", i)) {
  688                         cpu_info[i].cpu_disabled = 1;
  689                         continue;
  690                 }
  691         }
  692 
  693         /*
  694          * Assign CPU IDs to local APIC IDs and disable any CPUs
  695          * beyond MAXCPU.  CPU 0 is always assigned to the BSP.
  696          *
  697          * To minimize confusion for userland, we attempt to number
  698          * CPUs such that all threads and cores in a package are
  699          * grouped together.  For now we assume that the BSP is always
  700          * the first thread in a package and just start adding APs
  701          * starting with the BSP's APIC ID.
  702          */
  703         mp_ncpus = 1;
  704         cpu_apic_ids[0] = boot_cpu_id;
  705         for (i = boot_cpu_id + 1; i != boot_cpu_id;
  706              i == MAX_APIC_ID ? i = 0 : i++) {
  707                 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
  708                     cpu_info[i].cpu_disabled)
  709                         continue;
  710 
  711                 if (mp_ncpus < MAXCPU) {
  712                         cpu_apic_ids[mp_ncpus] = i;
  713                         mp_ncpus++;
  714                 } else
  715                         cpu_info[i].cpu_disabled = 1;
  716         }
  717         KASSERT(mp_maxid >= mp_ncpus - 1,
  718             ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
  719             mp_ncpus));         
  720 }
  721 
  722 /*
  723  * start each AP in our list
  724  */
  725 static int
  726 start_all_aps(void)
  727 {
  728         vm_offset_t va = boot_address + KERNBASE;
  729         u_int64_t *pt4, *pt3, *pt2;
  730         u_int32_t mpbioswarmvec;
  731         int apic_id, cpu, i;
  732         u_char mpbiosreason;
  733 
  734         mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
  735 
  736         /* install the AP 1st level boot code */
  737         pmap_kenter(va, boot_address);
  738         pmap_invalidate_page(kernel_pmap, va);
  739         bcopy(mptramp_start, (void *)va, bootMP_size);
  740 
  741         /* Locate the page tables, they'll be below the trampoline */
  742         pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
  743         pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
  744         pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
  745 
  746         /* Create the initial 1GB replicated page tables */
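              /*
               * Every slot of the top two levels points at the same
               * next-level page, so the low 1GB of physical memory is
               * identity-mapped at every 1GB-aligned virtual address.  The
               * trampoline can therefore enable paging while still running
               * at its low address and can also reach the kernel's high
               * link address before the real page tables are installed.
               */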
  747         for (i = 0; i < 512; i++) {
  748                 /* Each slot of the level 4 pages points to the same level 3 page */
  749                 pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
  750                 pt4[i] |= PG_V | PG_RW | PG_U;
  751 
  752                 /* Each slot of the level 3 pages points to the same level 2 page */
  753                 pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
  754                 pt3[i] |= PG_V | PG_RW | PG_U;
  755 
  756                 /* The level 2 page slots are mapped with 2MB pages for 1GB. */
  757                 pt2[i] = i * (2 * 1024 * 1024);
  758                 pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
  759         }
  760 
  761         /* save the current value of the warm-start vector */
  762         mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
  763         outb(CMOS_REG, BIOS_RESET);
  764         mpbiosreason = inb(CMOS_DATA);
  765 
  766         /* setup a vector to our boot code */
  767         *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
  768         *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
  769         outb(CMOS_REG, BIOS_RESET);
  770         outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
  771 
  772         /* start each AP */
  773         for (cpu = 1; cpu < mp_ncpus; cpu++) {
  774                 apic_id = cpu_apic_ids[cpu];
  775 
  776                 /* allocate and set up an idle stack data page */
  777                 bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
  778                 doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
  779                 nmi_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
  780 
  781                 bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
  782                 bootAP = cpu;
  783 
  784                 /* attempt to start the Application Processor */
  785                 if (!start_ap(apic_id)) {
  786                         /* restore the warmstart vector */
  787                         *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
  788                         panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
  789                 }
  790 
  791                 all_cpus |= (1 << cpu);         /* record AP in CPU map */
  792         }
  793 
  794         /* build our map of 'other' CPUs */
  795         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  796 
  797         /* restore the warmstart vector */
  798         *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
  799 
  800         outb(CMOS_REG, BIOS_RESET);
  801         outb(CMOS_DATA, mpbiosreason);
  802 
  803         /* number of APs actually started */
  804         return mp_naps;
  805 }
  806 
  807 
  808 /*
  809  * This function starts the AP (application processor) identified
  810  * by its APIC ID.  It does quite a "song and dance"
  811  * to accomplish this.  This is necessary because of the nuances
  812  * of the different hardware we might encounter.  It isn't pretty,
  813  * but it seems to work.
  814  */
  815 static int
  816 start_ap(int apic_id)
  817 {
  818         int vector, ms;
  819         int cpus;
  820 
  821         /* calculate the vector */
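              /*
               * The 8-bit STARTUP IPI vector selects the 4KB page at which
               * the AP begins executing in real mode (physical address
               * vector << 12), which is where the trampoline was copied.
               */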
  822         vector = (boot_address >> 12) & 0xff;
  823 
  824         /* used as a watchpoint to signal AP startup */
  825         cpus = mp_naps;
  826 
  827         /*
  828          * First we do an INIT/RESET IPI.  This INIT IPI might be run,
  829          * resetting and running the target CPU.  OR this INIT IPI might be
  830          * latched (P5 bug), leaving the CPU waiting for a STARTUP IPI.  OR
  831          * this INIT IPI might be ignored.
  832          */
  833 
  834         /* do an INIT IPI: assert RESET */
  835         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  836             APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
  837 
  838         /* wait for pending status end */
  839         lapic_ipi_wait(-1);
  840 
  841         /* do an INIT IPI: deassert RESET */
  842         lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
  843             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
  844 
  845         /* wait for pending status end */
  846         DELAY(10000);           /* wait ~10ms */
  847         lapic_ipi_wait(-1);
  848 
  849         /*
  850          * Next we do a STARTUP IPI: the previous INIT IPI might still be
  851          * latched (P5 bug), in which case this first STARTUP would terminate
  852          * immediately and the previously started INIT IPI would continue.  OR
  853          * the previous INIT IPI has already run, and this STARTUP IPI will
  854          * run.  OR the previous INIT IPI was ignored, and this STARTUP IPI
  855          * will run.
  856          */
  857 
  858         /* do a STARTUP IPI */
  859         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  860             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  861             vector, apic_id);
  862         lapic_ipi_wait(-1);
  863         DELAY(200);             /* wait ~200us */
  864 
  865         /*
  866          * Finally we do a second STARTUP IPI: this second STARTUP IPI should
  867          * run IF the previous STARTUP IPI was cancelled by a latched INIT
  868          * IPI.  Otherwise this STARTUP IPI will be ignored, as only ONE
  869          * STARTUP IPI is recognized after a hardware RESET or INIT IPI.
  870          */
  871 
  872         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  873             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  874             vector, apic_id);
  875         lapic_ipi_wait(-1);
  876         DELAY(200);             /* wait ~200us */
  877 
  878         /* Wait up to 5 seconds for it to start. */
  879         for (ms = 0; ms < 5000; ms++) {
  880                 if (mp_naps > cpus)
  881                         return 1;       /* return SUCCESS */
  882                 DELAY(1000);
  883         }
  884         return 0;               /* return FAILURE */
  885 }
  886 
  887 /*
  888  * Flush the TLB on all other CPUs.
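       *
       * The initiating CPU publishes the address range, zeroes
       * smp_tlb_wait, sends the IPI and then spins until every other
       * CPU's handler has bumped smp_tlb_wait to signal that its local
       * invalidation is done.  smp_ipi_mtx serializes shootdowns.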
  889  */
  890 static void
  891 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  892 {
  893         u_int ncpu;
  894 
  895         ncpu = mp_ncpus - 1;    /* does not shootdown self */
  896         if (ncpu < 1)
  897                 return;         /* no other cpus */
  898         if (!(read_rflags() & PSL_I))
  899                 panic("%s: interrupts disabled", __func__);
  900         mtx_lock_spin(&smp_ipi_mtx);
  901         smp_tlb_addr1 = addr1;
  902         smp_tlb_addr2 = addr2;
  903         atomic_store_rel_int(&smp_tlb_wait, 0);
  904         ipi_all_but_self(vector);
  905         while (smp_tlb_wait < ncpu)
  906                 ia32_pause();
  907         mtx_unlock_spin(&smp_ipi_mtx);
  908 }
  909 
  910 static void
  911 smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  912 {
  913         int ncpu, othercpus;
  914 
  915         othercpus = mp_ncpus - 1;
  916         if (mask == (u_int)-1) {
  917                 ncpu = othercpus;
  918                 if (ncpu < 1)
  919                         return;
  920         } else {
  921                 mask &= ~PCPU_GET(cpumask);
  922                 if (mask == 0)
  923                         return;
  924                 ncpu = bitcount32(mask);
  925                 if (ncpu > othercpus) {
  926                         /* XXX this should be a panic offence */
  927                         printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
  928                             ncpu, othercpus);
  929                         ncpu = othercpus;
  930                 }
  931                 /* XXX should be a panic, implied by mask == 0 above */
  932                 if (ncpu < 1)
  933                         return;
  934         }
  935         if (!(read_rflags() & PSL_I))
  936                 panic("%s: interrupts disabled", __func__);
  937         mtx_lock_spin(&smp_ipi_mtx);
  938         smp_tlb_addr1 = addr1;
  939         smp_tlb_addr2 = addr2;
  940         atomic_store_rel_int(&smp_tlb_wait, 0);
  941         if (mask == (u_int)-1)
  942                 ipi_all_but_self(vector);
  943         else
  944                 ipi_selected(mask, vector);
  945         while (smp_tlb_wait < ncpu)
  946                 ia32_pause();
  947         mtx_unlock_spin(&smp_ipi_mtx);
  948 }
  949 
  950 void
  951 smp_cache_flush(void)
  952 {
  953 
  954         if (smp_started)
  955                 smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
  956 }
  957 
  958 void
  959 smp_invltlb(void)
  960 {
  961 
  962         if (smp_started) {
  963                 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
  964         }
  965 }
  966 
  967 void
  968 smp_invlpg(vm_offset_t addr)
  969 {
  970 
  971         if (smp_started)
  972                 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
  973 }
  974 
  975 void
  976 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
  977 {
  978 
  979         if (smp_started) {
  980                 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
  981         }
  982 }
  983 
  984 void
  985 smp_masked_invltlb(u_int mask)
  986 {
  987 
  988         if (smp_started) {
  989                 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
  990         }
  991 }
  992 
  993 void
  994 smp_masked_invlpg(u_int mask, vm_offset_t addr)
  995 {
  996 
  997         if (smp_started) {
  998                 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
  999         }
 1000 }
 1001 
 1002 void
 1003 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
 1004 {
 1005 
 1006         if (smp_started) {
 1007                 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
 1008         }
 1009 }
 1010 
 1011 void
 1012 ipi_bitmap_handler(struct trapframe frame)
 1013 {
 1014         int cpu = PCPU_GET(cpuid);
 1015         u_int ipi_bitmap;
 1016 
 1017         ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
 1018 
 1019         if (ipi_bitmap & (1 << IPI_PREEMPT)) {
 1020                 struct thread *running_thread = curthread;
 1021                 thread_lock(running_thread);
 1022                 if (running_thread->td_critnest > 1) 
 1023                         running_thread->td_owepreempt = 1;
 1024                 else            
 1025                         mi_switch(SW_INVOL | SW_PREEMPT, NULL);
 1026                 thread_unlock(running_thread);
 1027         }
 1028 
 1029         /* Nothing to do for AST */
 1030 }
 1031 
 1032 /*
 1033  * send an IPI to a set of cpus.
 1034  */
 1035 void
 1036 ipi_selected(u_int32_t cpus, u_int ipi)
 1037 {
 1038         int cpu;
 1039         u_int bitmap = 0;
 1040         u_int old_pending;
 1041         u_int new_pending;
 1042 
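              /*
               * Bitmap-based IPIs (e.g. IPI_PREEMPT) share the single
               * IPI_BITMAP_VECTOR; the specific request is recorded in the
               * target's cpu_ipi_pending word and decoded later by
               * ipi_bitmap_handler(), so an already-pending request does not
               * need another vector to be sent.
               */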
 1043         if (IPI_IS_BITMAPED(ipi)) { 
 1044                 bitmap = 1 << ipi;
 1045                 ipi = IPI_BITMAP_VECTOR;
 1046         }
 1047 
 1048 #ifdef STOP_NMI
 1049         if (ipi == IPI_STOP && stop_cpus_with_nmi) {
 1050                 ipi_nmi_selected(cpus);
 1051                 return;
 1052         }
 1053 #endif
 1054         CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 1055         while ((cpu = ffs(cpus)) != 0) {
 1056                 cpu--;
 1057                 cpus &= ~(1 << cpu);
 1058 
 1059                 KASSERT(cpu_apic_ids[cpu] != -1,
 1060                     ("IPI to non-existent CPU %d", cpu));
 1061 
 1062                 if (bitmap) {
 1063                         do {
 1064                                 old_pending = cpu_ipi_pending[cpu];
 1065                                 new_pending = old_pending | bitmap;
 1066                         } while  (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending));  
 1067 
 1068                         if (old_pending)
 1069                                 continue;
 1070                 }
 1071 
 1072                 lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
 1073         }
 1074 
 1075 }
 1076 
 1077 /*
 1078  * send an IPI to all CPUs, including myself
 1079  */
 1080 void
 1081 ipi_all(u_int ipi)
 1082 {
 1083 
 1084         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1085                 ipi_selected(all_cpus, ipi);
 1086                 return;
 1087         }
 1088         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1089         lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
 1090 }
 1091 
 1092 /*
 1093  * send an IPI to all CPUs EXCEPT myself
 1094  */
 1095 void
 1096 ipi_all_but_self(u_int ipi)
 1097 {
 1098 
 1099         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1100                 ipi_selected(PCPU_GET(other_cpus), ipi);
 1101                 return;
 1102         }
 1103         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1104         lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 1105 }
 1106 
 1107 /*
 1108  * send an IPI to myself
 1109  */
 1110 void
 1111 ipi_self(u_int ipi)
 1112 {
 1113 
 1114         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1115                 ipi_selected(PCPU_GET(cpumask), ipi);
 1116                 return;
 1117         }
 1118         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1119         lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
 1120 }
 1121 
 1122 #ifdef STOP_NMI
 1123 /*
 1124  * send NMI IPI to selected CPUs
 1125  */
 1126 
 1127 #define BEFORE_SPIN     1000000
 1128 
 1129 void
 1130 ipi_nmi_selected(u_int32_t cpus)
 1131 {
 1132         int cpu;
 1133         register_t icrlo;
 1134 
 1135         icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 
 1136                 | APIC_TRIGMOD_EDGE; 
 1137         
 1138         CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);
 1139 
 1140         atomic_set_int(&ipi_nmi_pending, cpus);
 1141 
 1142         while ((cpu = ffs(cpus)) != 0) {
 1143                 cpu--;
 1144                 cpus &= ~(1 << cpu);
 1145 
 1146                 KASSERT(cpu_apic_ids[cpu] != -1,
 1147                     ("IPI NMI to non-existent CPU %d", cpu));
 1148                 
 1149                 /* Wait for an earlier IPI to finish. */
 1150                 if (!lapic_ipi_wait(BEFORE_SPIN))
 1151                         panic("ipi_nmi_selected: previous IPI has not cleared");
 1152 
 1153                 lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
 1154         }
 1155 }
 1156 
 1157 int
 1158 ipi_nmi_handler(void)
 1159 {
 1160         int cpumask = PCPU_GET(cpumask);
 1161 
 1162         if (!(ipi_nmi_pending & cpumask))
 1163                 return 1;
 1164 
 1165         atomic_clear_int(&ipi_nmi_pending, cpumask);
 1166         cpustop_handler();
 1167         return 0;
 1168 }
 1169      
 1170 #endif /* STOP_NMI */
 1171 
 1172 /*
 1173  * Handle an IPI_STOP by saving our current context and spinning until we
 1174  * are resumed.
 1175  */
 1176 void
 1177 cpustop_handler(void)
 1178 {
 1179         int cpu = PCPU_GET(cpuid);
 1180         int cpumask = PCPU_GET(cpumask);
 1181 
 1182         savectx(&stoppcbs[cpu]);
 1183 
 1184         /* Indicate that we are stopped */
 1185         atomic_set_int(&stopped_cpus, cpumask);
 1186 
 1187         /* Wait for restart */
 1188         while (!(started_cpus & cpumask))
 1189             ia32_pause();
 1190 
 1191         atomic_clear_int(&started_cpus, cpumask);
 1192         atomic_clear_int(&stopped_cpus, cpumask);
 1193 
 1194         if (cpu == 0 && cpustop_restartfunc != NULL) {
 1195                 cpustop_restartfunc();
 1196                 cpustop_restartfunc = NULL;
 1197         }
 1198 }
 1199 
 1200 /*
 1201  * This is called once the rest of the system is up and running and we're
 1202  * ready to let the APs out of the pen.
 1203  */
 1204 static void
 1205 release_aps(void *dummy __unused)
 1206 {
 1207 
 1208         if (mp_ncpus == 1) 
 1209                 return;
 1210         atomic_store_rel_int(&aps_ready, 1);
 1211         while (smp_started == 0)
 1212                 ia32_pause();
 1213 }
 1214 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 1215 
 1216 static int
 1217 sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
 1218 {
 1219         u_int mask;
 1220         int error;
 1221 
 1222         mask = hlt_cpus_mask;
 1223         error = sysctl_handle_int(oidp, &mask, 0, req);
 1224         if (error || !req->newptr)
 1225                 return (error);
 1226 
 1227         if (logical_cpus_mask != 0 &&
 1228             (mask & logical_cpus_mask) == logical_cpus_mask)
 1229                 hlt_logical_cpus = 1;
 1230         else
 1231                 hlt_logical_cpus = 0;
 1232 
 1233         if (! hyperthreading_allowed)
 1234                 mask |= hyperthreading_cpus_mask;
 1235 
 1236         if ((mask & all_cpus) == all_cpus)
 1237                 mask &= ~(1<<0);
 1238         hlt_cpus_mask = mask;
 1239         return (error);
 1240 }
 1241 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
 1242     0, 0, sysctl_hlt_cpus, "IU",
 1243     "Bitmap of CPUs to halt.  101 (binary) will halt CPUs 0 and 2.");
 1244 
 1245 static int
 1246 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
 1247 {
 1248         int disable, error;
 1249 
 1250         disable = hlt_logical_cpus;
 1251         error = sysctl_handle_int(oidp, &disable, 0, req);
 1252         if (error || !req->newptr)
 1253                 return (error);
 1254 
 1255         if (disable)
 1256                 hlt_cpus_mask |= logical_cpus_mask;
 1257         else
 1258                 hlt_cpus_mask &= ~logical_cpus_mask;
 1259 
 1260         if (! hyperthreading_allowed)
 1261                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1262 
 1263         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1264                 hlt_cpus_mask &= ~(1<<0);
 1265 
 1266         hlt_logical_cpus = disable;
 1267         return (error);
 1268 }
 1269 
 1270 static int
 1271 sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
 1272 {
 1273         int allowed, error;
 1274 
 1275         allowed = hyperthreading_allowed;
 1276         error = sysctl_handle_int(oidp, &allowed, 0, req);
 1277         if (error || !req->newptr)
 1278                 return (error);
 1279 
 1280 #ifdef SCHED_ULE
 1281         /*
 1282          * SCHED_ULE doesn't allow enabling/disabling HT cores at
 1283          * run-time.
 1284          */
 1285         if (allowed != hyperthreading_allowed)
 1286                 return (ENOTSUP);
 1287         return (error);
 1288 #endif
 1289 
 1290         if (allowed)
 1291                 hlt_cpus_mask &= ~hyperthreading_cpus_mask;
 1292         else
 1293                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1294 
 1295         if (logical_cpus_mask != 0 &&
 1296             (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
 1297                 hlt_logical_cpus = 1;
 1298         else
 1299                 hlt_logical_cpus = 0;
 1300 
 1301         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1302                 hlt_cpus_mask &= ~(1<<0);
 1303 
 1304         hyperthreading_allowed = allowed;
 1305         return (error);
 1306 }
 1307 
 1308 static void
 1309 cpu_hlt_setup(void *dummy __unused)
 1310 {
 1311 
 1312         if (logical_cpus_mask != 0) {
 1313                 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
 1314                     &hlt_logical_cpus);
 1315                 sysctl_ctx_init(&logical_cpu_clist);
 1316                 SYSCTL_ADD_PROC(&logical_cpu_clist,
 1317                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1318                     "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
 1319                     sysctl_hlt_logical_cpus, "IU", "");
 1320                 SYSCTL_ADD_UINT(&logical_cpu_clist,
 1321                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1322                     "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
 1323                     &logical_cpus_mask, 0, "");
 1324 
 1325                 if (hlt_logical_cpus)
 1326                         hlt_cpus_mask |= logical_cpus_mask;
 1327 
 1328                 /*
 1329                  * If necessary for security purposes, force
 1330                  * hyperthreading off, regardless of the value
 1331                  * of hlt_logical_cpus.
 1332                  */
 1333                 if (hyperthreading_cpus_mask) {
 1334                         SYSCTL_ADD_PROC(&logical_cpu_clist,
 1335                             SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1336                             "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
 1337                             0, 0, sysctl_hyperthreading_allowed, "IU", "");
 1338                         if (! hyperthreading_allowed)
 1339                                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1340                 }
 1341         }
 1342 }
 1343 SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
 1344 
 1345 int
 1346 mp_grab_cpu_hlt(void)
 1347 {
 1348         u_int mask = PCPU_GET(cpumask);
 1349 #ifdef MP_WATCHDOG
 1350         u_int cpuid = PCPU_GET(cpuid);
 1351 #endif
 1352         int retval;
 1353 
 1354 #ifdef MP_WATCHDOG
 1355         ap_watchdog(cpuid);
 1356 #endif
 1357 
 1358         retval = mask & hlt_cpus_mask;
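              /*
               * "sti; hlt" is used because STI only takes effect after the
               * following instruction, so an interrupt cannot slip in
               * between enabling interrupts and halting.
               */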
 1359         while (mask & hlt_cpus_mask)
 1360                 __asm __volatile("sti; hlt" : : : "memory");
 1361         return (retval);
 1362 }

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.