FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/mp_machdep.c

    1 /*-
    2  * Copyright (c) 1996, by Steve Passe
    3  * Copyright (c) 2003, by Peter Wemm
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. The name of the developer may NOT be used to endorse or promote products
   12  *    derived from this software without specific prior written permission.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD$");
   29 
   30 #include "opt_cpu.h"
   31 #include "opt_kstack_pages.h"
   32 #include "opt_mp_watchdog.h"
   33 #include "opt_sched.h"
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/bus.h>
   38 #ifdef GPROF 
   39 #include <sys/gmon.h>
   40 #endif
   41 #include <sys/kernel.h>
   42 #include <sys/ktr.h>
   43 #include <sys/lock.h>
   44 #include <sys/malloc.h>
   45 #include <sys/memrange.h>
   46 #include <sys/mutex.h>
   47 #include <sys/pcpu.h>
   48 #include <sys/proc.h>
   49 #include <sys/sched.h>
   50 #include <sys/smp.h>
   51 #include <sys/sysctl.h>
   52 
   53 #include <vm/vm.h>
   54 #include <vm/vm_param.h>
   55 #include <vm/pmap.h>
   56 #include <vm/vm_kern.h>
   57 #include <vm/vm_extern.h>
   58 
   59 #include <machine/apicreg.h>
   60 #include <machine/md_var.h>
   61 #include <machine/mp_watchdog.h>
   62 #include <machine/pcb.h>
   63 #include <machine/psl.h>
   64 #include <machine/smp.h>
   65 #include <machine/specialreg.h>
   66 #include <machine/tss.h>
   67 
   68 #define WARMBOOT_TARGET         0
   69 #define WARMBOOT_OFF            (KERNBASE + 0x0467)
   70 #define WARMBOOT_SEG            (KERNBASE + 0x0469)
   71 
   72 #define CMOS_REG                (0x70)
   73 #define CMOS_DATA               (0x71)
   74 #define BIOS_RESET              (0x0f)
   75 #define BIOS_WARM               (0x0a)
   76 
   77 /* lock region used by kernel profiling */
   78 int     mcount_lock;
   79 
    80 int     mp_naps;                /* # of application processors */
   81 int     boot_cpu_id = -1;       /* designated BSP */
   82 extern  int nkpt;
   83 
   84 extern  struct pcpu __pcpu[];
   85 
   86 /*
   87  * CPU topology map datastructures for HTT.
   88  */
   89 static struct cpu_group mp_groups[MAXCPU];
   90 static struct cpu_top mp_top;
   91 
   92 /* AP uses this during bootstrap.  Do not staticize.  */
   93 char *bootSTK;
   94 static int bootAP;
   95 
   96 /* Free these after use */
   97 void *bootstacks[MAXCPU];
   98 
   99 /* Temporary holder for double fault stack */
  100 char *doublefault_stack;
  101 
  102 /* Hotwire a 0->4MB V==P mapping */
  103 extern pt_entry_t *KPTphys;
  104 
  105 /* SMP page table page */
  106 extern pt_entry_t *SMPpt;
  107 
  108 struct pcb stoppcbs[MAXCPU];
  109 
  110 /* Variables needed for SMP tlb shootdown. */
  111 vm_offset_t smp_tlb_addr1;
  112 vm_offset_t smp_tlb_addr2;
  113 volatile int smp_tlb_wait;
  114 
  115 extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
  116 
  117 #ifdef STOP_NMI
  118 volatile cpumask_t ipi_nmi_pending;
  119 
  120 static void     ipi_nmi_selected(u_int32_t cpus);
  121 #endif 
  122 
  123 /*
  124  * Local data and functions.
  125  */
  126 
  127 #ifdef STOP_NMI
  128 /* 
  129  * Provide an alternate method of stopping other CPUs. If another CPU has
  130  * disabled interrupts the conventional STOP IPI will be blocked. This 
  131  * NMI-based stop should get through in that case.
  132  */
  133 static int stop_cpus_with_nmi = 1;
  134 SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW,
  135     &stop_cpus_with_nmi, 0, "");
  136 TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi);
  137 #else
  138 #define stop_cpus_with_nmi      0
  139 #endif
  140 
  141 static u_int logical_cpus;
  142 
   143 /* used to hold the APs until we are ready to release them */
  144 static struct mtx ap_boot_mtx;
  145 
  146 /* Set to 1 once we're ready to let the APs out of the pen. */
  147 static volatile int aps_ready = 0;
  148 
  149 /*
  150  * Store data from cpu_add() until later in the boot when we actually setup
  151  * the APs.
  152  */
  153 struct cpu_info {
  154         int     cpu_present:1;
  155         int     cpu_bsp:1;
  156         int     cpu_disabled:1;
  157 } static cpu_info[MAX_APIC_ID + 1];
  158 int cpu_apic_ids[MAXCPU];
  159 
  160 /* Holds pending bitmap based IPIs per CPU */
  161 static volatile u_int cpu_ipi_pending[MAXCPU];
  162 
  163 static u_int boot_address;
  164 
  165 static void     assign_cpu_ids(void);
  166 static void     set_interrupt_apic_ids(void);
  167 static int      start_all_aps(void);
  168 static int      start_ap(int apic_id);
  169 static void     release_aps(void *dummy);
  170 
  171 static int      hlt_logical_cpus;
  172 static u_int    hyperthreading_cpus;
  173 static cpumask_t        hyperthreading_cpus_mask;
  174 static int      hyperthreading_allowed = 1;
  175 static struct   sysctl_ctx_list logical_cpu_clist;
  176 static u_int    bootMP_size;
  177 
  178 static void
  179 mem_range_AP_init(void)
  180 {
  181         if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
  182                 mem_range_softc.mr_op->initAP(&mem_range_softc);
  183 }
  184 
  185 void
  186 mp_topology(void)
  187 {
  188         struct cpu_group *group;
  189         int apic_id;
  190         int groups;
  191         int cpu;
  192 
  193         /* Build the smp_topology map. */
  194         /* Nothing to do if there is no HTT support. */
  195         if (hyperthreading_cpus <= 1)
  196                 return;
  197         group = &mp_groups[0];
  198         groups = 1;
  199         for (cpu = 0, apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) {
  200                 if (!cpu_info[apic_id].cpu_present)
  201                         continue;
  202                 /*
  203                  * If the current group has members and we're not a logical
  204                  * cpu, create a new group.
  205                  */
  206                 if (group->cg_count != 0 &&
  207                     (apic_id % hyperthreading_cpus) == 0) {
  208                         group++;
  209                         groups++;
  210                 }
  211                 group->cg_count++;
  212                 group->cg_mask |= 1 << cpu;
  213                 cpu++;
  214         }
  215 
  216         mp_top.ct_count = groups;
  217         mp_top.ct_group = mp_groups;
  218         smp_topology = &mp_top;
  219 }
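       /*
        * For illustration of the grouping above (a sketch, assuming
        * hyperthreading_cpus == 2 and present APIC IDs 0, 1, 6 and 7): the
        * loop assigns logical cpus 0-3 in APIC ID order and starts a new
        * group whenever apic_id % hyperthreading_cpus == 0, yielding
        *
        *      mp_groups[0].cg_mask == 0x3     (cpu0, cpu1 = APIC IDs 0, 1)
        *      mp_groups[1].cg_mask == 0xc     (cpu2, cpu3 = APIC IDs 6, 7)
        *
        * so each cpu_group covers one physical package and the hyperthread
        * siblings within it.
        */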
  220 
  221 /*
  222  * Calculate usable address in base memory for AP trampoline code.
  223  */
  224 u_int
  225 mp_bootaddress(u_int basemem)
  226 {
  227 
  228         bootMP_size = mptramp_end - mptramp_start;
  229         boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */
  230         if (((basemem * 1024) - boot_address) < bootMP_size)
  231                 boot_address -= PAGE_SIZE;      /* not enough, lower by 4k */
  232         /* 3 levels of page table pages */
  233         mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
  234 
  235         return mptramp_pagetables;
  236 }
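       /*
        * A worked example of the arithmetic above, assuming a typical
        * basemem of 639KB (0x9fc00 bytes) and a trampoline smaller than
        * one page:
        *
        *      boot_address    = trunc_page(639 * 1024) = 0x9f000
        *      space left      = 0x9fc00 - 0x9f000 = 0xc00 (3KB)
        *
        * If bootMP_size fits in that 3KB the address stands; otherwise it
        * is lowered by PAGE_SIZE to 0x9e000.  The three trampoline page
        * table pages then sit immediately below it, at
        * boot_address - 3 * PAGE_SIZE.
        */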
  237 
  238 void
  239 cpu_add(u_int apic_id, char boot_cpu)
  240 {
  241 
  242         if (apic_id > MAX_APIC_ID) {
  243                 panic("SMP: APIC ID %d too high", apic_id);
  244                 return;
  245         }
  246         KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
  247             apic_id));
  248         cpu_info[apic_id].cpu_present = 1;
  249         if (boot_cpu) {
  250                 KASSERT(boot_cpu_id == -1,
  251                     ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
  252                     boot_cpu_id));
  253                 boot_cpu_id = apic_id;
  254                 cpu_info[apic_id].cpu_bsp = 1;
  255         }
  256         if (mp_ncpus < MAXCPU) {
  257                 mp_ncpus++;
   258                 mp_maxid = mp_ncpus - 1;
  259         }
  260         if (bootverbose)
  261                 printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
  262                     "AP");
  263 }
  264 
  265 void
  266 cpu_mp_setmaxid(void)
  267 {
  268 
  269         /*
  270          * mp_maxid should be already set by calls to cpu_add().
  271          * Just sanity check its value here.
  272          */
  273         if (mp_ncpus == 0)
  274                 KASSERT(mp_maxid == 0,
  275                     ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
  276         else if (mp_ncpus == 1)
  277                 mp_maxid = 0;
  278         else
  279                 KASSERT(mp_maxid >= mp_ncpus - 1,
  280                     ("%s: counters out of sync: max %d, count %d", __func__,
  281                         mp_maxid, mp_ncpus));           
  282 }
  283 
  284 int
  285 cpu_mp_probe(void)
  286 {
  287 
  288         /*
  289          * Always record BSP in CPU map so that the mbuf init code works
  290          * correctly.
  291          */
  292         all_cpus = 1;
  293         if (mp_ncpus == 0) {
  294                 /*
  295                  * No CPUs were found, so this must be a UP system.  Setup
  296                  * the variables to represent a system with a single CPU
  297                  * with an id of 0.
  298                  */
  299                 mp_ncpus = 1;
  300                 return (0);
  301         }
  302 
  303         /* At least one CPU was found. */
  304         if (mp_ncpus == 1) {
  305                 /*
  306                  * One CPU was found, so this must be a UP system with
  307                  * an I/O APIC.
  308                  */
  309                 mp_maxid = 0;
  310                 return (0);
  311         }
  312 
  313         /* At least two CPUs were found. */
  314         return (1);
  315 }
  316 
  317 /*
   318  * Initialize the IPI handlers and start up the APs.
  319  */
  320 void
  321 cpu_mp_start(void)
  322 {
  323         int i;
  324         u_int threads_per_cache, p[4];
  325 
  326         /* Initialize the logical ID to APIC ID table. */
  327         for (i = 0; i < MAXCPU; i++) {
  328                 cpu_apic_ids[i] = -1;
  329                 cpu_ipi_pending[i] = 0;
  330         }
  331 
  332         /* Install an inter-CPU IPI for TLB invalidation */
  333         setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
  334         setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
  335         setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
  336 
  337         /* Install an inter-CPU IPI for cache invalidation. */
  338         setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0);
  339 
  340         /* Install an inter-CPU IPI for all-CPU rendezvous */
  341         setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);
  342 
  343         /* Install generic inter-CPU IPI handler */
  344         setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
  345                SDT_SYSIGT, SEL_KPL, 0);
  346 
  347         /* Install an inter-CPU IPI for CPU stop/restart */
  348         setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);
  349 
  350         /* Set boot_cpu_id if needed. */
  351         if (boot_cpu_id == -1) {
  352                 boot_cpu_id = PCPU_GET(apic_id);
  353                 cpu_info[boot_cpu_id].cpu_bsp = 1;
  354         } else
  355                 KASSERT(boot_cpu_id == PCPU_GET(apic_id),
  356                     ("BSP's APIC ID doesn't match boot_cpu_id"));
  357         cpu_apic_ids[0] = boot_cpu_id;
  358 
  359         assign_cpu_ids();
  360 
  361         /* Start each Application Processor */
  362         start_all_aps();
  363 
  364         /* Setup the initial logical CPUs info. */
  365         logical_cpus = logical_cpus_mask = 0;
  366         if (cpu_feature & CPUID_HTT)
  367                 logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
  368 
  369         /*
  370          * Work out if hyperthreading is *really* enabled.  This
  371          * is made really ugly by the fact that processors lie: Dual
  372          * core processors claim to be hyperthreaded even when they're
  373          * not, presumably because they want to be treated the same
  374          * way as HTT with respect to per-cpu software licensing.
  375          * At the time of writing (May 12, 2005) the only hyperthreaded
  376          * cpus are from Intel, and Intel's dual-core processors can be
  377          * identified via the "deterministic cache parameters" cpuid
  378          * calls.
  379          */
  380         /*
  381          * First determine if this is an Intel processor which claims
  382          * to have hyperthreading support.
  383          */
  384         if ((cpu_feature & CPUID_HTT) &&
  385             (strcmp(cpu_vendor, "GenuineIntel") == 0)) {
  386                 /*
  387                  * If the "deterministic cache parameters" cpuid calls
  388                  * are available, use them.
  389                  */
  390                 if (cpu_high >= 4) {
  391                         /* Ask the processor about the L1 cache. */
  392                         for (i = 0; i < 1; i++) {
  393                                 cpuid_count(4, i, p);
  394                                 threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
  395                                 if (hyperthreading_cpus < threads_per_cache)
  396                                         hyperthreading_cpus = threads_per_cache;
  397                                 if ((p[0] & 0x1f) == 0)
  398                                         break;
  399                         }
  400                 }
  401 
  402                 /*
  403                  * If the deterministic cache parameters are not
  404                  * available, or if no caches were reported to exist,
  405                  * just accept what the HTT flag indicated.
  406                  */
  407                 if (hyperthreading_cpus == 0)
  408                         hyperthreading_cpus = logical_cpus;
  409         }
  410 
  411         set_interrupt_apic_ids();
  412 
  413         /* Last, setup the cpu topology now that we have probed CPUs */
  414         mp_topology();
  415 }
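       /*
        * A sketch of the CPUID leaf 4 decoding used above: for each cache
        * reported by cpuid_count(4, i, p), EAX bits 4:0 give the cache type
        * (0 meaning "no more caches") and, on the Intel processors this code
        * targets, bits 25:14 give one less than the number of logical CPUs
        * sharing that cache, hence
        *
        *      threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
        *
        * A value of 2 for the L1 cache indicates a genuinely hyperthreaded
        * core, while a dual-core part without HTT reports 1.
        */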
  416 
  417 
  418 /*
  419  * Print various information about the SMP system hardware and setup.
  420  */
  421 void
  422 cpu_mp_announce(void)
  423 {
  424         int i, x;
  425 
  426         /* List CPUs */
  427         printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
  428         for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
  429                 if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
  430                         continue;
  431                 if (cpu_info[x].cpu_disabled)
  432                         printf("  cpu (AP): APIC ID: %2d (disabled)\n", x);
  433                 else {
  434                         KASSERT(i < mp_ncpus,
  435                             ("mp_ncpus and actual cpus are out of whack"));
  436                         printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
  437                 }
  438         }
  439 }
  440 
  441 /*
   442  * AP CPUs call this to initialize themselves.
  443  */
  444 void
  445 init_secondary(void)
  446 {
  447         struct pcpu *pc;
  448         u_int64_t msr, cr0;
  449         int cpu, gsel_tss, x;
  450         struct region_descriptor ap_gdt;
  451 
  452         /* Set by the startup code for us to use */
  453         cpu = bootAP;
  454 
  455         /* Init tss */
  456         common_tss[cpu] = common_tss[0];
  457         common_tss[cpu].tss_rsp0 = 0;   /* not used until after switch */
  458         common_tss[cpu].tss_iobase = sizeof(struct amd64tss);
  459         common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];
  460 
  461         /* Prepare private GDT */
  462         gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
  463         ssdtosyssd(&gdt_segs[GPROC0_SEL],
  464            (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
  465         for (x = 0; x < NGDT; x++) {
  466                 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
  467                         ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
  468         }
  469         ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
  470         ap_gdt.rd_base =  (long) &gdt[NGDT * cpu];
  471         lgdt(&ap_gdt);                  /* does magic intra-segment return */
  472 
  473         /* Get per-cpu data */
  474         pc = &__pcpu[cpu];
  475 
  476         /* prime data page for it to use */
  477         pcpu_init(pc, cpu, sizeof(struct pcpu));
  478         pc->pc_apic_id = cpu_apic_ids[cpu];
  479         pc->pc_prvspace = pc;
  480         pc->pc_curthread = 0;
  481         pc->pc_tssp = &common_tss[cpu];
  482         pc->pc_rsp0 = 0;
  483         pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
  484 
  485         wrmsr(MSR_FSBASE, 0);           /* User value */
  486         wrmsr(MSR_GSBASE, (u_int64_t)pc);
  487         wrmsr(MSR_KGSBASE, (u_int64_t)pc);      /* XXX User value while we're in the kernel */
  488 
  489         lidt(&r_idt);
  490 
  491         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
  492         ltr(gsel_tss);
  493 
  494         /*
  495          * Set to a known state:
  496          * Set by mpboot.s: CR0_PG, CR0_PE
  497          * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
  498          */
  499         cr0 = rcr0();
  500         cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
  501         load_cr0(cr0);
  502 
  503         /* Set up the fast syscall stuff */
  504         msr = rdmsr(MSR_EFER) | EFER_SCE;
  505         wrmsr(MSR_EFER, msr);
  506         wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
  507         wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
  508         msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
  509               ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
  510         wrmsr(MSR_STAR, msr);
  511         wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
  512 
  513         /* Disable local APIC just to be sure. */
  514         lapic_disable();
  515 
  516         /* signal our startup to the BSP. */
  517         mp_naps++;
  518 
   519         /* Spin until the BSP releases the APs. */
  520         while (!aps_ready)
  521                 ia32_pause();
  522 
  523         /* Initialize the PAT MSR. */
  524         pmap_init_pat();
  525 
  526         /* set up CPU registers and state */
  527         cpu_setregs();
  528 
  529         /* set up SSE/NX registers */
  530         initializecpu();
  531 
  532         /* set up FPU state on the AP */
  533         fpuinit();
  534 
  535         /* A quick check from sanity claus */
  536         if (PCPU_GET(apic_id) != lapic_id()) {
  537                 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
  538                 printf("SMP: actual apic_id = %d\n", lapic_id());
  539                 printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
  540                 panic("cpuid mismatch! boom!!");
  541         }
  542 
  543         /* Initialize curthread. */
  544         KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
  545         PCPU_SET(curthread, PCPU_GET(idlethread));
  546 
  547         mtx_lock_spin(&ap_boot_mtx);
  548 
  549         /* Init local apic for irq's */
  550         lapic_setup(1);
  551 
  552         /* Set memory range attributes for this CPU to match the BSP */
  553         mem_range_AP_init();
  554 
  555         smp_cpus++;
  556 
  557         CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
  558         printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
  559 
  560         /* Determine if we are a logical CPU. */
  561         if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
  562                 logical_cpus_mask |= PCPU_GET(cpumask);
  563         
  564         /* Determine if we are a hyperthread. */
  565         if (hyperthreading_cpus > 1 &&
  566             PCPU_GET(apic_id) % hyperthreading_cpus != 0)
  567                 hyperthreading_cpus_mask |= PCPU_GET(cpumask);
  568 
  569         /* Build our map of 'other' CPUs. */
  570         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  571 
  572         if (bootverbose)
  573                 lapic_dump("AP");
  574 
  575         if (smp_cpus == mp_ncpus) {
  576                 /* enable IPI's, tlb shootdown, freezes etc */
  577                 atomic_store_rel_int(&smp_started, 1);
  578                 smp_active = 1;  /* historic */
  579         }
  580 
  581         /*
  582          * Enable global pages TLB extension
  583          * This also implicitly flushes the TLB 
  584          */
  585 
  586         load_cr4(rcr4() | CR4_PGE);
  587 
  588         mtx_unlock_spin(&ap_boot_mtx);
  589 
   590         /* wait until all the APs are up */
  591         while (smp_started == 0)
  592                 ia32_pause();
  593 
  594         sched_throw(NULL);
  595 
  596         panic("scheduler returned us to %s", __func__);
  597         /* NOTREACHED */
  598 }
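       /*
        * For reference, the MSR_STAR value built above packs two selectors,
        * roughly following the AMD64 SYSCALL/SYSRET convention:
        *
        *      bits 47:32  kernel CS used by SYSCALL
        *                  (GSEL(GCODE_SEL, SEL_KPL))
        *      bits 63:48  base selector from which SYSRET derives the user
        *                  code/stack segments (GSEL(GUCODE32_SEL, SEL_UPL))
        *
        * MSR_SF_MASK lists the RFLAGS bits that SYSCALL clears on kernel
        * entry, which is why PSL_I appears there: interrupts stay disabled
        * until the kernel has switched to a proper stack.
        */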
  599 
  600 /*******************************************************************
  601  * local functions and data
  602  */
  603 
  604 /*
  605  * We tell the I/O APIC code about all the CPUs we want to receive
  606  * interrupts.  If we don't want certain CPUs to receive IRQs we
  607  * can simply not tell the I/O APIC code about them in this function.
  608  * We also do not tell it about the BSP since it tells itself about
  609  * the BSP internally to work with UP kernels and on UP machines.
  610  */
  611 static void
  612 set_interrupt_apic_ids(void)
  613 {
  614         u_int i, apic_id;
  615 
  616         for (i = 0; i < MAXCPU; i++) {
  617                 apic_id = cpu_apic_ids[i];
  618                 if (apic_id == -1)
  619                         continue;
  620                 if (cpu_info[apic_id].cpu_bsp)
  621                         continue;
  622                 if (cpu_info[apic_id].cpu_disabled)
  623                         continue;
  624 
  625                 /* Don't let hyperthreads service interrupts. */
  626                 if (hyperthreading_cpus > 1 &&
  627                     apic_id % hyperthreading_cpus != 0)
  628                         continue;
  629 
  630                 intr_add_cpu(i);
  631         }
  632 }
  633 
  634 /*
  635  * Assign logical CPU IDs to local APICs.
  636  */
  637 static void
  638 assign_cpu_ids(void)
  639 {
  640         u_int i;
  641 
  642         /* Check for explicitly disabled CPUs. */
  643         for (i = 0; i <= MAX_APIC_ID; i++) {
  644                 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
  645                         continue;
  646 
  647                 /* Don't use this CPU if it has been disabled by a tunable. */
  648                 if (resource_disabled("lapic", i)) {
  649                         cpu_info[i].cpu_disabled = 1;
  650                         continue;
  651                 }
  652         }
  653 
  654         /*
  655          * Assign CPU IDs to local APIC IDs and disable any CPUs
  656          * beyond MAXCPU.  CPU 0 has already been assigned to the BSP,
  657          * so we only have to assign IDs for APs.
  658          */
  659         mp_ncpus = 1;
  660         for (i = 0; i <= MAX_APIC_ID; i++) {
  661                 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
  662                     cpu_info[i].cpu_disabled)
  663                         continue;
  664 
  665                 if (mp_ncpus < MAXCPU) {
  666                         cpu_apic_ids[mp_ncpus] = i;
  667                         mp_ncpus++;
  668                 } else
  669                         cpu_info[i].cpu_disabled = 1;
  670         }
  671         KASSERT(mp_maxid >= mp_ncpus - 1,
  672             ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
  673             mp_ncpus));         
  674 }
  675 
  676 /*
  677  * start each AP in our list
  678  */
  679 static int
  680 start_all_aps(void)
  681 {
  682         vm_offset_t va = boot_address + KERNBASE;
  683         u_int64_t *pt4, *pt3, *pt2;
  684         u_int32_t mpbioswarmvec;
  685         int apic_id, cpu, i;
  686         u_char mpbiosreason;
  687 
  688         mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
  689 
  690         /* install the AP 1st level boot code */
  691         pmap_kenter(va, boot_address);
  692         pmap_invalidate_page(kernel_pmap, va);
  693         bcopy(mptramp_start, (void *)va, bootMP_size);
  694 
  695         /* Locate the page tables, they'll be below the trampoline */
  696         pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
  697         pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
  698         pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
  699 
  700         /* Create the initial 1GB replicated page tables */
  701         for (i = 0; i < 512; i++) {
  702                 /* Each slot of the level 4 pages points to the same level 3 page */
  703                 pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
  704                 pt4[i] |= PG_V | PG_RW | PG_U;
  705 
  706                 /* Each slot of the level 3 pages points to the same level 2 page */
  707                 pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
  708                 pt3[i] |= PG_V | PG_RW | PG_U;
  709 
  710                 /* The level 2 page slots are mapped with 2MB pages for 1GB. */
  711                 pt2[i] = i * (2 * 1024 * 1024);
  712                 pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
  713         }
  714 
  715         /* save the current value of the warm-start vector */
  716         mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
  717         outb(CMOS_REG, BIOS_RESET);
  718         mpbiosreason = inb(CMOS_DATA);
  719 
  720         /* setup a vector to our boot code */
  721         *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
  722         *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
  723         outb(CMOS_REG, BIOS_RESET);
  724         outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
  725 
  726         /* start each AP */
  727         for (cpu = 1; cpu < mp_ncpus; cpu++) {
  728                 apic_id = cpu_apic_ids[cpu];
  729 
  730                 /* allocate and set up an idle stack data page */
  731                 bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
  732                 doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
  733 
  734                 bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
  735                 bootAP = cpu;
  736 
  737                 /* attempt to start the Application Processor */
  738                 if (!start_ap(apic_id)) {
  739                         /* restore the warmstart vector */
  740                         *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
  741                         panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
  742                 }
  743 
  744                 all_cpus |= (1 << cpu);         /* record AP in CPU map */
  745         }
  746 
  747         /* build our map of 'other' CPUs */
  748         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  749 
  750         /* restore the warmstart vector */
  751         *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
  752 
  753         outb(CMOS_REG, BIOS_RESET);
  754         outb(CMOS_DATA, mpbiosreason);
  755 
  756         /* number of APs actually started */
  757         return mp_naps;
  758 }
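       /*
        * The three pages filled in above give the trampoline an identity map
        * of the first 1GB of physical memory: every slot of the level 4 and
        * level 3 pages points at the same next-level page, and each of the
        * 512 level 2 entries maps a 2MB superpage at i * 2MB.  Roughly
        * speaking, a boot_address near 0x9e000 is covered by pt2[0]
        * (physical 0-2MB), and the same mapping is reached from any
        * 1GB-aligned virtual window the early AP code happens to use while
        * it switches to long mode.
        */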
  759 
  760 
  761 /*
  762  * This function starts the AP (application processor) identified
  763  * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
  764  * to accomplish this.  This is necessary because of the nuances
  765  * of the different hardware we might encounter.  It isn't pretty,
  766  * but it seems to work.
  767  */
  768 static int
  769 start_ap(int apic_id)
  770 {
  771         int vector, ms;
  772         int cpus;
  773 
  774         /* calculate the vector */
  775         vector = (boot_address >> 12) & 0xff;
  776 
  777         /* used as a watchpoint to signal AP startup */
  778         cpus = mp_naps;
  779 
  780         /*
   781          * First we do an INIT/RESET IPI.  This INIT IPI might be run,
   782          * resetting and running the target CPU.  OR this INIT IPI might be
   783          * latched (P5 bug), with the CPU waiting for a STARTUP IPI.  OR this
   784          * INIT IPI might be ignored.
  785          */
  786 
  787         /* do an INIT IPI: assert RESET */
  788         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  789             APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
  790 
  791         /* wait for pending status end */
  792         lapic_ipi_wait(-1);
  793 
  794         /* do an INIT IPI: deassert RESET */
  795         lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
  796             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
  797 
  798         /* wait for pending status end */
  799         DELAY(10000);           /* wait ~10mS */
  800         lapic_ipi_wait(-1);
  801 
  802         /*
   803          * Next we do a STARTUP IPI: the previous INIT IPI might still be
   804          * latched (P5 bug), in which case this 1st STARTUP would terminate
   805          * immediately and the previously started INIT IPI would continue.  OR
   806          * the previous INIT IPI has already run, and this STARTUP IPI will
   807          * run.  OR the previous INIT IPI was ignored, and this STARTUP IPI
   808          * will run.
  809          */
  810 
  811         /* do a STARTUP IPI */
  812         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  813             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  814             vector, apic_id);
  815         lapic_ipi_wait(-1);
  816         DELAY(200);             /* wait ~200uS */
  817 
  818         /*
   819          * Finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
   820          * the previous STARTUP IPI was cancelled by a latched INIT IPI.
   821          * Otherwise this STARTUP IPI will be ignored, as only ONE STARTUP IPI
   822          * is recognized after a hardware RESET or INIT IPI.
  823          */
  824 
  825         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  826             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  827             vector, apic_id);
  828         lapic_ipi_wait(-1);
  829         DELAY(200);             /* wait ~200uS */
  830 
  831         /* Wait up to 5 seconds for it to start. */
  832         for (ms = 0; ms < 5000; ms++) {
  833                 if (mp_naps > cpus)
  834                         return 1;       /* return SUCCESS */
  835                 DELAY(1000);
  836         }
  837         return 0;               /* return FAILURE */
  838 }
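       /*
        * A worked example of the STARTUP vector above, assuming
        * mp_bootaddress() left boot_address at 0x9e000:
        *
        *      vector = (0x9e000 >> 12) & 0xff = 0x9e
        *
        * The STARTUP IPI makes the AP begin executing in real mode at
        * physical address vector << 12, i.e. exactly at the copied
        * trampoline.  Success is detected purely by watching mp_naps grow,
        * which init_secondary() does once the AP is far enough along.
        */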
  839 
  840 /*
   841  * Flush the TLB on all other CPUs
  842  */
  843 static void
  844 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  845 {
  846         u_int ncpu;
  847 
  848         ncpu = mp_ncpus - 1;    /* does not shootdown self */
  849         if (ncpu < 1)
  850                 return;         /* no other cpus */
  851         if (!(read_rflags() & PSL_I))
  852                 panic("%s: interrupts disabled", __func__);
  853         mtx_lock_spin(&smp_ipi_mtx);
  854         smp_tlb_addr1 = addr1;
  855         smp_tlb_addr2 = addr2;
  856         atomic_store_rel_int(&smp_tlb_wait, 0);
  857         ipi_all_but_self(vector);
  858         while (smp_tlb_wait < ncpu)
  859                 ia32_pause();
  860         mtx_unlock_spin(&smp_ipi_mtx);
  861 }
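       /*
        * The rendezvous above relies on the IPI handlers (the
        * IDTVEC(invltlb)/invlpg/invlrng entry points installed in
        * cpu_mp_start(), which live in the interrupt entry code rather than
        * in this file) to perform the local invalidation and then bump the
        * counter.  Conceptually each target does something like:
        *
        *      invltlb();                       (or invlpg(smp_tlb_addr1))
        *      atomic_add_int(&smp_tlb_wait, 1);
        *
        * so the initiating CPU can spin until smp_tlb_wait reaches the
        * number of other CPUs.  This is only a sketch of the handler side.
        */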
  862 
  863 static void
  864 smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  865 {
  866         int ncpu, othercpus;
  867 
  868         othercpus = mp_ncpus - 1;
  869         if (mask == (u_int)-1) {
  870                 ncpu = othercpus;
  871                 if (ncpu < 1)
  872                         return;
  873         } else {
  874                 mask &= ~PCPU_GET(cpumask);
  875                 if (mask == 0)
  876                         return;
  877                 ncpu = bitcount32(mask);
  878                 if (ncpu > othercpus) {
  879                         /* XXX this should be a panic offence */
  880                         printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
  881                             ncpu, othercpus);
  882                         ncpu = othercpus;
  883                 }
  884                 /* XXX should be a panic, implied by mask == 0 above */
  885                 if (ncpu < 1)
  886                         return;
  887         }
  888         if (!(read_rflags() & PSL_I))
  889                 panic("%s: interrupts disabled", __func__);
  890         mtx_lock_spin(&smp_ipi_mtx);
  891         smp_tlb_addr1 = addr1;
  892         smp_tlb_addr2 = addr2;
  893         atomic_store_rel_int(&smp_tlb_wait, 0);
  894         if (mask == (u_int)-1)
  895                 ipi_all_but_self(vector);
  896         else
  897                 ipi_selected(mask, vector);
  898         while (smp_tlb_wait < ncpu)
  899                 ia32_pause();
  900         mtx_unlock_spin(&smp_ipi_mtx);
  901 }
  902 
  903 void
  904 smp_cache_flush(void)
  905 {
  906 
  907         if (smp_started)
  908                 smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
  909 }
  910 
  911 void
  912 smp_invltlb(void)
  913 {
  914 
  915         if (smp_started) {
  916                 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
  917         }
  918 }
  919 
  920 void
  921 smp_invlpg(vm_offset_t addr)
  922 {
  923 
  924         if (smp_started)
  925                 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
  926 }
  927 
  928 void
  929 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
  930 {
  931 
  932         if (smp_started) {
  933                 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
  934         }
  935 }
  936 
  937 void
  938 smp_masked_invltlb(u_int mask)
  939 {
  940 
  941         if (smp_started) {
  942                 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
  943         }
  944 }
  945 
  946 void
  947 smp_masked_invlpg(u_int mask, vm_offset_t addr)
  948 {
  949 
  950         if (smp_started) {
  951                 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
  952         }
  953 }
  954 
  955 void
  956 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
  957 {
  958 
  959         if (smp_started) {
  960                 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
  961         }
  962 }
  963 
  964 void
  965 ipi_bitmap_handler(struct trapframe frame)
  966 {
  967         int cpu = PCPU_GET(cpuid);
  968         u_int ipi_bitmap;
  969 
  970         ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
  971 
  972         if (ipi_bitmap & (1 << IPI_PREEMPT)) {
  973                 struct thread *running_thread = curthread;
  974                 thread_lock(running_thread);
  975                 if (running_thread->td_critnest > 1) 
  976                         running_thread->td_owepreempt = 1;
  977                 else            
  978                         mi_switch(SW_INVOL | SW_PREEMPT, NULL);
  979                 thread_unlock(running_thread);
  980         }
  981 
  982         /* Nothing to do for AST */
  983 }
  984 
  985 /*
  986  * send an IPI to a set of cpus.
  987  */
  988 void
  989 ipi_selected(u_int32_t cpus, u_int ipi)
  990 {
  991         int cpu;
  992         u_int bitmap = 0;
  993         u_int old_pending;
  994         u_int new_pending;
  995 
  996         if (IPI_IS_BITMAPED(ipi)) { 
  997                 bitmap = 1 << ipi;
  998                 ipi = IPI_BITMAP_VECTOR;
  999         }
 1000 
 1001 #ifdef STOP_NMI
 1002         if (ipi == IPI_STOP && stop_cpus_with_nmi) {
 1003                 ipi_nmi_selected(cpus);
 1004                 return;
 1005         }
 1006 #endif
 1007         CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 1008         while ((cpu = ffs(cpus)) != 0) {
 1009                 cpu--;
 1010                 cpus &= ~(1 << cpu);
 1011 
 1012                 KASSERT(cpu_apic_ids[cpu] != -1,
 1013                     ("IPI to non-existent CPU %d", cpu));
 1014 
 1015                 if (bitmap) {
 1016                         do {
 1017                                 old_pending = cpu_ipi_pending[cpu];
 1018                                 new_pending = old_pending | bitmap;
 1019                         } while  (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending));  
 1020 
 1021                         if (old_pending)
 1022                                 continue;
 1023                 }
 1024 
 1025                 lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
 1026         }
 1027 
 1028 }
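       /*
        * As an illustration of the bitmap path above: IPI_PREEMPT is one of
        * the bitmapped IPIs, so a request becomes bitmap = 1 << IPI_PREEMPT
        * and the vector actually sent is IPI_BITMAP_VECTOR.  The cmpset loop
        * ORs the bit into cpu_ipi_pending[cpu]; if old_pending was already
        * non-zero an interrupt is known to be on its way, so the sender
        * skips lapic_ipi_vectored() and the pending bits are simply
        * coalesced until ipi_bitmap_handler() reads and clears them with
        * atomic_readandclear_int().
        */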
 1029 
 1030 /*
  1031  * send an IPI carrying the vector 'ipi' to all CPUs, including myself
 1032  */
 1033 void
 1034 ipi_all(u_int ipi)
 1035 {
 1036 
 1037         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1038                 ipi_selected(all_cpus, ipi);
 1039                 return;
 1040         }
 1041         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1042         lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
 1043 }
 1044 
 1045 /*
 1046  * send an IPI to all CPUs EXCEPT myself
 1047  */
 1048 void
 1049 ipi_all_but_self(u_int ipi)
 1050 {
 1051 
 1052         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1053                 ipi_selected(PCPU_GET(other_cpus), ipi);
 1054                 return;
 1055         }
 1056         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1057         lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 1058 }
 1059 
 1060 /*
 1061  * send an IPI to myself
 1062  */
 1063 void
 1064 ipi_self(u_int ipi)
 1065 {
 1066 
 1067         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1068                 ipi_selected(PCPU_GET(cpumask), ipi);
 1069                 return;
 1070         }
 1071         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1072         lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
 1073 }
 1074 
 1075 #ifdef STOP_NMI
 1076 /*
 1077  * send NMI IPI to selected CPUs
 1078  */
 1079 
 1080 #define BEFORE_SPIN     1000000
 1081 
 1082 void
 1083 ipi_nmi_selected(u_int32_t cpus)
 1084 {
 1085         int cpu;
 1086         register_t icrlo;
 1087 
 1088         icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 
 1089                 | APIC_TRIGMOD_EDGE; 
 1090         
 1091         CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);
 1092 
 1093         atomic_set_int(&ipi_nmi_pending, cpus);
 1094 
 1095         while ((cpu = ffs(cpus)) != 0) {
 1096                 cpu--;
 1097                 cpus &= ~(1 << cpu);
 1098 
 1099                 KASSERT(cpu_apic_ids[cpu] != -1,
 1100                     ("IPI NMI to non-existent CPU %d", cpu));
 1101                 
 1102                 /* Wait for an earlier IPI to finish. */
 1103                 if (!lapic_ipi_wait(BEFORE_SPIN))
 1104                         panic("ipi_nmi_selected: previous IPI has not cleared");
 1105 
 1106                 lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
 1107         }
 1108 }
 1109 
 1110 int
 1111 ipi_nmi_handler(void)
 1112 {
 1113         int cpumask = PCPU_GET(cpumask);
 1114 
 1115         if (!(ipi_nmi_pending & cpumask))
 1116                 return 1;
 1117 
 1118         atomic_clear_int(&ipi_nmi_pending, cpumask);
 1119         cpustop_handler();
 1120         return 0;
 1121 }
 1122      
 1123 #endif /* STOP_NMI */
 1124 
 1125 /*
 1126  * Handle an IPI_STOP by saving our current context and spinning until we
 1127  * are resumed.
 1128  */
 1129 void
 1130 cpustop_handler(void)
 1131 {
 1132         int cpu = PCPU_GET(cpuid);
 1133         int cpumask = PCPU_GET(cpumask);
 1134 
 1135         savectx(&stoppcbs[cpu]);
 1136 
 1137         /* Indicate that we are stopped */
 1138         atomic_set_int(&stopped_cpus, cpumask);
 1139 
 1140         /* Wait for restart */
 1141         while (!(started_cpus & cpumask))
 1142             ia32_pause();
 1143 
 1144         atomic_clear_int(&started_cpus, cpumask);
 1145         atomic_clear_int(&stopped_cpus, cpumask);
 1146 
 1147         if (cpu == 0 && cpustop_restartfunc != NULL) {
 1148                 cpustop_restartfunc();
 1149                 cpustop_restartfunc = NULL;
 1150         }
 1151 }
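       /*
        * The stop/restart handshake seen from the other side (a sketch; the
        * initiator lives in the machine-independent SMP code): stop_cpus()
        * sends IPI_STOP and waits for every target to appear in
        * stopped_cpus, while restart_cpus() sets bits in started_cpus and
        * waits for the corresponding stopped_cpus bits to clear -- exactly
        * the transitions this handler performs with atomic_set_int() and
        * atomic_clear_int().
        */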
 1152 
 1153 /*
 1154  * This is called once the rest of the system is up and running and we're
  1155  * ready to let the APs out of the pen.
 1156  */
 1157 static void
 1158 release_aps(void *dummy __unused)
 1159 {
 1160 
 1161         if (mp_ncpus == 1) 
 1162                 return;
 1163         atomic_store_rel_int(&aps_ready, 1);
 1164         while (smp_started == 0)
 1165                 ia32_pause();
 1166 }
 1167 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 1168 
 1169 static int
 1170 sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
 1171 {
 1172         u_int mask;
 1173         int error;
 1174 
 1175         mask = hlt_cpus_mask;
 1176         error = sysctl_handle_int(oidp, &mask, 0, req);
 1177         if (error || !req->newptr)
 1178                 return (error);
 1179 
 1180         if (logical_cpus_mask != 0 &&
 1181             (mask & logical_cpus_mask) == logical_cpus_mask)
 1182                 hlt_logical_cpus = 1;
 1183         else
 1184                 hlt_logical_cpus = 0;
 1185 
 1186         if (! hyperthreading_allowed)
 1187                 mask |= hyperthreading_cpus_mask;
 1188 
 1189         if ((mask & all_cpus) == all_cpus)
 1190                 mask &= ~(1<<0);
 1191         hlt_cpus_mask = mask;
 1192         return (error);
 1193 }
 1194 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
 1195     0, 0, sysctl_hlt_cpus, "IU",
 1196     "Bitmap of CPUs to halt.  101 (binary) will halt CPUs 0 and 2.");
 1197 
 1198 static int
 1199 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
 1200 {
 1201         int disable, error;
 1202 
 1203         disable = hlt_logical_cpus;
 1204         error = sysctl_handle_int(oidp, &disable, 0, req);
 1205         if (error || !req->newptr)
 1206                 return (error);
 1207 
 1208         if (disable)
 1209                 hlt_cpus_mask |= logical_cpus_mask;
 1210         else
 1211                 hlt_cpus_mask &= ~logical_cpus_mask;
 1212 
 1213         if (! hyperthreading_allowed)
 1214                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1215 
 1216         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1217                 hlt_cpus_mask &= ~(1<<0);
 1218 
 1219         hlt_logical_cpus = disable;
 1220         return (error);
 1221 }
 1222 
 1223 static int
 1224 sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
 1225 {
 1226         int allowed, error;
 1227 
 1228         allowed = hyperthreading_allowed;
 1229         error = sysctl_handle_int(oidp, &allowed, 0, req);
 1230         if (error || !req->newptr)
 1231                 return (error);
 1232 
 1233         if (allowed)
 1234                 hlt_cpus_mask &= ~hyperthreading_cpus_mask;
 1235         else
 1236                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1237 
 1238         if (logical_cpus_mask != 0 &&
 1239             (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
 1240                 hlt_logical_cpus = 1;
 1241         else
 1242                 hlt_logical_cpus = 0;
 1243 
 1244         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1245                 hlt_cpus_mask &= ~(1<<0);
 1246 
 1247         hyperthreading_allowed = allowed;
 1248         return (error);
 1249 }
 1250 
 1251 static void
 1252 cpu_hlt_setup(void *dummy __unused)
 1253 {
 1254 
 1255         if (logical_cpus_mask != 0) {
 1256                 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
 1257                     &hlt_logical_cpus);
 1258                 sysctl_ctx_init(&logical_cpu_clist);
 1259                 SYSCTL_ADD_PROC(&logical_cpu_clist,
 1260                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1261                     "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
 1262                     sysctl_hlt_logical_cpus, "IU", "");
 1263                 SYSCTL_ADD_UINT(&logical_cpu_clist,
 1264                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1265                     "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
 1266                     &logical_cpus_mask, 0, "");
 1267 
 1268                 if (hlt_logical_cpus)
 1269                         hlt_cpus_mask |= logical_cpus_mask;
 1270 
 1271                 /*
 1272                  * If necessary for security purposes, force
 1273                  * hyperthreading off, regardless of the value
 1274                  * of hlt_logical_cpus.
 1275                  */
 1276                 if (hyperthreading_cpus_mask) {
 1277                         TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
 1278                             &hyperthreading_allowed);
 1279                         SYSCTL_ADD_PROC(&logical_cpu_clist,
 1280                             SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1281                             "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
 1282                             0, 0, sysctl_hyperthreading_allowed, "IU", "");
 1283                         if (! hyperthreading_allowed)
 1284                                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1285                 }
 1286         }
 1287 }
 1288 SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
 1289 
 1290 int
 1291 mp_grab_cpu_hlt(void)
 1292 {
 1293         u_int mask = PCPU_GET(cpumask);
 1294 #ifdef MP_WATCHDOG
 1295         u_int cpuid = PCPU_GET(cpuid);
 1296 #endif
 1297         int retval;
 1298 
 1299 #ifdef MP_WATCHDOG
 1300         ap_watchdog(cpuid);
 1301 #endif
 1302 
 1303         retval = mask & hlt_cpus_mask;
 1304         while (mask & hlt_cpus_mask)
 1305                 __asm __volatile("sti; hlt" : : : "memory");
 1306         return (retval);
 1307 }
