FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/mp_machdep.c


    1 /*-
    2  * Copyright (c) 1996, by Steve Passe
    3  * Copyright (c) 2003, by Peter Wemm
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. The name of the developer may NOT be used to endorse or promote products
   12  *    derived from this software without specific prior written permission.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD$");
   29 
   30 #include "opt_cpu.h"
   31 #include "opt_kstack_pages.h"
   32 #include "opt_mp_watchdog.h"
   33 #include "opt_sched.h"
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/bus.h>
   38 #ifdef GPROF 
   39 #include <sys/gmon.h>
   40 #endif
   41 #include <sys/kernel.h>
   42 #include <sys/ktr.h>
   43 #include <sys/lock.h>
   44 #include <sys/malloc.h>
   45 #include <sys/memrange.h>
   46 #include <sys/mutex.h>
   47 #include <sys/pcpu.h>
   48 #include <sys/proc.h>
   49 #include <sys/sched.h>
   50 #include <sys/smp.h>
   51 #include <sys/sysctl.h>
   52 
   53 #include <vm/vm.h>
   54 #include <vm/vm_param.h>
   55 #include <vm/pmap.h>
   56 #include <vm/vm_kern.h>
   57 #include <vm/vm_extern.h>
   58 
   59 #include <machine/apicreg.h>
   60 #include <machine/md_var.h>
   61 #include <machine/mp_watchdog.h>
   62 #include <machine/pcb.h>
   63 #include <machine/psl.h>
   64 #include <machine/smp.h>
   65 #include <machine/specialreg.h>
   66 #include <machine/tss.h>
   67 
   68 #define WARMBOOT_TARGET         0
   69 #define WARMBOOT_OFF            (KERNBASE + 0x0467)
   70 #define WARMBOOT_SEG            (KERNBASE + 0x0469)
   71 
   72 #define CMOS_REG                (0x70)
   73 #define CMOS_DATA               (0x71)
   74 #define BIOS_RESET              (0x0f)
   75 #define BIOS_WARM               (0x0a)
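      /*
       * The word at physical 0x467 holds the real-mode IP and the word at
       * 0x469 the segment of the BIOS warm-boot ("resume") vector; writing
       * BIOS_WARM (0x0a, "jump via 40:67 without EOI") into CMOS
       * shutdown-status register 0x0f makes the BIOS jump through that
       * vector after a reset.  start_all_aps() points the vector at the AP
       * trampoline before waking the APs and restores it afterwards.
       */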
   76 
   77 /* lock region used by kernel profiling */
   78 int     mcount_lock;
   79 
   80 int     mp_naps;                /* # of Application processors */
   81 int     boot_cpu_id = -1;       /* designated BSP */
   82 
   83 extern  struct pcpu __pcpu[];
   84 
   85 /*
   86  * CPU topology map datastructures for HTT.
   87  */
   88 static struct cpu_group mp_groups[MAXCPU];
   89 static struct cpu_top mp_top;
   90 
   91 /* AP uses this during bootstrap.  Do not staticize.  */
   92 char *bootSTK;
   93 static int bootAP;
   94 
   95 /* Free these after use */
   96 void *bootstacks[MAXCPU];
   97 
   98 /* Temporary holder for double fault stack */
   99 char *doublefault_stack;
  100 
  101 /* Hotwire a 0->4MB V==P mapping */
  102 extern pt_entry_t *KPTphys;
  103 
  104 /* SMP page table page */
  105 extern pt_entry_t *SMPpt;
  106 
  107 struct pcb stoppcbs[MAXCPU];
  108 
  109 /* Variables needed for SMP tlb shootdown. */
  110 vm_offset_t smp_tlb_addr1;
  111 vm_offset_t smp_tlb_addr2;
  112 volatile int smp_tlb_wait;
  113 
  114 extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
  115 
  116 #ifdef STOP_NMI
  117 volatile cpumask_t ipi_nmi_pending;
  118 
  119 static void     ipi_nmi_selected(u_int32_t cpus);
  120 #endif 
  121 
  122 /*
  123  * Local data and functions.
  124  */
  125 
  126 #ifdef STOP_NMI
  127 /* 
  128  * Provide an alternate method of stopping other CPUs. If another CPU has
  129  * disabled interrupts the conventional STOP IPI will be blocked. This 
  130  * NMI-based stop should get through in that case.
  131  */
  132 static int stop_cpus_with_nmi = 1;
  133 SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW,
  134     &stop_cpus_with_nmi, 0, "");
  135 TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi);
  136 #else
  137 #define stop_cpus_with_nmi      0
  138 #endif
  139 
  140 static u_int logical_cpus;
  141 
  142 /* used to hold the APs until we are ready to release them */
  143 static struct mtx ap_boot_mtx;
  144 
  145 /* Set to 1 once we're ready to let the APs out of the pen. */
  146 static volatile int aps_ready = 0;
  147 
  148 /*
  149  * Store data from cpu_add() until later in the boot when we actually set up
  150  * the APs.
  151  */
  152 struct cpu_info {
  153         int     cpu_present:1;
  154         int     cpu_bsp:1;
  155         int     cpu_disabled:1;
  156         int     cpu_hyperthread:1;
  157 } static cpu_info[MAX_APIC_ID + 1];
  158 int cpu_apic_ids[MAXCPU];
  159 
  160 /* Holds pending bitmap based IPIs per CPU */
  161 static volatile u_int cpu_ipi_pending[MAXCPU];
  162 
  163 static u_int boot_address;
  164 
  165 static void     assign_cpu_ids(void);
  166 static void     set_interrupt_apic_ids(void);
  167 static int      start_all_aps(void);
  168 static int      start_ap(int apic_id);
  169 static void     release_aps(void *dummy);
  170 
  171 static int      hlt_logical_cpus;
  172 static u_int    hyperthreading_cpus;
  173 static cpumask_t        hyperthreading_cpus_mask;
  174 static int      hyperthreading_allowed = 1;
  175 static struct   sysctl_ctx_list logical_cpu_clist;
  176 static u_int    bootMP_size;
  177 
  178 static void
  179 mem_range_AP_init(void)
  180 {
  181         if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
  182                 mem_range_softc.mr_op->initAP(&mem_range_softc);
  183 }
  184 
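      /*
       * Build a flat cpu_group topology for the scheduler when HTT is in
       * use: CPUs whose APIC IDs fall between consecutive multiples of
       * hyperthreading_cpus are assumed to share a physical package and
       * are placed in the same group.
       */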
  185 void
  186 mp_topology(void)
  187 {
  188         struct cpu_group *group;
  189         int apic_id;
  190         int groups;
  191         int cpu;
  192 
  193         /* Build the smp_topology map. */
  194         /* Nothing to do if there is no HTT support. */
  195         if (hyperthreading_cpus <= 1)
  196                 return;
  197         group = &mp_groups[0];
  198         groups = 1;
  199         for (cpu = 0, apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) {
  200                 if (!cpu_info[apic_id].cpu_present)
  201                         continue;
  202                 /*
  203                  * If the current group has members and we're not a logical
  204                  * cpu, create a new group.
  205                  */
  206                 if (group->cg_count != 0 &&
  207                     (apic_id % hyperthreading_cpus) == 0) {
  208                         group++;
  209                         groups++;
  210                 }
  211                 group->cg_count++;
  212                 group->cg_mask |= 1 << cpu;
  213                 cpu++;
  214         }
  215 
  216         mp_top.ct_count = groups;
  217         mp_top.ct_group = mp_groups;
  218         smp_topology = &mp_top;
  219 }
  220 
  221 /*
  222  * Calculate usable address in base memory for AP trampoline code.
  223  */
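      /*
       * Illustrative example (values are hypothetical): with basemem = 639
       * (KB), basemem * 1024 = 0x9fc00 and trunc_page() gives a
       * boot_address of 0x9f000, leaving 0xc00 bytes for the trampoline.
       * Were the trampoline larger than that, boot_address would drop one
       * further page to 0x9e000.  The three page-table pages used by the
       * trampoline sit immediately below boot_address, and their base is
       * what this function returns.
       */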
  224 u_int
  225 mp_bootaddress(u_int basemem)
  226 {
  227 
  228         bootMP_size = mptramp_end - mptramp_start;
  229         boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */
  230         if (((basemem * 1024) - boot_address) < bootMP_size)
  231                 boot_address -= PAGE_SIZE;      /* not enough, lower by 4k */
  232         /* 3 levels of page table pages */
  233         mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
  234 
  235         return mptramp_pagetables;
  236 }
  237 
  238 void
  239 cpu_add(u_int apic_id, char boot_cpu)
  240 {
  241 
  242         if (apic_id > MAX_APIC_ID) {
  243                 panic("SMP: APIC ID %d too high", apic_id);
  244                 return;
  245         }
  246         KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
  247             apic_id));
  248         cpu_info[apic_id].cpu_present = 1;
  249         if (boot_cpu) {
  250                 KASSERT(boot_cpu_id == -1,
  251                     ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
  252                     boot_cpu_id));
  253                 boot_cpu_id = apic_id;
  254                 cpu_info[apic_id].cpu_bsp = 1;
  255         }
  256         if (mp_ncpus < MAXCPU) {
  257                 mp_ncpus++;
  258                 mp_maxid = mp_ncpus -1;
  259         }
  260         if (bootverbose)
  261                 printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
  262                     "AP");
  263 }
  264 
  265 void
  266 cpu_mp_setmaxid(void)
  267 {
  268 
  269         /*
  270          * mp_maxid should be already set by calls to cpu_add().
  271          * Just sanity check its value here.
  272          */
  273         if (mp_ncpus == 0)
  274                 KASSERT(mp_maxid == 0,
  275                     ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
  276         else if (mp_ncpus == 1)
  277                 mp_maxid = 0;
  278         else
  279                 KASSERT(mp_maxid >= mp_ncpus - 1,
  280                     ("%s: counters out of sync: max %d, count %d", __func__,
  281                         mp_maxid, mp_ncpus));           
  282 }
  283 
  284 int
  285 cpu_mp_probe(void)
  286 {
  287 
  288         /*
  289          * Always record BSP in CPU map so that the mbuf init code works
  290          * correctly.
  291          */
  292         all_cpus = 1;
  293         if (mp_ncpus == 0) {
  294                 /*
  295                  * No CPUs were found, so this must be a UP system.  Setup
  296                  * the variables to represent a system with a single CPU
  297                  * with an id of 0.
  298                  */
  299                 mp_ncpus = 1;
  300                 return (0);
  301         }
  302 
  303         /* At least one CPU was found. */
  304         if (mp_ncpus == 1) {
  305                 /*
  306                  * One CPU was found, so this must be a UP system with
  307                  * an I/O APIC.
  308                  */
  309                 mp_maxid = 0;
  310                 return (0);
  311         }
  312 
  313         /* At least two CPUs were found. */
  314         return (1);
  315 }
  316 
  317 /*
  318  * Initialize the IPI handlers and start up the APs.
  319  */
  320 void
  321 cpu_mp_start(void)
  322 {
  323         int i;
  324         u_int threads_per_cache, p[4];
  325 
  326         /* Initialize the logical ID to APIC ID table. */
  327         for (i = 0; i < MAXCPU; i++) {
  328                 cpu_apic_ids[i] = -1;
  329                 cpu_ipi_pending[i] = 0;
  330         }
  331 
  332         /* Install an inter-CPU IPI for TLB invalidation */
  333         setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
  334         setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
  335         setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
  336 
  337         /* Install an inter-CPU IPI for cache invalidation. */
  338         setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0);
  339 
  340         /* Install an inter-CPU IPI for all-CPU rendezvous */
  341         setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);
  342 
  343         /* Install generic inter-CPU IPI handler */
  344         setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
  345                SDT_SYSIGT, SEL_KPL, 0);
  346 
  347         /* Install an inter-CPU IPI for CPU stop/restart */
  348         setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);
  349 
  350         /* Set boot_cpu_id if needed. */
  351         if (boot_cpu_id == -1) {
  352                 boot_cpu_id = PCPU_GET(apic_id);
  353                 cpu_info[boot_cpu_id].cpu_bsp = 1;
  354         } else
  355                 KASSERT(boot_cpu_id == PCPU_GET(apic_id),
  356                     ("BSP's APIC ID doesn't match boot_cpu_id"));
  357         cpu_apic_ids[0] = boot_cpu_id;
  358 
  359         /* Setup the initial logical CPUs info. */
  360         logical_cpus = logical_cpus_mask = 0;
  361         if (cpu_feature & CPUID_HTT)
  362                 logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
  363 
  364         /*
  365          * Work out if hyperthreading is *really* enabled.  This
  366          * is made really ugly by the fact that processors lie: Dual
  367          * core processors claim to be hyperthreaded even when they're
  368          * not, presumably because they want to be treated the same
  369          * way as HTT with respect to per-cpu software licensing.
  370          * At the time of writing (May 12, 2005) the only hyperthreaded
  371          * cpus are from Intel, and Intel's dual-core processors can be
  372          * identified via the "deterministic cache parameters" cpuid
  373          * calls.
  374          */
  375         /*
  376          * First determine if this is an Intel processor which claims
  377          * to have hyperthreading support.
  378          */
  379         if ((cpu_feature & CPUID_HTT) &&
  380             (strcmp(cpu_vendor, "GenuineIntel") == 0)) {
  381                 /*
  382                  * If the "deterministic cache parameters" cpuid calls
  383                  * are available, use them.
  384                  */
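                      /*
                       * CPUID leaf 4 ("deterministic cache parameters"):
                       * EAX bits 25:14 hold the maximum number of logical
                       * processors sharing this cache, minus one (hence
                       * the 0x3ffc000 mask, shift and +1 below), and EAX
                       * bits 4:0 give the cache type, 0 meaning no
                       * further cache levels.
                       */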
  385                 if (cpu_high >= 4) {
  386                         /* Ask the processor about the L1 cache. */
  387                         for (i = 0; i < 1; i++) {
  388                                 cpuid_count(4, i, p);
  389                                 threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
  390                                 if (hyperthreading_cpus < threads_per_cache)
  391                                         hyperthreading_cpus = threads_per_cache;
  392                                 if ((p[0] & 0x1f) == 0)
  393                                         break;
  394                         }
  395                 }
  396 
  397                 /*
  398                  * If the deterministic cache parameters are not
  399                  * available, or if no caches were reported to exist,
  400                  * just accept what the HTT flag indicated.
  401                  */
  402                 if (hyperthreading_cpus == 0)
  403                         hyperthreading_cpus = logical_cpus;
  404         }
  405 
  406         assign_cpu_ids();
  407 
  408         /* Start each Application Processor */
  409         start_all_aps();
  410 
  411         set_interrupt_apic_ids();
  412 
  413         /* Last, set up the cpu topology now that we have probed CPUs */
  414         mp_topology();
  415 }
  416 
  417 
  418 /*
  419  * Print various information about the SMP system hardware and setup.
  420  */
  421 void
  422 cpu_mp_announce(void)
  423 {
  424         int i, x;
  425         const char *hyperthread;
  426 
  427         /* List CPUs */
  428         printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
  429         for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
  430                 if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
  431                         continue;
  432                 if (cpu_info[x].cpu_hyperthread) {
  433                         hyperthread = "/HT";
  434                 } else {
  435                         hyperthread = "";
  436                 }
  437                 if (cpu_info[x].cpu_disabled)
  438                         printf("  cpu (AP%s): APIC ID: %2d (disabled)\n",
  439                             hyperthread, x);
  440                 else {
  441                         KASSERT(i < mp_ncpus,
  442                             ("mp_ncpus and actual cpus are out of whack"));
  443                         printf(" cpu%d (AP%s): APIC ID: %2d\n", i++,
  444                             hyperthread, x);
  445                 }
  446         }
  447 }
  448 
  449 /*
  450  * AP CPUs call this to initialize themselves.
  451  */
  452 void
  453 init_secondary(void)
  454 {
  455         struct pcpu *pc;
  456         u_int64_t msr, cr0;
  457         int cpu, gsel_tss, x;
  458         struct region_descriptor ap_gdt;
  459 
  460         /* Set by the startup code for us to use */
  461         cpu = bootAP;
  462 
  463         /* Init tss */
  464         common_tss[cpu] = common_tss[0];
  465         common_tss[cpu].tss_rsp0 = 0;   /* not used until after switch */
  466         common_tss[cpu].tss_iobase = sizeof(struct amd64tss);
  467         common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];
  468 
  469         /* Prepare private GDT */
  470         gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
  471         ssdtosyssd(&gdt_segs[GPROC0_SEL],
  472            (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
  473         for (x = 0; x < NGDT; x++) {
  474                 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
  475                         ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
  476         }
  477         ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
  478         ap_gdt.rd_base =  (long) &gdt[NGDT * cpu];
  479         lgdt(&ap_gdt);                  /* does magic intra-segment return */
  480 
  481         /* Get per-cpu data */
  482         pc = &__pcpu[cpu];
  483 
  484         /* prime data page for it to use */
  485         pcpu_init(pc, cpu, sizeof(struct pcpu));
  486         pc->pc_apic_id = cpu_apic_ids[cpu];
  487         pc->pc_prvspace = pc;
  488         pc->pc_curthread = 0;
  489         pc->pc_tssp = &common_tss[cpu];
  490         pc->pc_rsp0 = 0;
  491         pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
  492 
  493         wrmsr(MSR_FSBASE, 0);           /* User value */
  494         wrmsr(MSR_GSBASE, (u_int64_t)pc);
  495         wrmsr(MSR_KGSBASE, (u_int64_t)pc);      /* XXX User value while we're in the kernel */
  496 
  497         lidt(&r_idt);
  498 
  499         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
  500         ltr(gsel_tss);
  501 
  502         /*
  503          * Set to a known state:
  504          * Set by mpboot.s: CR0_PG, CR0_PE
  505          * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
  506          */
  507         cr0 = rcr0();
  508         cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
  509         load_cr0(cr0);
  510 
  511         /* Set up the fast syscall stuff */
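              /*
               * EFER.SCE enables SYSCALL/SYSRET.  MSR_LSTAR holds the
               * 64-bit syscall entry point and MSR_CSTAR the entry point
               * for 32-bit compatibility-mode callers.  In MSR_STAR, bits
               * 47:32 are the kernel CS selector loaded on SYSCALL and
               * bits 63:48 the selector base used to derive the user
               * CS/SS on SYSRET.  Any RFLAGS bit set in MSR_SF_MASK is
               * cleared on entry, so interrupts and the trap, direction
               * and nested-task flags are off when the handler starts.
               */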
  512         msr = rdmsr(MSR_EFER) | EFER_SCE;
  513         wrmsr(MSR_EFER, msr);
  514         wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
  515         wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
  516         msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
  517               ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
  518         wrmsr(MSR_STAR, msr);
  519         wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
  520 
  521         /* Disable local APIC just to be sure. */
  522         lapic_disable();
  523 
  524         /* signal our startup to the BSP. */
  525         mp_naps++;
  526 
  527         /* Spin until the BSP releases the APs. */
  528         while (!aps_ready)
  529                 ia32_pause();
  530 
  531         /* Initialize the PAT MSR. */
  532         pmap_init_pat();
  533 
  534         /* set up CPU registers and state */
  535         cpu_setregs();
  536 
  537         /* set up SSE/NX registers */
  538         initializecpu();
  539 
  540         /* set up FPU state on the AP */
  541         fpuinit();
  542 
  543         /* A quick check from sanity claus */
  544         if (PCPU_GET(apic_id) != lapic_id()) {
  545                 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
  546                 printf("SMP: actual apic_id = %d\n", lapic_id());
  547                 printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
  548                 panic("cpuid mismatch! boom!!");
  549         }
  550 
  551         /* Initialize curthread. */
  552         KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
  553         PCPU_SET(curthread, PCPU_GET(idlethread));
  554 
  555         mtx_lock_spin(&ap_boot_mtx);
  556 
  557         /* Init local apic for irq's */
  558         lapic_setup(1);
  559 
  560         /* Set memory range attributes for this CPU to match the BSP */
  561         mem_range_AP_init();
  562 
  563         smp_cpus++;
  564 
  565         CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
  566         printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
  567 
  568         /* Determine if we are a logical CPU. */
  569         if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
  570                 logical_cpus_mask |= PCPU_GET(cpumask);
  571         
  572         /* Determine if we are a hyperthread. */
  573         if (hyperthreading_cpus > 1 &&
  574             PCPU_GET(apic_id) % hyperthreading_cpus != 0)
  575                 hyperthreading_cpus_mask |= PCPU_GET(cpumask);
  576 
  577         /* Build our map of 'other' CPUs. */
  578         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  579 
  580         if (bootverbose)
  581                 lapic_dump("AP");
  582 
  583         if (smp_cpus == mp_ncpus) {
  584                 /* enable IPIs, TLB shootdown, freezes, etc. */
  585                 atomic_store_rel_int(&smp_started, 1);
  586                 smp_active = 1;  /* historic */
  587         }
  588 
  589         /*
  590          * Enable global pages TLB extension
  591          * This also implicitly flushes the TLB 
  592          */
  593 
  594         load_cr4(rcr4() | CR4_PGE);
  595 
  596         mtx_unlock_spin(&ap_boot_mtx);
  597 
  598         /* wait until all the APs are up */
  599         while (smp_started == 0)
  600                 ia32_pause();
  601 
  602         sched_throw(NULL);
  603 
  604         panic("scheduler returned us to %s", __func__);
  605         /* NOTREACHED */
  606 }
  607 
  608 /*******************************************************************
  609  * local functions and data
  610  */
  611 
  612 /*
  613  * We tell the I/O APIC code about all the CPUs we want to receive
  614  * interrupts.  If we don't want certain CPUs to receive IRQs we
  615  * can simply not tell the I/O APIC code about them in this function.
  616  * We also do not tell it about the BSP since it tells itself about
  617  * the BSP internally to work with UP kernels and on UP machines.
  618  */
  619 static void
  620 set_interrupt_apic_ids(void)
  621 {
  622         u_int i, apic_id;
  623 
  624         for (i = 0; i < MAXCPU; i++) {
  625                 apic_id = cpu_apic_ids[i];
  626                 if (apic_id == -1)
  627                         continue;
  628                 if (cpu_info[apic_id].cpu_bsp)
  629                         continue;
  630                 if (cpu_info[apic_id].cpu_disabled)
  631                         continue;
  632 
  633                 /* Don't let hyperthreads service interrupts. */
  634                 if (hyperthreading_cpus > 1 &&
  635                     apic_id % hyperthreading_cpus != 0)
  636                         continue;
  637 
  638                 intr_add_cpu(i);
  639         }
  640 }
  641 
  642 /*
  643  * Assign logical CPU IDs to local APICs.
  644  */
  645 static void
  646 assign_cpu_ids(void)
  647 {
  648         u_int i;
  649 
  650         TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
  651             &hyperthreading_allowed);
  652 
  653         /* Check for explicitly disabled CPUs. */
  654         for (i = 0; i <= MAX_APIC_ID; i++) {
  655                 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
  656                         continue;
  657 
  658                 if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
  659                         cpu_info[i].cpu_hyperthread = 1;
  660 #if defined(SCHED_ULE)
  661                         /*
  662                          * Don't use HT CPU if it has been disabled by a
  663                          * tunable.
  664                          */
  665                         if (hyperthreading_allowed == 0) {
  666                                 cpu_info[i].cpu_disabled = 1;
  667                                 continue;
  668                         }
  669 #endif
  670                 }
  671 
  672                 /* Don't use this CPU if it has been disabled by a tunable. */
  673                 if (resource_disabled("lapic", i)) {
  674                         cpu_info[i].cpu_disabled = 1;
  675                         continue;
  676                 }
  677         }
  678 
  679         /*
  680          * Assign CPU IDs to local APIC IDs and disable any CPUs
  681          * beyond MAXCPU.  CPU 0 has already been assigned to the BSP,
  682          * so we only have to assign IDs for APs.
  683          */
  684         mp_ncpus = 1;
  685         for (i = 0; i <= MAX_APIC_ID; i++) {
  686                 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
  687                     cpu_info[i].cpu_disabled)
  688                         continue;
  689 
  690                 if (mp_ncpus < MAXCPU) {
  691                         cpu_apic_ids[mp_ncpus] = i;
  692                         mp_ncpus++;
  693                 } else
  694                         cpu_info[i].cpu_disabled = 1;
  695         }
  696         KASSERT(mp_maxid >= mp_ncpus - 1,
  697             ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
  698             mp_ncpus));         
  699 }
  700 
  701 /*
  702  * start each AP in our list
  703  */
  704 static int
  705 start_all_aps(void)
  706 {
  707         vm_offset_t va = boot_address + KERNBASE;
  708         u_int64_t *pt4, *pt3, *pt2;
  709         u_int32_t mpbioswarmvec;
  710         int apic_id, cpu, i;
  711         u_char mpbiosreason;
  712 
  713         mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
  714 
  715         /* install the AP 1st level boot code */
  716         pmap_kenter(va, boot_address);
  717         pmap_invalidate_page(kernel_pmap, va);
  718         bcopy(mptramp_start, (void *)va, bootMP_size);
  719 
  720         /* Locate the page tables, they'll be below the trampoline */
  721         pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
  722         pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
  723         pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
  724 
  725         /* Create the initial 1GB replicated page tables */
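              /*
               * Every PML4 slot points at the same PDP page and every PDP
               * slot at the same PD page, whose 512 2MB entries identity
               * map the first 1GB of physical memory.  The low 1GB is
               * therefore visible at every 1GB-aligned virtual address,
               * which lets the trampoline enable paging while still
               * executing at its low physical address.
               */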
  726         for (i = 0; i < 512; i++) {
  727                 /* Each slot of the level 4 pages points to the same level 3 page */
  728                 pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
  729                 pt4[i] |= PG_V | PG_RW | PG_U;
  730 
  731                 /* Each slot of the level 3 pages points to the same level 2 page */
  732                 pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
  733                 pt3[i] |= PG_V | PG_RW | PG_U;
  734 
  735                 /* The level 2 page slots are mapped with 2MB pages for 1GB. */
  736                 pt2[i] = i * (2 * 1024 * 1024);
  737                 pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
  738         }
  739 
  740         /* save the current value of the warm-start vector */
  741         mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
  742         outb(CMOS_REG, BIOS_RESET);
  743         mpbiosreason = inb(CMOS_DATA);
  744 
  745         /* setup a vector to our boot code */
  746         *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
  747         *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
  748         outb(CMOS_REG, BIOS_RESET);
  749         outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
  750 
  751         /* start each AP */
  752         for (cpu = 1; cpu < mp_ncpus; cpu++) {
  753                 apic_id = cpu_apic_ids[cpu];
  754 
  755                 /* allocate and set up an idle stack data page */
  756                 bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
  757                 doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
  758 
  759                 bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
  760                 bootAP = cpu;
  761 
  762                 /* attempt to start the Application Processor */
  763                 if (!start_ap(apic_id)) {
  764                         /* restore the warmstart vector */
  765                         *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
  766                         panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
  767                 }
  768 
  769                 all_cpus |= (1 << cpu);         /* record AP in CPU map */
  770         }
  771 
  772         /* build our map of 'other' CPUs */
  773         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  774 
  775         /* restore the warmstart vector */
  776         *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
  777 
  778         outb(CMOS_REG, BIOS_RESET);
  779         outb(CMOS_DATA, mpbiosreason);
  780 
  781         /* number of APs actually started */
  782         return mp_naps;
  783 }
  784 
  785 
  786 /*
  787  * This function starts the AP (application processor) identified
  788  * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
  789  * to accomplish this.  This is necessary because of the nuances
  790  * of the different hardware we might encounter.  It isn't pretty,
  791  * but it seems to work.
  792  */
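      /*
       * The sequence below is the classic "universal startup algorithm":
       * an INIT IPI (assert, then deassert) followed by two STARTUP IPIs,
       * with delays in between.  The STARTUP vector field carries the
       * physical page number of the real-mode entry point, which is why
       * boot_address must be page-aligned and below 1MB.
       */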
  793 static int
  794 start_ap(int apic_id)
  795 {
  796         int vector, ms;
  797         int cpus;
  798 
  799         /* calculate the vector */
  800         vector = (boot_address >> 12) & 0xff;
  801 
  802         /* used as a watchpoint to signal AP startup */
  803         cpus = mp_naps;
  804 
  805         /*
  806          * First we do an INIT/RESET IPI.  This INIT IPI might be run,
  807          * resetting and running the target CPU; or it might be latched
  808          * (P5 bug), leaving the CPU waiting for a STARTUP IPI; or it
  809          * might be ignored entirely.
  810          */
  811 
  812         /* do an INIT IPI: assert RESET */
  813         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  814             APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
  815 
  816         /* wait for pending status end */
  817         lapic_ipi_wait(-1);
  818 
  819         /* do an INIT IPI: deassert RESET */
  820         lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
  821             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
  822 
  823         /* wait for pending status end */
  824         DELAY(10000);           /* wait ~10mS */
  825         lapic_ipi_wait(-1);
  826 
  827         /*
  828          * Next we do a STARTUP IPI.  The previous INIT IPI might still
  829          * be latched (P5 bug), in which case this first STARTUP
  830          * terminates immediately and the latched INIT IPI continues; or
  831          * the previous INIT IPI has already run and this STARTUP IPI
  832          * will run; or the previous INIT IPI was ignored and this
  833          * STARTUP IPI will run.
  834          */
  835 
  836         /* do a STARTUP IPI */
  837         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  838             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  839             vector, apic_id);
  840         lapic_ipi_wait(-1);
  841         DELAY(200);             /* wait ~200uS */
  842 
  843         /*
  844          * Finally we do a second STARTUP IPI.  It runs if the previous
  845          * STARTUP IPI was cancelled by a latched INIT IPI; otherwise it
  846          * is ignored, since only one STARTUP IPI is recognized after a
  847          * hardware RESET or INIT IPI.
  848          */
  849 
  850         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  851             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  852             vector, apic_id);
  853         lapic_ipi_wait(-1);
  854         DELAY(200);             /* wait ~200uS */
  855 
  856         /* Wait up to 5 seconds for it to start. */
  857         for (ms = 0; ms < 5000; ms++) {
  858                 if (mp_naps > cpus)
  859                         return 1;       /* return SUCCESS */
  860                 DELAY(1000);
  861         }
  862         return 0;               /* return FAILURE */
  863 }
  864 
  865 /*
  866  * Flush the TLB on all other CPUs
  867  */
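      /*
       * Rendezvous protocol: publish the address range under smp_ipi_mtx,
       * zero smp_tlb_wait, send the invalidation vector to all other CPUs,
       * then spin until smp_tlb_wait reaches the number of other CPUs.
       * Each target's invalidation handler (not in this file) increments
       * smp_tlb_wait once its invalidation is done.
       */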
  868 static void
  869 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  870 {
  871         u_int ncpu;
  872 
  873         ncpu = mp_ncpus - 1;    /* does not shootdown self */
  874         if (ncpu < 1)
  875                 return;         /* no other cpus */
  876         if (!(read_rflags() & PSL_I))
  877                 panic("%s: interrupts disabled", __func__);
  878         mtx_lock_spin(&smp_ipi_mtx);
  879         smp_tlb_addr1 = addr1;
  880         smp_tlb_addr2 = addr2;
  881         atomic_store_rel_int(&smp_tlb_wait, 0);
  882         ipi_all_but_self(vector);
  883         while (smp_tlb_wait < ncpu)
  884                 ia32_pause();
  885         mtx_unlock_spin(&smp_ipi_mtx);
  886 }
  887 
  888 static void
  889 smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  890 {
  891         int ncpu, othercpus;
  892 
  893         othercpus = mp_ncpus - 1;
  894         if (mask == (u_int)-1) {
  895                 ncpu = othercpus;
  896                 if (ncpu < 1)
  897                         return;
  898         } else {
  899                 mask &= ~PCPU_GET(cpumask);
  900                 if (mask == 0)
  901                         return;
  902                 ncpu = bitcount32(mask);
  903                 if (ncpu > othercpus) {
  904                         /* XXX this should be a panic offence */
  905                         printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
  906                             ncpu, othercpus);
  907                         ncpu = othercpus;
  908                 }
  909                 /* XXX should be a panic, implied by mask == 0 above */
  910                 if (ncpu < 1)
  911                         return;
  912         }
  913         if (!(read_rflags() & PSL_I))
  914                 panic("%s: interrupts disabled", __func__);
  915         mtx_lock_spin(&smp_ipi_mtx);
  916         smp_tlb_addr1 = addr1;
  917         smp_tlb_addr2 = addr2;
  918         atomic_store_rel_int(&smp_tlb_wait, 0);
  919         if (mask == (u_int)-1)
  920                 ipi_all_but_self(vector);
  921         else
  922                 ipi_selected(mask, vector);
  923         while (smp_tlb_wait < ncpu)
  924                 ia32_pause();
  925         mtx_unlock_spin(&smp_ipi_mtx);
  926 }
  927 
  928 void
  929 smp_cache_flush(void)
  930 {
  931 
  932         if (smp_started)
  933                 smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
  934 }
  935 
  936 void
  937 smp_invltlb(void)
  938 {
  939 
  940         if (smp_started) {
  941                 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
  942         }
  943 }
  944 
  945 void
  946 smp_invlpg(vm_offset_t addr)
  947 {
  948 
  949         if (smp_started)
  950                 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
  951 }
  952 
  953 void
  954 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
  955 {
  956 
  957         if (smp_started) {
  958                 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
  959         }
  960 }
  961 
  962 void
  963 smp_masked_invltlb(u_int mask)
  964 {
  965 
  966         if (smp_started) {
  967                 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
  968         }
  969 }
  970 
  971 void
  972 smp_masked_invlpg(u_int mask, vm_offset_t addr)
  973 {
  974 
  975         if (smp_started) {
  976                 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
  977         }
  978 }
  979 
  980 void
  981 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
  982 {
  983 
  984         if (smp_started) {
  985                 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
  986         }
  987 }
  988 
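      /*
       * Handler for IPI_BITMAP_VECTOR: atomically fetch and clear this
       * CPU's pending-IPI bits and act on each request.  Only IPI_PREEMPT
       * requires explicit work here.
       */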
  989 void
  990 ipi_bitmap_handler(struct trapframe frame)
  991 {
  992         int cpu = PCPU_GET(cpuid);
  993         u_int ipi_bitmap;
  994 
  995         ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
  996 
  997         if (ipi_bitmap & (1 << IPI_PREEMPT)) {
  998                 struct thread *running_thread = curthread;
  999                 thread_lock(running_thread);
 1000                 if (running_thread->td_critnest > 1) 
 1001                         running_thread->td_owepreempt = 1;
 1002                 else            
 1003                         mi_switch(SW_INVOL | SW_PREEMPT, NULL);
 1004                 thread_unlock(running_thread);
 1005         }
 1006 
 1007         /* Nothing to do for AST */
 1008 }
 1009 
 1010 /*
 1011  * send an IPI to a set of cpus.
 1012  */
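      /*
       * Bitmapped IPIs (those for which IPI_IS_BITMAPED() is true, such as
       * IPI_PREEMPT) are coalesced: the request is OR-ed into
       * cpu_ipi_pending[cpu] and the actual IPI_BITMAP_VECTOR interrupt is
       * only sent if the word was previously empty, so several requests
       * can be delivered by a single interrupt.
       */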
 1013 void
 1014 ipi_selected(u_int32_t cpus, u_int ipi)
 1015 {
 1016         int cpu;
 1017         u_int bitmap = 0;
 1018         u_int old_pending;
 1019         u_int new_pending;
 1020 
 1021         if (IPI_IS_BITMAPED(ipi)) { 
 1022                 bitmap = 1 << ipi;
 1023                 ipi = IPI_BITMAP_VECTOR;
 1024         }
 1025 
 1026 #ifdef STOP_NMI
 1027         if (ipi == IPI_STOP && stop_cpus_with_nmi) {
 1028                 ipi_nmi_selected(cpus);
 1029                 return;
 1030         }
 1031 #endif
 1032         CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 1033         while ((cpu = ffs(cpus)) != 0) {
 1034                 cpu--;
 1035                 cpus &= ~(1 << cpu);
 1036 
 1037                 KASSERT(cpu_apic_ids[cpu] != -1,
 1038                     ("IPI to non-existent CPU %d", cpu));
 1039 
 1040                 if (bitmap) {
 1041                         do {
 1042                                 old_pending = cpu_ipi_pending[cpu];
 1043                                 new_pending = old_pending | bitmap;
 1044                         } while  (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending));  
 1045 
 1046                         if (old_pending)
 1047                                 continue;
 1048                 }
 1049 
 1050                 lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
 1051         }
 1052 
 1053 }
 1054 
 1055 /*
 1056  * send an IPI containing 'ipi' to all CPUs, including myself
 1057  */
 1058 void
 1059 ipi_all(u_int ipi)
 1060 {
 1061 
 1062         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1063                 ipi_selected(all_cpus, ipi);
 1064                 return;
 1065         }
 1066         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1067         lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
 1068 }
 1069 
 1070 /*
 1071  * send an IPI to all CPUs EXCEPT myself
 1072  */
 1073 void
 1074 ipi_all_but_self(u_int ipi)
 1075 {
 1076 
 1077         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1078                 ipi_selected(PCPU_GET(other_cpus), ipi);
 1079                 return;
 1080         }
 1081         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1082         lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 1083 }
 1084 
 1085 /*
 1086  * send an IPI to myself
 1087  */
 1088 void
 1089 ipi_self(u_int ipi)
 1090 {
 1091 
 1092         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1093                 ipi_selected(PCPU_GET(cpumask), ipi);
 1094                 return;
 1095         }
 1096         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1097         lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
 1098 }
 1099 
 1100 #ifdef STOP_NMI
 1101 /*
 1102  * send NMI IPI to selected CPUs
 1103  */
 1104 
 1105 #define BEFORE_SPIN     1000000
 1106 
 1107 void
 1108 ipi_nmi_selected(u_int32_t cpus)
 1109 {
 1110         int cpu;
 1111         register_t icrlo;
 1112 
 1113         icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 
 1114                 | APIC_TRIGMOD_EDGE; 
 1115         
 1116         CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);
 1117 
 1118         atomic_set_int(&ipi_nmi_pending, cpus);
 1119 
 1120         while ((cpu = ffs(cpus)) != 0) {
 1121                 cpu--;
 1122                 cpus &= ~(1 << cpu);
 1123 
 1124                 KASSERT(cpu_apic_ids[cpu] != -1,
 1125                     ("IPI NMI to non-existent CPU %d", cpu));
 1126                 
 1127                 /* Wait for an earlier IPI to finish. */
 1128                 if (!lapic_ipi_wait(BEFORE_SPIN))
 1129                         panic("ipi_nmi_selected: previous IPI has not cleared");
 1130 
 1131                 lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
 1132         }
 1133 }
 1134 
 1135 int
 1136 ipi_nmi_handler(void)
 1137 {
 1138         int cpumask = PCPU_GET(cpumask);
 1139 
 1140         if (!(ipi_nmi_pending & cpumask))
 1141                 return 1;
 1142 
 1143         atomic_clear_int(&ipi_nmi_pending, cpumask);
 1144         cpustop_handler();
 1145         return 0;
 1146 }
 1147      
 1148 #endif /* STOP_NMI */
 1149 
 1150 /*
 1151  * Handle an IPI_STOP by saving our current context and spinning until we
 1152  * are resumed.
 1153  */
 1154 void
 1155 cpustop_handler(void)
 1156 {
 1157         int cpu = PCPU_GET(cpuid);
 1158         int cpumask = PCPU_GET(cpumask);
 1159 
 1160         savectx(&stoppcbs[cpu]);
 1161 
 1162         /* Indicate that we are stopped */
 1163         atomic_set_int(&stopped_cpus, cpumask);
 1164 
 1165         /* Wait for restart */
 1166         while (!(started_cpus & cpumask))
 1167             ia32_pause();
 1168 
 1169         atomic_clear_int(&started_cpus, cpumask);
 1170         atomic_clear_int(&stopped_cpus, cpumask);
 1171 
 1172         if (cpu == 0 && cpustop_restartfunc != NULL) {
 1173                 cpustop_restartfunc();
 1174                 cpustop_restartfunc = NULL;
 1175         }
 1176 }
 1177 
 1178 /*
 1179  * This is called once the rest of the system is up and running and we're
 1180  * ready to let the APs out of the pen.
 1181  */
 1182 static void
 1183 release_aps(void *dummy __unused)
 1184 {
 1185 
 1186         if (mp_ncpus == 1) 
 1187                 return;
 1188         atomic_store_rel_int(&aps_ready, 1);
 1189         while (smp_started == 0)
 1190                 ia32_pause();
 1191 }
 1192 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 1193 
 1194 static int
 1195 sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
 1196 {
 1197         u_int mask;
 1198         int error;
 1199 
 1200         mask = hlt_cpus_mask;
 1201         error = sysctl_handle_int(oidp, &mask, 0, req);
 1202         if (error || !req->newptr)
 1203                 return (error);
 1204 
 1205         if (logical_cpus_mask != 0 &&
 1206             (mask & logical_cpus_mask) == logical_cpus_mask)
 1207                 hlt_logical_cpus = 1;
 1208         else
 1209                 hlt_logical_cpus = 0;
 1210 
 1211         if (! hyperthreading_allowed)
 1212                 mask |= hyperthreading_cpus_mask;
 1213 
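              /* Never halt every CPU; keep at least the BSP (CPU 0) running. */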
 1214         if ((mask & all_cpus) == all_cpus)
 1215                 mask &= ~(1<<0);
 1216         hlt_cpus_mask = mask;
 1217         return (error);
 1218 }
 1219 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
 1220     0, 0, sysctl_hlt_cpus, "IU",
 1221     "Bitmap of CPUs to halt.  101 (binary) will halt CPUs 0 and 2.");
 1222 
 1223 static int
 1224 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
 1225 {
 1226         int disable, error;
 1227 
 1228         disable = hlt_logical_cpus;
 1229         error = sysctl_handle_int(oidp, &disable, 0, req);
 1230         if (error || !req->newptr)
 1231                 return (error);
 1232 
 1233         if (disable)
 1234                 hlt_cpus_mask |= logical_cpus_mask;
 1235         else
 1236                 hlt_cpus_mask &= ~logical_cpus_mask;
 1237 
 1238         if (! hyperthreading_allowed)
 1239                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1240 
 1241         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1242                 hlt_cpus_mask &= ~(1<<0);
 1243 
 1244         hlt_logical_cpus = disable;
 1245         return (error);
 1246 }
 1247 
 1248 static int
 1249 sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
 1250 {
 1251         int allowed, error;
 1252 
 1253         allowed = hyperthreading_allowed;
 1254         error = sysctl_handle_int(oidp, &allowed, 0, req);
 1255         if (error || !req->newptr)
 1256                 return (error);
 1257 
 1258 #ifdef SCHED_ULE
 1259         /*
 1260          * SCHED_ULE doesn't allow enabling/disabling HT cores at
 1261          * run-time.
 1262          */
 1263         if (allowed != hyperthreading_allowed)
 1264                 return (ENOTSUP);
 1265         return (error);
 1266 #endif
 1267 
 1268         if (allowed)
 1269                 hlt_cpus_mask &= ~hyperthreading_cpus_mask;
 1270         else
 1271                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1272 
 1273         if (logical_cpus_mask != 0 &&
 1274             (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
 1275                 hlt_logical_cpus = 1;
 1276         else
 1277                 hlt_logical_cpus = 0;
 1278 
 1279         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1280                 hlt_cpus_mask &= ~(1<<0);
 1281 
 1282         hyperthreading_allowed = allowed;
 1283         return (error);
 1284 }
 1285 
 1286 static void
 1287 cpu_hlt_setup(void *dummy __unused)
 1288 {
 1289 
 1290         if (logical_cpus_mask != 0) {
 1291                 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
 1292                     &hlt_logical_cpus);
 1293                 sysctl_ctx_init(&logical_cpu_clist);
 1294                 SYSCTL_ADD_PROC(&logical_cpu_clist,
 1295                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1296                     "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
 1297                     sysctl_hlt_logical_cpus, "IU", "");
 1298                 SYSCTL_ADD_UINT(&logical_cpu_clist,
 1299                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1300                     "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
 1301                     &logical_cpus_mask, 0, "");
 1302 
 1303                 if (hlt_logical_cpus)
 1304                         hlt_cpus_mask |= logical_cpus_mask;
 1305 
 1306                 /*
 1307                  * If necessary for security purposes, force
 1308                  * hyperthreading off, regardless of the value
 1309                  * of hlt_logical_cpus.
 1310                  */
 1311                 if (hyperthreading_cpus_mask) {
 1312                         SYSCTL_ADD_PROC(&logical_cpu_clist,
 1313                             SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1314                             "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
 1315                             0, 0, sysctl_hyperthreading_allowed, "IU", "");
 1316                         if (! hyperthreading_allowed)
 1317                                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1318                 }
 1319         }
 1320 }
 1321 SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
 1322 
 1323 int
 1324 mp_grab_cpu_hlt(void)
 1325 {
 1326         u_int mask = PCPU_GET(cpumask);
 1327 #ifdef MP_WATCHDOG
 1328         u_int cpuid = PCPU_GET(cpuid);
 1329 #endif
 1330         int retval;
 1331 
 1332 #ifdef MP_WATCHDOG
 1333         ap_watchdog(cpuid);
 1334 #endif
 1335 
 1336         retval = mask & hlt_cpus_mask;
 1337         while (mask & hlt_cpus_mask)
 1338                 __asm __volatile("sti; hlt" : : : "memory");
 1339         return (retval);
 1340 }
