FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/mp_machdep.c

    1 /*-
    2  * Copyright (c) 1996, by Steve Passe
    3  * Copyright (c) 2003, by Peter Wemm
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. The name of the developer may NOT be used to endorse or promote products
   12  *    derived from this software without specific prior written permission.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 #include <sys/cdefs.h>
   28 __FBSDID("$FreeBSD$");
   29 
   30 #include "opt_cpu.h"
   31 #include "opt_kstack_pages.h"
   32 #include "opt_mp_watchdog.h"
   33 #include "opt_sched.h"
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/bus.h>
   38 #ifdef GPROF 
   39 #include <sys/gmon.h>
   40 #endif
   41 #include <sys/kernel.h>
   42 #include <sys/ktr.h>
   43 #include <sys/lock.h>
   44 #include <sys/malloc.h>
   45 #include <sys/memrange.h>
   46 #include <sys/mutex.h>
   47 #include <sys/pcpu.h>
   48 #include <sys/proc.h>
   49 #include <sys/sched.h>
   50 #include <sys/smp.h>
   51 #include <sys/sysctl.h>
   52 
   53 #include <vm/vm.h>
   54 #include <vm/vm_param.h>
   55 #include <vm/pmap.h>
   56 #include <vm/vm_kern.h>
   57 #include <vm/vm_extern.h>
   58 
   59 #include <machine/apicreg.h>
   60 #include <machine/md_var.h>
   61 #include <machine/mp_watchdog.h>
   62 #include <machine/pcb.h>
   63 #include <machine/psl.h>
   64 #include <machine/smp.h>
   65 #include <machine/specialreg.h>
   66 #include <machine/tss.h>
   67 
   68 #define WARMBOOT_TARGET         0
   69 #define WARMBOOT_OFF            (KERNBASE + 0x0467)
   70 #define WARMBOOT_SEG            (KERNBASE + 0x0469)
   71 
   72 #define CMOS_REG                (0x70)
   73 #define CMOS_DATA               (0x71)
   74 #define BIOS_RESET              (0x0f)
   75 #define BIOS_WARM               (0x0a)
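       /*
        * Writing BIOS_WARM (0x0a) to the CMOS shutdown-status byte (offset
        * 0x0f) asks the BIOS to skip POST on the next reset and jump through
        * the warm-boot vector stored at physical 0x467 (offset) / 0x469
        * (segment); start_all_aps() below points that vector at the AP
        * trampoline, apparently retained from the older MP-specification
        * "warm reset" startup path.
        */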
   76 
   77 /* lock region used by kernel profiling */
   78 int     mcount_lock;
   79 
    80 int     mp_naps;                /* # of Application Processors */
   81 int     boot_cpu_id = -1;       /* designated BSP */
   82 extern  int nkpt;
   83 
   84 extern  struct pcpu __pcpu[];
   85 
   86 /*
    87  * CPU topology map data structures for HTT.
   88  */
   89 static struct cpu_group mp_groups[MAXCPU];
   90 static struct cpu_top mp_top;
   91 
   92 /* AP uses this during bootstrap.  Do not staticize.  */
   93 char *bootSTK;
   94 static int bootAP;
   95 
   96 /* Free these after use */
   97 void *bootstacks[MAXCPU];
   98 
   99 /* Temporary holder for double fault stack */
  100 char *doublefault_stack;
  101 
  102 /* Hotwire a 0->4MB V==P mapping */
  103 extern pt_entry_t *KPTphys;
  104 
  105 /* SMP page table page */
  106 extern pt_entry_t *SMPpt;
  107 
  108 struct pcb stoppcbs[MAXCPU];
  109 
  110 /* Variables needed for SMP tlb shootdown. */
  111 vm_offset_t smp_tlb_addr1;
  112 vm_offset_t smp_tlb_addr2;
  113 volatile int smp_tlb_wait;
  114 
  115 extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
  116 
  117 #ifdef STOP_NMI
  118 volatile cpumask_t ipi_nmi_pending;
  119 
  120 static void     ipi_nmi_selected(u_int32_t cpus);
  121 #endif 
  122 
  123 /*
  124  * Local data and functions.
  125  */
  126 
  127 #ifdef STOP_NMI
  128 /* 
  129  * Provide an alternate method of stopping other CPUs. If another CPU has
  130  * disabled interrupts the conventional STOP IPI will be blocked. This 
  131  * NMI-based stop should get through in that case.
  132  */
  133 static int stop_cpus_with_nmi = 1;
  134 SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW,
  135     &stop_cpus_with_nmi, 0, "");
  136 TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi);
  137 #else
  138 #define stop_cpus_with_nmi      0
  139 #endif
  140 
  141 static u_int logical_cpus;
  142 
   143 /* used to hold the APs until we are ready to release them */
  144 static struct mtx ap_boot_mtx;
  145 
  146 /* Set to 1 once we're ready to let the APs out of the pen. */
  147 static volatile int aps_ready = 0;
  148 
  149 /*
  150  * Store data from cpu_add() until later in the boot when we actually setup
  151  * the APs.
  152  */
  153 struct cpu_info {
  154         int     cpu_present:1;
  155         int     cpu_bsp:1;
  156         int     cpu_disabled:1;
  157 } static cpu_info[MAX_APIC_ID + 1];
  158 int cpu_apic_ids[MAXCPU];
  159 
  160 /* Holds pending bitmap based IPIs per CPU */
  161 static volatile u_int cpu_ipi_pending[MAXCPU];
  162 
  163 static u_int boot_address;
  164 
  165 static void     assign_cpu_ids(void);
  166 static void     set_interrupt_apic_ids(void);
  167 static int      start_all_aps(void);
  168 static int      start_ap(int apic_id);
  169 static void     release_aps(void *dummy);
  170 
  171 static int      hlt_logical_cpus;
  172 static u_int    hyperthreading_cpus;
  173 static cpumask_t        hyperthreading_cpus_mask;
  174 static int      hyperthreading_allowed = 1;
  175 static struct   sysctl_ctx_list logical_cpu_clist;
  176 static u_int    bootMP_size;
  177 
  178 static void
  179 mem_range_AP_init(void)
  180 {
  181         if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
  182                 mem_range_softc.mr_op->initAP(&mem_range_softc);
  183 }
  184 
  185 void
  186 mp_topology(void)
  187 {
  188         struct cpu_group *group;
  189         int apic_id;
  190         int groups;
  191         int cpu;
  192 
  193         /* Build the smp_topology map. */
  194         /* Nothing to do if there is no HTT support. */
  195         if (hyperthreading_cpus <= 1)
  196                 return;
  197         group = &mp_groups[0];
  198         groups = 1;
  199         for (cpu = 0, apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) {
  200                 if (!cpu_info[apic_id].cpu_present)
  201                         continue;
  202                 /*
  203                  * If the current group has members and we're not a logical
  204                  * cpu, create a new group.
  205                  */
  206                 if (group->cg_count != 0 &&
  207                     (apic_id % hyperthreading_cpus) == 0) {
  208                         group++;
  209                         groups++;
  210                 }
  211                 group->cg_count++;
  212                 group->cg_mask |= 1 << cpu;
  213                 cpu++;
  214         }
  215 
  216         mp_top.ct_count = groups;
  217         mp_top.ct_group = mp_groups;
  218         smp_topology = &mp_top;
  219 }
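       /*
        * Example (hypothetical): with hyperthreading_cpus == 2 and four
        * present APIC IDs 0-3, the loop in mp_topology() yields two
        * cpu_groups with cg_mask 0x3 (cpus 0 and 1) and 0xc (cpus 2 and 3).
        */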
  220 
  221 /*
  222  * Calculate usable address in base memory for AP trampoline code.
  223  */
  224 u_int
  225 mp_bootaddress(u_int basemem)
  226 {
  227 
  228         bootMP_size = mptramp_end - mptramp_start;
  229         boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */
  230         if (((basemem * 1024) - boot_address) < bootMP_size)
  231                 boot_address -= PAGE_SIZE;      /* not enough, lower by 4k */
  232         /* 3 levels of page table pages */
  233         mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
  234 
  235         return mptramp_pagetables;
  236 }
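       /*
        * The resulting low-memory layout, working down from the top of base
        * memory, is roughly:
        *
        *      basemem * 1024                  top of base memory
        *      boot_address                    AP trampoline code (bootMP_size bytes)
        *      boot_address - 3 * PAGE_SIZE    mptramp_pagetables: PML4, PDP and PD
        *                                      pages built later by start_all_aps()
        */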
  237 
  238 void
  239 cpu_add(u_int apic_id, char boot_cpu)
  240 {
  241 
  242         if (apic_id > MAX_APIC_ID) {
  243                 panic("SMP: APIC ID %d too high", apic_id);
  244                 return;
  245         }
  246         KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
  247             apic_id));
  248         cpu_info[apic_id].cpu_present = 1;
  249         if (boot_cpu) {
  250                 KASSERT(boot_cpu_id == -1,
  251                     ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
  252                     boot_cpu_id));
  253                 boot_cpu_id = apic_id;
  254                 cpu_info[apic_id].cpu_bsp = 1;
  255         }
  256         if (mp_ncpus < MAXCPU) {
  257                 mp_ncpus++;
   258                 mp_maxid = mp_ncpus - 1;
  259         }
  260         if (bootverbose)
  261                 printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
  262                     "AP");
  263 }
  264 
  265 void
  266 cpu_mp_setmaxid(void)
  267 {
  268 
  269         /*
  270          * mp_maxid should be already set by calls to cpu_add().
  271          * Just sanity check its value here.
  272          */
  273         if (mp_ncpus == 0)
  274                 KASSERT(mp_maxid == 0,
  275                     ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
  276         else if (mp_ncpus == 1)
  277                 mp_maxid = 0;
  278         else
  279                 KASSERT(mp_maxid >= mp_ncpus - 1,
  280                     ("%s: counters out of sync: max %d, count %d", __func__,
  281                         mp_maxid, mp_ncpus));           
  282 }
  283 
  284 int
  285 cpu_mp_probe(void)
  286 {
  287 
  288         /*
  289          * Always record BSP in CPU map so that the mbuf init code works
  290          * correctly.
  291          */
  292         all_cpus = 1;
  293         if (mp_ncpus == 0) {
  294                 /*
  295                  * No CPUs were found, so this must be a UP system.  Setup
  296                  * the variables to represent a system with a single CPU
  297                  * with an id of 0.
  298                  */
  299                 mp_ncpus = 1;
  300                 return (0);
  301         }
  302 
  303         /* At least one CPU was found. */
  304         if (mp_ncpus == 1) {
  305                 /*
  306                  * One CPU was found, so this must be a UP system with
  307                  * an I/O APIC.
  308                  */
  309                 mp_maxid = 0;
  310                 return (0);
  311         }
  312 
  313         /* At least two CPUs were found. */
  314         return (1);
  315 }
  316 
  317 /*
   318  * Initialize the IPI handlers and start up the APs.
  319  */
  320 void
  321 cpu_mp_start(void)
  322 {
  323         int i;
  324         u_int threads_per_cache, p[4];
  325 
  326         /* Initialize the logical ID to APIC ID table. */
  327         for (i = 0; i < MAXCPU; i++) {
  328                 cpu_apic_ids[i] = -1;
  329                 cpu_ipi_pending[i] = 0;
  330         }
  331 
  332         /* Install an inter-CPU IPI for TLB invalidation */
  333         setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
  334         setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
  335         setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
  336 
  337         /* Install an inter-CPU IPI for cache invalidation. */
  338         setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0);
  339 
  340         /* Install an inter-CPU IPI for all-CPU rendezvous */
  341         setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);
  342 
  343         /* Install generic inter-CPU IPI handler */
  344         setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
  345                SDT_SYSIGT, SEL_KPL, 0);
  346 
  347         /* Install an inter-CPU IPI for CPU stop/restart */
  348         setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);
  349 
  350         /* Set boot_cpu_id if needed. */
  351         if (boot_cpu_id == -1) {
  352                 boot_cpu_id = PCPU_GET(apic_id);
  353                 cpu_info[boot_cpu_id].cpu_bsp = 1;
  354         } else
  355                 KASSERT(boot_cpu_id == PCPU_GET(apic_id),
  356                     ("BSP's APIC ID doesn't match boot_cpu_id"));
  357         cpu_apic_ids[0] = boot_cpu_id;
  358 
  359         assign_cpu_ids();
  360 
  361         /* Start each Application Processor */
  362         start_all_aps();
  363 
  364         /* Setup the initial logical CPUs info. */
  365         logical_cpus = logical_cpus_mask = 0;
  366         if (cpu_feature & CPUID_HTT)
  367                 logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
  368 
  369         /*
  370          * Work out if hyperthreading is *really* enabled.  This
  371          * is made really ugly by the fact that processors lie: Dual
  372          * core processors claim to be hyperthreaded even when they're
  373          * not, presumably because they want to be treated the same
  374          * way as HTT with respect to per-cpu software licensing.
  375          * At the time of writing (May 12, 2005) the only hyperthreaded
  376          * cpus are from Intel, and Intel's dual-core processors can be
  377          * identified via the "deterministic cache parameters" cpuid
  378          * calls.
  379          */
  380         /*
  381          * First determine if this is an Intel processor which claims
  382          * to have hyperthreading support.
  383          */
  384         if ((cpu_feature & CPUID_HTT) &&
  385             (strcmp(cpu_vendor, "GenuineIntel") == 0)) {
  386                 /*
  387                  * If the "deterministic cache parameters" cpuid calls
  388                  * are available, use them.
  389                  */
  390                 if (cpu_high >= 4) {
  391                         /* Ask the processor about the L1 cache. */
  392                         for (i = 0; i < 1; i++) {
  393                                 cpuid_count(4, i, p);
  394                                 threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
  395                                 if (hyperthreading_cpus < threads_per_cache)
  396                                         hyperthreading_cpus = threads_per_cache;
  397                                 if ((p[0] & 0x1f) == 0)
  398                                         break;
  399                         }
  400                 }
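                       /*
                        * In CPUID leaf 4 ("deterministic cache parameters"),
                        * EAX bits 25:14 give the maximum number of
                        * addressable logical processor IDs sharing the
                        * cache, minus one (hence the 0x3ffc000 mask and the
                        * +1 above), and EAX bits 4:0 are the cache type,
                        * 0 meaning no further cache levels.
                        */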
  401 
  402                 /*
  403                  * If the deterministic cache parameters are not
  404                  * available, or if no caches were reported to exist,
  405                  * just accept what the HTT flag indicated.
  406                  */
  407                 if (hyperthreading_cpus == 0)
  408                         hyperthreading_cpus = logical_cpus;
  409         }
  410 
  411         set_interrupt_apic_ids();
  412 
  413         /* Last, setup the cpu topology now that we have probed CPUs */
  414         mp_topology();
  415 }
  416 
  417 
  418 /*
  419  * Print various information about the SMP system hardware and setup.
  420  */
  421 void
  422 cpu_mp_announce(void)
  423 {
  424         int i, x;
  425 
  426         /* List CPUs */
  427         printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
  428         for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
  429                 if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
  430                         continue;
  431                 if (cpu_info[x].cpu_disabled)
  432                         printf("  cpu (AP): APIC ID: %2d (disabled)\n", x);
  433                 else {
  434                         KASSERT(i < mp_ncpus,
  435                             ("mp_ncpus and actual cpus are out of whack"));
  436                         printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
  437                 }
  438         }
  439 }
  440 
  441 /*
   442  * AP CPUs call this to initialize themselves.
  443  */
  444 void
  445 init_secondary(void)
  446 {
  447         struct pcpu *pc;
  448         u_int64_t msr, cr0;
  449         int cpu, gsel_tss;
  450 
  451         /* Set by the startup code for us to use */
  452         cpu = bootAP;
  453 
  454         /* Init tss */
  455         common_tss[cpu] = common_tss[0];
  456         common_tss[cpu].tss_rsp0 = 0;   /* not used until after switch */
  457         common_tss[cpu].tss_iobase = sizeof(struct amd64tss);
  458         common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];
  459 
  460         gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
  461         ssdtosyssd(&gdt_segs[GPROC0_SEL],
  462            (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
  463 
  464         lgdt(&r_gdt);                   /* does magic intra-segment return */
  465 
  466         /* Get per-cpu data */
  467         pc = &__pcpu[cpu];
  468 
  469         /* prime data page for it to use */
  470         pcpu_init(pc, cpu, sizeof(struct pcpu));
  471         pc->pc_apic_id = cpu_apic_ids[cpu];
  472         pc->pc_prvspace = pc;
  473         pc->pc_curthread = 0;
  474         pc->pc_tssp = &common_tss[cpu];
  475         pc->pc_rsp0 = 0;
  476 
  477         wrmsr(MSR_FSBASE, 0);           /* User value */
  478         wrmsr(MSR_GSBASE, (u_int64_t)pc);
  479         wrmsr(MSR_KGSBASE, (u_int64_t)pc);      /* XXX User value while we're in the kernel */
  480 
  481         lidt(&r_idt);
  482 
  483         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
  484         ltr(gsel_tss);
  485 
  486         /*
  487          * Set to a known state:
  488          * Set by mpboot.s: CR0_PG, CR0_PE
  489          * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
  490          */
  491         cr0 = rcr0();
  492         cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
  493         load_cr0(cr0);
  494 
  495         /* Set up the fast syscall stuff */
  496         msr = rdmsr(MSR_EFER) | EFER_SCE;
  497         wrmsr(MSR_EFER, msr);
  498         wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
  499         wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
  500         msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
  501               ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
  502         wrmsr(MSR_STAR, msr);
  503         wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
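               /*
                * STAR bits 47:32 supply the kernel CS/SS selectors that
                * SYSCALL loads and bits 63:48 the user selector base that
                * SYSRET uses; SF_MASK names the rflags bits (here NT, TF,
                * IF, CF and DF) that the CPU clears on SYSCALL entry.
                */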
  504 
  505         /* Disable local APIC just to be sure. */
  506         lapic_disable();
  507 
  508         /* signal our startup to the BSP. */
  509         mp_naps++;
  510 
   511         /* Spin until the BSP releases the APs. */
  512         while (!aps_ready)
  513                 ia32_pause();
  514 
  515         /* Initialize the PAT MSR. */
  516         pmap_init_pat();
  517 
  518         /* set up CPU registers and state */
  519         cpu_setregs();
  520 
  521         /* set up SSE/NX registers */
  522         initializecpu();
  523 
  524         /* set up FPU state on the AP */
  525         fpuinit();
  526 
  527         /* A quick check from sanity claus */
  528         if (PCPU_GET(apic_id) != lapic_id()) {
  529                 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
  530                 printf("SMP: actual apic_id = %d\n", lapic_id());
  531                 printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
  532                 panic("cpuid mismatch! boom!!");
  533         }
  534 
  535         /* Initialize curthread. */
  536         KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
  537         PCPU_SET(curthread, PCPU_GET(idlethread));
  538 
  539         mtx_lock_spin(&ap_boot_mtx);
  540 
  541         /* Init local apic for irq's */
  542         lapic_setup(1);
  543 
  544         /* Set memory range attributes for this CPU to match the BSP */
  545         mem_range_AP_init();
  546 
  547         smp_cpus++;
  548 
  549         CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
  550         printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
  551 
  552         /* Determine if we are a logical CPU. */
  553         if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
  554                 logical_cpus_mask |= PCPU_GET(cpumask);
  555         
  556         /* Determine if we are a hyperthread. */
  557         if (hyperthreading_cpus > 1 &&
  558             PCPU_GET(apic_id) % hyperthreading_cpus != 0)
  559                 hyperthreading_cpus_mask |= PCPU_GET(cpumask);
  560 
  561         /* Build our map of 'other' CPUs. */
  562         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  563 
  564         if (bootverbose)
  565                 lapic_dump("AP");
  566 
  567         if (smp_cpus == mp_ncpus) {
   568                 /* enable IPIs, TLB shootdown, freezes, etc. */
  569                 atomic_store_rel_int(&smp_started, 1);
  570                 smp_active = 1;  /* historic */
  571         }
  572 
   573         /*
   574          * Enable the global pages TLB extension.
   575          * This also implicitly flushes the TLB.
   576          */
  577 
  578         load_cr4(rcr4() | CR4_PGE);
  579 
  580         mtx_unlock_spin(&ap_boot_mtx);
  581 
   582         /* wait until all the APs are up */
  583         while (smp_started == 0)
  584                 ia32_pause();
  585 
  586         sched_throw(NULL);
  587 
  588         panic("scheduler returned us to %s", __func__);
  589         /* NOTREACHED */
  590 }
  591 
  592 /*******************************************************************
  593  * local functions and data
  594  */
  595 
  596 /*
  597  * We tell the I/O APIC code about all the CPUs we want to receive
  598  * interrupts.  If we don't want certain CPUs to receive IRQs we
  599  * can simply not tell the I/O APIC code about them in this function.
  600  * We also do not tell it about the BSP since it tells itself about
  601  * the BSP internally to work with UP kernels and on UP machines.
  602  */
  603 static void
  604 set_interrupt_apic_ids(void)
  605 {
  606         u_int i, apic_id;
  607 
  608         for (i = 0; i < MAXCPU; i++) {
  609                 apic_id = cpu_apic_ids[i];
  610                 if (apic_id == -1)
  611                         continue;
  612                 if (cpu_info[apic_id].cpu_bsp)
  613                         continue;
  614                 if (cpu_info[apic_id].cpu_disabled)
  615                         continue;
  616 
  617                 /* Don't let hyperthreads service interrupts. */
  618                 if (hyperthreading_cpus > 1 &&
  619                     apic_id % hyperthreading_cpus != 0)
  620                         continue;
  621 
  622                 intr_add_cpu(i);
  623         }
  624 }
  625 
  626 /*
  627  * Assign logical CPU IDs to local APICs.
  628  */
  629 static void
  630 assign_cpu_ids(void)
  631 {
  632         u_int i;
  633 
  634         /* Check for explicitly disabled CPUs. */
  635         for (i = 0; i <= MAX_APIC_ID; i++) {
  636                 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
  637                         continue;
  638 
  639                 /* Don't use this CPU if it has been disabled by a tunable. */
  640                 if (resource_disabled("lapic", i)) {
  641                         cpu_info[i].cpu_disabled = 1;
  642                         continue;
  643                 }
  644         }
  645 
  646         /*
  647          * Assign CPU IDs to local APIC IDs and disable any CPUs
  648          * beyond MAXCPU.  CPU 0 has already been assigned to the BSP,
  649          * so we only have to assign IDs for APs.
  650          */
  651         mp_ncpus = 1;
  652         for (i = 0; i <= MAX_APIC_ID; i++) {
  653                 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
  654                     cpu_info[i].cpu_disabled)
  655                         continue;
  656 
  657                 if (mp_ncpus < MAXCPU) {
  658                         cpu_apic_ids[mp_ncpus] = i;
  659                         mp_ncpus++;
  660                 } else
  661                         cpu_info[i].cpu_disabled = 1;
  662         }
  663         KASSERT(mp_maxid >= mp_ncpus - 1,
  664             ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
  665             mp_ncpus));         
  666 }
  667 
  668 /*
  669  * start each AP in our list
  670  */
  671 static int
  672 start_all_aps(void)
  673 {
  674         vm_offset_t va = boot_address + KERNBASE;
  675         u_int64_t *pt4, *pt3, *pt2;
  676         u_int32_t mpbioswarmvec;
  677         int apic_id, cpu, i;
  678         u_char mpbiosreason;
  679 
  680         mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
  681 
  682         /* install the AP 1st level boot code */
  683         pmap_kenter(va, boot_address);
  684         pmap_invalidate_page(kernel_pmap, va);
  685         bcopy(mptramp_start, (void *)va, bootMP_size);
  686 
  687         /* Locate the page tables, they'll be below the trampoline */
  688         pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
  689         pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
  690         pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
  691 
  692         /* Create the initial 1GB replicated page tables */
  693         for (i = 0; i < 512; i++) {
  694                 /* Each slot of the level 4 pages points to the same level 3 page */
  695                 pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
  696                 pt4[i] |= PG_V | PG_RW | PG_U;
  697 
  698                 /* Each slot of the level 3 pages points to the same level 2 page */
  699                 pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
  700                 pt3[i] |= PG_V | PG_RW | PG_U;
  701 
  702                 /* The level 2 page slots are mapped with 2MB pages for 1GB. */
  703                 pt2[i] = i * (2 * 1024 * 1024);
  704                 pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
  705         }
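               /*
                * Because every PML4 and PDP slot points at the same
                * lower-level page, the low 1GB of physical memory (512 x 2MB
                * mappings) is visible at every 512GB-aligned virtual window,
                * so the trampoline presumably keeps working both at its
                * identity address and at its KERNBASE-relative alias while
                * it switches into long mode.
                */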
  706 
  707         /* save the current value of the warm-start vector */
  708         mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
  709         outb(CMOS_REG, BIOS_RESET);
  710         mpbiosreason = inb(CMOS_DATA);
  711 
  712         /* setup a vector to our boot code */
  713         *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
  714         *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
  715         outb(CMOS_REG, BIOS_RESET);
  716         outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
  717 
  718         /* start each AP */
  719         for (cpu = 1; cpu < mp_ncpus; cpu++) {
  720                 apic_id = cpu_apic_ids[cpu];
  721 
  722                 /* allocate and set up an idle stack data page */
  723                 bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
  724                 doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE);
  725 
  726                 bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
  727                 bootAP = cpu;
  728 
  729                 /* attempt to start the Application Processor */
  730                 if (!start_ap(apic_id)) {
  731                         /* restore the warmstart vector */
  732                         *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
  733                         panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
  734                 }
  735 
  736                 all_cpus |= (1 << cpu);         /* record AP in CPU map */
  737         }
  738 
  739         /* build our map of 'other' CPUs */
  740         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  741 
  742         /* restore the warmstart vector */
  743         *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
  744 
  745         outb(CMOS_REG, BIOS_RESET);
  746         outb(CMOS_DATA, mpbiosreason);
  747 
  748         /* number of APs actually started */
  749         return mp_naps;
  750 }
  751 
  752 
  753 /*
   754  * This function starts the AP (Application Processor) identified
   755  * by the APIC ID 'apic_id'.  It does quite a "song and dance"
  756  * to accomplish this.  This is necessary because of the nuances
  757  * of the different hardware we might encounter.  It isn't pretty,
  758  * but it seems to work.
  759  */
  760 static int
  761 start_ap(int apic_id)
  762 {
  763         int vector, ms;
  764         int cpus;
  765 
  766         /* calculate the vector */
  767         vector = (boot_address >> 12) & 0xff;
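               /*
                * The STARTUP IPI vector is the physical page number of the
                * entry point: the AP starts in real mode at vector << 12
                * (CS = vector << 8, IP = 0), which is why the trampoline
                * must sit page-aligned in the first megabyte.
                */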
  768 
  769         /* used as a watchpoint to signal AP startup */
  770         cpus = mp_naps;
  771 
   772         /*
   773          * First we do an INIT/RESET IPI.  This INIT IPI might be run,
   774          * resetting and running the target CPU, OR it might be latched
   775          * (P5 bug), leaving the CPU waiting for a STARTUP IPI, OR it
   776          * might be ignored.
   777          */
  778 
  779         /* do an INIT IPI: assert RESET */
  780         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  781             APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
  782 
  783         /* wait for pending status end */
  784         lapic_ipi_wait(-1);
  785 
  786         /* do an INIT IPI: deassert RESET */
  787         lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
  788             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
  789 
  790         /* wait for pending status end */
   791         DELAY(10000);           /* wait ~10ms */
  792         lapic_ipi_wait(-1);
  793 
   794         /*
   795          * Next we do a STARTUP IPI.  The previous INIT IPI might still be
   796          * latched (P5 bug); this first STARTUP would then terminate
   797          * immediately and the previously started INIT IPI would continue.
   798          * OR the previous INIT IPI has already run and this STARTUP IPI
   799          * will run.  OR the previous INIT IPI was ignored and this
   800          * STARTUP IPI will run.
   801          */
  802 
  803         /* do a STARTUP IPI */
  804         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  805             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  806             vector, apic_id);
  807         lapic_ipi_wait(-1);
   808         DELAY(200);             /* wait ~200us */
  809 
   810         /*
   811          * Finally we do a second STARTUP IPI.  It should run if the
   812          * previous STARTUP IPI was cancelled by a latched INIT IPI;
   813          * otherwise it will be ignored, as only ONE STARTUP IPI is
   814          * recognized after a hardware RESET or INIT IPI.
   815          */
  816 
  817         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  818             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  819             vector, apic_id);
  820         lapic_ipi_wait(-1);
   821         DELAY(200);             /* wait ~200us */
  822 
  823         /* Wait up to 5 seconds for it to start. */
  824         for (ms = 0; ms < 5000; ms++) {
  825                 if (mp_naps > cpus)
  826                         return 1;       /* return SUCCESS */
  827                 DELAY(1000);
  828         }
  829         return 0;               /* return FAILURE */
  830 }
  831 
  832 /*
   833  * Flush the TLB on all other CPUs
  834  */
  835 static void
  836 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  837 {
  838         u_int ncpu;
  839 
  840         ncpu = mp_ncpus - 1;    /* does not shootdown self */
  841         if (ncpu < 1)
  842                 return;         /* no other cpus */
  843         if (!(read_rflags() & PSL_I))
  844                 panic("%s: interrupts disabled", __func__);
  845         mtx_lock_spin(&smp_ipi_mtx);
  846         smp_tlb_addr1 = addr1;
  847         smp_tlb_addr2 = addr2;
  848         atomic_store_rel_int(&smp_tlb_wait, 0);
  849         ipi_all_but_self(vector);
  850         while (smp_tlb_wait < ncpu)
  851                 ia32_pause();
  852         mtx_unlock_spin(&smp_ipi_mtx);
  853 }
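       /*
        * Completion protocol: the initiator publishes the address range,
        * zeroes smp_tlb_wait and sends the IPI while holding smp_ipi_mtx;
        * each target's invltlb/invlpg/invlrng handler (installed in
        * cpu_mp_start()) is expected to perform the invalidation and
        * atomically increment smp_tlb_wait, so the spin above terminates
        * once all ncpu targets have answered.
        */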
  854 
  855 static void
  856 smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  857 {
  858         int ncpu, othercpus;
  859 
  860         othercpus = mp_ncpus - 1;
  861         if (mask == (u_int)-1) {
  862                 ncpu = othercpus;
  863                 if (ncpu < 1)
  864                         return;
  865         } else {
  866                 mask &= ~PCPU_GET(cpumask);
  867                 if (mask == 0)
  868                         return;
  869                 ncpu = bitcount32(mask);
  870                 if (ncpu > othercpus) {
  871                         /* XXX this should be a panic offence */
  872                         printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
  873                             ncpu, othercpus);
  874                         ncpu = othercpus;
  875                 }
  876                 /* XXX should be a panic, implied by mask == 0 above */
  877                 if (ncpu < 1)
  878                         return;
  879         }
  880         if (!(read_rflags() & PSL_I))
  881                 panic("%s: interrupts disabled", __func__);
  882         mtx_lock_spin(&smp_ipi_mtx);
  883         smp_tlb_addr1 = addr1;
  884         smp_tlb_addr2 = addr2;
  885         atomic_store_rel_int(&smp_tlb_wait, 0);
  886         if (mask == (u_int)-1)
  887                 ipi_all_but_self(vector);
  888         else
  889                 ipi_selected(mask, vector);
  890         while (smp_tlb_wait < ncpu)
  891                 ia32_pause();
  892         mtx_unlock_spin(&smp_ipi_mtx);
  893 }
  894 
  895 void
  896 smp_cache_flush(void)
  897 {
  898 
  899         if (smp_started)
  900                 smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
  901 }
  902 
  903 void
  904 smp_invltlb(void)
  905 {
  906 
  907         if (smp_started) {
  908                 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
  909         }
  910 }
  911 
  912 void
  913 smp_invlpg(vm_offset_t addr)
  914 {
  915 
  916         if (smp_started)
  917                 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
  918 }
  919 
  920 void
  921 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
  922 {
  923 
  924         if (smp_started) {
  925                 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
  926         }
  927 }
  928 
  929 void
  930 smp_masked_invltlb(u_int mask)
  931 {
  932 
  933         if (smp_started) {
  934                 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
  935         }
  936 }
  937 
  938 void
  939 smp_masked_invlpg(u_int mask, vm_offset_t addr)
  940 {
  941 
  942         if (smp_started) {
  943                 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
  944         }
  945 }
  946 
  947 void
  948 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
  949 {
  950 
  951         if (smp_started) {
  952                 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
  953         }
  954 }
  955 
  956 void
  957 ipi_bitmap_handler(struct trapframe frame)
  958 {
  959         int cpu = PCPU_GET(cpuid);
  960         u_int ipi_bitmap;
  961 
  962         ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
  963 
  964         if (ipi_bitmap & (1 << IPI_PREEMPT)) {
  965                 struct thread *running_thread = curthread;
  966                 thread_lock(running_thread);
  967                 if (running_thread->td_critnest > 1) 
  968                         running_thread->td_owepreempt = 1;
  969                 else            
  970                         mi_switch(SW_INVOL | SW_PREEMPT, NULL);
  971                 thread_unlock(running_thread);
  972         }
  973 
  974         /* Nothing to do for AST */
  975 }
  976 
  977 /*
  978  * send an IPI to a set of cpus.
  979  */
  980 void
  981 ipi_selected(u_int32_t cpus, u_int ipi)
  982 {
  983         int cpu;
  984         u_int bitmap = 0;
  985         u_int old_pending;
  986         u_int new_pending;
  987 
  988         if (IPI_IS_BITMAPED(ipi)) { 
  989                 bitmap = 1 << ipi;
  990                 ipi = IPI_BITMAP_VECTOR;
  991         }
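               /*
                * Bitmapped IPIs (e.g. IPI_PREEMPT) all share
                * IPI_BITMAP_VECTOR; the specific request is recorded in the
                * target's cpu_ipi_pending word below and decoded by
                * ipi_bitmap_handler(), and a vector is only sent when no
                * request was already pending for that CPU.
                */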
  992 
  993 #ifdef STOP_NMI
  994         if (ipi == IPI_STOP && stop_cpus_with_nmi) {
  995                 ipi_nmi_selected(cpus);
  996                 return;
  997         }
  998 #endif
  999         CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 1000         while ((cpu = ffs(cpus)) != 0) {
 1001                 cpu--;
 1002                 cpus &= ~(1 << cpu);
 1003 
 1004                 KASSERT(cpu_apic_ids[cpu] != -1,
 1005                     ("IPI to non-existent CPU %d", cpu));
 1006 
 1007                 if (bitmap) {
 1008                         do {
 1009                                 old_pending = cpu_ipi_pending[cpu];
 1010                                 new_pending = old_pending | bitmap;
  1011                         } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old_pending, new_pending));
 1012 
 1013                         if (old_pending)
 1014                                 continue;
 1015                 }
 1016 
 1017                 lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
 1018         }
 1019 
 1020 }
 1021 
 1022 /*
  1023  * send an IPI to all CPUs, including myself
 1024  */
 1025 void
 1026 ipi_all(u_int ipi)
 1027 {
 1028 
 1029         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1030                 ipi_selected(all_cpus, ipi);
 1031                 return;
 1032         }
 1033         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1034         lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
 1035 }
 1036 
 1037 /*
 1038  * send an IPI to all CPUs EXCEPT myself
 1039  */
 1040 void
 1041 ipi_all_but_self(u_int ipi)
 1042 {
 1043 
 1044         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1045                 ipi_selected(PCPU_GET(other_cpus), ipi);
 1046                 return;
 1047         }
 1048         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1049         lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 1050 }
 1051 
 1052 /*
 1053  * send an IPI to myself
 1054  */
 1055 void
 1056 ipi_self(u_int ipi)
 1057 {
 1058 
 1059         if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
 1060                 ipi_selected(PCPU_GET(cpumask), ipi);
 1061                 return;
 1062         }
 1063         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1064         lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
 1065 }
 1066 
 1067 #ifdef STOP_NMI
 1068 /*
 1069  * send NMI IPI to selected CPUs
 1070  */
 1071 
 1072 #define BEFORE_SPIN     1000000
 1073 
 1074 void
 1075 ipi_nmi_selected(u_int32_t cpus)
 1076 {
 1077         int cpu;
 1078         register_t icrlo;
 1079 
 1080         icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 
 1081                 | APIC_TRIGMOD_EDGE; 
 1082         
 1083         CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);
 1084 
 1085         atomic_set_int(&ipi_nmi_pending, cpus);
 1086 
 1087         while ((cpu = ffs(cpus)) != 0) {
 1088                 cpu--;
 1089                 cpus &= ~(1 << cpu);
 1090 
 1091                 KASSERT(cpu_apic_ids[cpu] != -1,
 1092                     ("IPI NMI to non-existent CPU %d", cpu));
 1093                 
 1094                 /* Wait for an earlier IPI to finish. */
 1095                 if (!lapic_ipi_wait(BEFORE_SPIN))
 1096                         panic("ipi_nmi_selected: previous IPI has not cleared");
 1097 
 1098                 lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
 1099         }
 1100 }
 1101 
 1102 int
 1103 ipi_nmi_handler(void)
 1104 {
 1105         int cpumask = PCPU_GET(cpumask);
 1106 
 1107         if (!(ipi_nmi_pending & cpumask))
 1108                 return 1;
 1109 
 1110         atomic_clear_int(&ipi_nmi_pending, cpumask);
 1111         cpustop_handler();
 1112         return 0;
 1113 }
 1114      
 1115 #endif /* STOP_NMI */
 1116 
 1117 /*
 1118  * Handle an IPI_STOP by saving our current context and spinning until we
 1119  * are resumed.
 1120  */
 1121 void
 1122 cpustop_handler(void)
 1123 {
 1124         int cpu = PCPU_GET(cpuid);
 1125         int cpumask = PCPU_GET(cpumask);
 1126 
 1127         savectx(&stoppcbs[cpu]);
 1128 
 1129         /* Indicate that we are stopped */
 1130         atomic_set_int(&stopped_cpus, cpumask);
 1131 
 1132         /* Wait for restart */
 1133         while (!(started_cpus & cpumask))
 1134             ia32_pause();
 1135 
 1136         atomic_clear_int(&started_cpus, cpumask);
 1137         atomic_clear_int(&stopped_cpus, cpumask);
 1138 
 1139         if (cpu == 0 && cpustop_restartfunc != NULL) {
 1140                 cpustop_restartfunc();
 1141                 cpustop_restartfunc = NULL;
 1142         }
 1143 }
 1144 
 1145 /*
 1146  * This is called once the rest of the system is up and running and we're
  1147  * ready to let the APs out of the pen.
 1148  */
 1149 static void
 1150 release_aps(void *dummy __unused)
 1151 {
 1152 
 1153         if (mp_ncpus == 1) 
 1154                 return;
 1155         atomic_store_rel_int(&aps_ready, 1);
 1156         while (smp_started == 0)
 1157                 ia32_pause();
 1158 }
 1159 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 1160 
 1161 static int
 1162 sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
 1163 {
 1164         u_int mask;
 1165         int error;
 1166 
 1167         mask = hlt_cpus_mask;
 1168         error = sysctl_handle_int(oidp, &mask, 0, req);
 1169         if (error || !req->newptr)
 1170                 return (error);
 1171 
 1172         if (logical_cpus_mask != 0 &&
 1173             (mask & logical_cpus_mask) == logical_cpus_mask)
 1174                 hlt_logical_cpus = 1;
 1175         else
 1176                 hlt_logical_cpus = 0;
 1177 
 1178         if (! hyperthreading_allowed)
 1179                 mask |= hyperthreading_cpus_mask;
 1180 
 1181         if ((mask & all_cpus) == all_cpus)
 1182                 mask &= ~(1<<0);
 1183         hlt_cpus_mask = mask;
 1184         return (error);
 1185 }
 1186 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
 1187     0, 0, sysctl_hlt_cpus, "IU",
 1188     "Bitmap of CPUs to halt.  101 (binary) will halt CPUs 0 and 2.");
 1189 
 1190 static int
 1191 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
 1192 {
 1193         int disable, error;
 1194 
 1195         disable = hlt_logical_cpus;
 1196         error = sysctl_handle_int(oidp, &disable, 0, req);
 1197         if (error || !req->newptr)
 1198                 return (error);
 1199 
 1200         if (disable)
 1201                 hlt_cpus_mask |= logical_cpus_mask;
 1202         else
 1203                 hlt_cpus_mask &= ~logical_cpus_mask;
 1204 
 1205         if (! hyperthreading_allowed)
 1206                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1207 
 1208         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1209                 hlt_cpus_mask &= ~(1<<0);
 1210 
 1211         hlt_logical_cpus = disable;
 1212         return (error);
 1213 }
 1214 
 1215 static int
 1216 sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
 1217 {
 1218         int allowed, error;
 1219 
 1220         allowed = hyperthreading_allowed;
 1221         error = sysctl_handle_int(oidp, &allowed, 0, req);
 1222         if (error || !req->newptr)
 1223                 return (error);
 1224 
 1225         if (allowed)
 1226                 hlt_cpus_mask &= ~hyperthreading_cpus_mask;
 1227         else
 1228                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1229 
 1230         if (logical_cpus_mask != 0 &&
 1231             (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
 1232                 hlt_logical_cpus = 1;
 1233         else
 1234                 hlt_logical_cpus = 0;
 1235 
 1236         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1237                 hlt_cpus_mask &= ~(1<<0);
 1238 
 1239         hyperthreading_allowed = allowed;
 1240         return (error);
 1241 }
 1242 
 1243 static void
 1244 cpu_hlt_setup(void *dummy __unused)
 1245 {
 1246 
 1247         if (logical_cpus_mask != 0) {
 1248                 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
 1249                     &hlt_logical_cpus);
 1250                 sysctl_ctx_init(&logical_cpu_clist);
 1251                 SYSCTL_ADD_PROC(&logical_cpu_clist,
 1252                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1253                     "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
 1254                     sysctl_hlt_logical_cpus, "IU", "");
 1255                 SYSCTL_ADD_UINT(&logical_cpu_clist,
 1256                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1257                     "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
 1258                     &logical_cpus_mask, 0, "");
 1259 
 1260                 if (hlt_logical_cpus)
 1261                         hlt_cpus_mask |= logical_cpus_mask;
 1262 
 1263                 /*
 1264                  * If necessary for security purposes, force
 1265                  * hyperthreading off, regardless of the value
 1266                  * of hlt_logical_cpus.
 1267                  */
 1268                 if (hyperthreading_cpus_mask) {
 1269                         TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
 1270                             &hyperthreading_allowed);
 1271                         SYSCTL_ADD_PROC(&logical_cpu_clist,
 1272                             SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1273                             "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
 1274                             0, 0, sysctl_hyperthreading_allowed, "IU", "");
 1275                         if (! hyperthreading_allowed)
 1276                                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1277                 }
 1278         }
 1279 }
 1280 SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
 1281 
 1282 int
 1283 mp_grab_cpu_hlt(void)
 1284 {
 1285         u_int mask = PCPU_GET(cpumask);
 1286 #ifdef MP_WATCHDOG
 1287         u_int cpuid = PCPU_GET(cpuid);
 1288 #endif
 1289         int retval;
 1290 
 1291 #ifdef MP_WATCHDOG
 1292         ap_watchdog(cpuid);
 1293 #endif
 1294 
 1295         retval = mask & hlt_cpus_mask;
 1296         while (mask & hlt_cpus_mask)
 1297                 __asm __volatile("sti; hlt" : : : "memory");
 1298         return (retval);
 1299 }
