
FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/mp_machdep.c


    1 /*-
    2  * Copyright (c) 1996, by Steve Passe
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. The name of the developer may NOT be used to endorse or promote products
   11  *    derived from this software without specific prior written permission.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  */
   25 
   26 #include <sys/cdefs.h>
   27 __FBSDID("$FreeBSD: releng/5.3/sys/i386/i386/mp_machdep.c 146167 2005-05-13 00:02:47Z nectar $");
   28 
   29 #include "opt_apic.h"
   30 #include "opt_cpu.h"
   31 #include "opt_kstack_pages.h"
   32 #include "opt_mp_watchdog.h"
   33 
   34 #if !defined(lint)
   35 #if !defined(SMP)
   36 #error How did you get here?
   37 #endif
   38 
   39 #if defined(I386_CPU) && !defined(COMPILING_LINT)
   40 #error SMP not supported with I386_CPU
   41 #endif
   42 #ifndef DEV_APIC
   43 #error The apic device is required for SMP, add "device apic" to your config file.
   44 #endif
   45 #if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
   46 #error SMP not supported with CPU_DISABLE_CMPXCHG
   47 #endif
   48 #endif /* not lint */
   49 
   50 #include <sys/param.h>
   51 #include <sys/systm.h>
   52 #include <sys/bus.h>
   53 #include <sys/cons.h>   /* cngetc() */
   54 #ifdef GPROF 
   55 #include <sys/gmon.h>
   56 #endif
   57 #include <sys/kernel.h>
   58 #include <sys/ktr.h>
   59 #include <sys/lock.h>
   60 #include <sys/malloc.h>
   61 #include <sys/memrange.h>
   62 #include <sys/mutex.h>
   63 #include <sys/pcpu.h>
   64 #include <sys/proc.h>
   65 #include <sys/smp.h>
   66 #include <sys/sysctl.h>
   67 
   68 #include <vm/vm.h>
   69 #include <vm/vm_param.h>
   70 #include <vm/pmap.h>
   71 #include <vm/vm_kern.h>
   72 #include <vm/vm_extern.h>
   73 
   74 #include <machine/apicreg.h>
   75 #include <machine/clock.h>
   76 #include <machine/md_var.h>
   77 #include <machine/mp_watchdog.h>
   78 #include <machine/pcb.h>
   79 #include <machine/smp.h>
   80 #include <machine/smptests.h>   /** COUNT_XINVLTLB_HITS */
   81 #include <machine/specialreg.h>
   82 #include <machine/privatespace.h>
   83 
   84 #define WARMBOOT_TARGET         0
   85 #define WARMBOOT_OFF            (KERNBASE + 0x0467)
   86 #define WARMBOOT_SEG            (KERNBASE + 0x0469)
   87 
   88 #define CMOS_REG                (0x70)
   89 #define CMOS_DATA               (0x71)
   90 #define BIOS_RESET              (0x0f)
   91 #define BIOS_WARM               (0x0a)
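
       /*
        * A note on the warm-boot mechanism (standard PC/AT behaviour,
        * summarized here for context): writing BIOS_WARM (0x0a) into the
        * CMOS shutdown-status register BIOS_RESET (0x0f) tells the BIOS
        * to bypass the normal POST path on the next reset and jump
        * through the warm-boot vector at 0040:0067 (WARMBOOT_OFF and
        * WARMBOOT_SEG when mapped at KERNBASE).  start_all_aps() points
        * that vector at the AP trampoline before waking each AP.
        */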
   92 
    93 /*
    94  * This code MUST be enabled here and in mpboot.s.
    95  * It follows the very early stages of AP boot by placing values in CMOS ram.
    96  * It NORMALLY will never be needed, hence the primitive method of enabling it.
    97  *
    98 #define CHECK_POINTS
    99  */
  100 
  101 #if defined(CHECK_POINTS) && !defined(PC98)
  102 #define CHECK_READ(A)    (outb(CMOS_REG, (A)), inb(CMOS_DATA))
  103 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
  104 
  105 #define CHECK_INIT(D);                          \
  106         CHECK_WRITE(0x34, (D));                 \
  107         CHECK_WRITE(0x35, (D));                 \
  108         CHECK_WRITE(0x36, (D));                 \
  109         CHECK_WRITE(0x37, (D));                 \
  110         CHECK_WRITE(0x38, (D));                 \
  111         CHECK_WRITE(0x39, (D));
  112 
  113 #define CHECK_PRINT(S);                         \
  114         printf("%s: %d, %d, %d, %d, %d, %d\n",  \
  115            (S),                                 \
  116            CHECK_READ(0x34),                    \
  117            CHECK_READ(0x35),                    \
  118            CHECK_READ(0x36),                    \
  119            CHECK_READ(0x37),                    \
  120            CHECK_READ(0x38),                    \
  121            CHECK_READ(0x39));
  122 
  123 #else                           /* CHECK_POINTS */
  124 
  125 #define CHECK_INIT(D)
  126 #define CHECK_PRINT(S)
  127 #define CHECK_WRITE(A, D)
  128 
  129 #endif                          /* CHECK_POINTS */
  130 
  131 /*
  132  * Values to send to the POST hardware.
  133  */
  134 #define MP_BOOTADDRESS_POST     0x10
  135 #define MP_PROBE_POST           0x11
  136 #define MPTABLE_PASS1_POST      0x12
  137 
  138 #define MP_START_POST           0x13
  139 #define MP_ENABLE_POST          0x14
  140 #define MPTABLE_PASS2_POST      0x15
  141 
  142 #define START_ALL_APS_POST      0x16
  143 #define INSTALL_AP_TRAMP_POST   0x17
  144 #define START_AP_POST           0x18
  145 
  146 #define MP_ANNOUNCE_POST        0x19
  147 
  148 /* lock region used by kernel profiling */
  149 int     mcount_lock;
  150 
  151 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
  152 int     current_postcode;
  153 
   154 int     mp_naps;                /* # of Application Processors */
  155 int     boot_cpu_id = -1;       /* designated BSP */
  156 extern  int nkpt;
  157 
  158 /*
  159  * CPU topology map datastructures for HTT.
  160  */
  161 static struct cpu_group mp_groups[MAXCPU];
  162 static struct cpu_top mp_top;
  163 
  164 /* AP uses this during bootstrap.  Do not staticize.  */
  165 char *bootSTK;
  166 static int bootAP;
  167 
  168 /* Hotwire a 0->4MB V==P mapping */
  169 extern pt_entry_t *KPTphys;
  170 
  171 /* SMP page table page */
  172 extern pt_entry_t *SMPpt;
  173 
  174 struct pcb stoppcbs[MAXCPU];
  175 
  176 /* Variables needed for SMP tlb shootdown. */
  177 vm_offset_t smp_tlb_addr1;
  178 vm_offset_t smp_tlb_addr2;
  179 volatile int smp_tlb_wait;
  180 
  181 /*
  182  * Local data and functions.
  183  */
  184 
  185 static u_int logical_cpus;
  186 
   187 /* Used to hold the APs until we are ready to release them. */
  188 static struct mtx ap_boot_mtx;
  189 
  190 /* Set to 1 once we're ready to let the APs out of the pen. */
  191 static volatile int aps_ready = 0;
  192 
  193 /*
  194  * Store data from cpu_add() until later in the boot when we actually setup
  195  * the APs.
  196  */
  197 struct cpu_info {
  198         int     cpu_present:1;
  199         int     cpu_bsp:1;
  200 } static cpu_info[MAXCPU];
  201 static int cpu_apic_ids[MAXCPU];
  202 
  203 /* Holds pending bitmap based IPIs per CPU */
  204 static volatile u_int cpu_ipi_pending[MAXCPU];
  205 
  206 static u_int boot_address;
  207 
  208 static void     set_logical_apic_ids(void);
  209 static int      start_all_aps(void);
  210 static void     install_ap_tramp(void);
  211 static int      start_ap(int apic_id);
  212 static void     release_aps(void *dummy);
  213 
  214 static int      hlt_logical_cpus;
  215 static u_int    hyperthreading_cpus;
  216 static cpumask_t        hyperthreading_cpus_mask;
  217 static int      hyperthreading_allowed;
  218 static struct   sysctl_ctx_list logical_cpu_clist;
  219 
  220 static void
  221 mem_range_AP_init(void)
  222 {
  223         if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
  224                 mem_range_softc.mr_op->initAP(&mem_range_softc);
  225 }
  226 
  227 void
  228 mp_topology(void)
  229 {
  230         struct cpu_group *group;
  231         int logical_cpus;
  232         int apic_id;
  233         int groups;
  234         int cpu;
  235 
  236         /* Build the smp_topology map. */
  237         /* Nothing to do if there is no HTT support. */
  238         if ((cpu_feature & CPUID_HTT) == 0)
  239                 return;
  240         logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
  241         if (logical_cpus <= 1)
  242                 return;
  243         group = &mp_groups[0];
  244         groups = 1;
  245         for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
  246                 if (!cpu_info[apic_id].cpu_present)
  247                         continue;
  248                 /*
  249                  * If the current group has members and we're not a logical
  250                  * cpu, create a new group.
  251                  */
  252                 if (group->cg_count != 0 && (apic_id % logical_cpus) == 0) {
  253                         group++;
  254                         groups++;
  255                 }
  256                 group->cg_count++;
  257                 group->cg_mask |= 1 << cpu;
  258                 cpu++;
  259         }
  260 
  261         mp_top.ct_count = groups;
  262         mp_top.ct_group = mp_groups;
  263         smp_topology = &mp_top;
  264 }
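
       /*
        * Worked example (hypothetical HTT box): with logical_cpus == 2 and
        * APIC IDs 0-3 present, the loop above starts a new group whenever
        * apic_id % logical_cpus == 0 and the current group already has
        * members, yielding two groups: {cpu0, cpu1} and {cpu2, cpu3}.
        */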
  265 
  266 
  267 /*
  268  * Calculate usable address in base memory for AP trampoline code.
  269  */
  270 u_int
  271 mp_bootaddress(u_int basemem)
  272 {
  273         POSTCODE(MP_BOOTADDRESS_POST);
  274 
  275         boot_address = trunc_page(basemem);     /* round down to 4k boundary */
  276         if ((basemem - boot_address) < bootMP_size)
  277                 boot_address -= PAGE_SIZE;      /* not enough, lower by 4k */
  278 
  279         return boot_address;
  280 }
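
       /*
        * For example, assuming 639K of base memory (basemem == 0x9fc00):
        * trunc_page() yields a boot_address of 0x9f000, leaving 0xc00
        * bytes; if bootMP_size is larger than that, the trampoline drops
        * one page, to 0x9e000.
        */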
  281 
  282 void
  283 cpu_add(u_int apic_id, char boot_cpu)
  284 {
  285 
  286         if (apic_id >= MAXCPU) {
  287                 printf("SMP: CPU %d exceeds maximum CPU %d, ignoring\n",
  288                     apic_id, MAXCPU - 1);
  289                 return;
  290         }
  291         KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
  292             apic_id));
  293         cpu_info[apic_id].cpu_present = 1;
  294         if (boot_cpu) {
  295                 KASSERT(boot_cpu_id == -1,
  296                     ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
  297                     boot_cpu_id));
  298                 boot_cpu_id = apic_id;
  299                 cpu_info[apic_id].cpu_bsp = 1;
  300         }
  301         mp_ncpus++;
  302         if (bootverbose)
  303                 printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
  304                     "AP");
  305         
  306 }
  307 
  308 void
  309 cpu_mp_setmaxid(void)
  310 {
  311 
  312         mp_maxid = MAXCPU - 1;
  313 }
  314 
  315 int
  316 cpu_mp_probe(void)
  317 {
  318 
  319         /*
  320          * Always record BSP in CPU map so that the mbuf init code works
  321          * correctly.
  322          */
  323         all_cpus = 1;
  324         if (mp_ncpus == 0) {
  325                 /*
  326                  * No CPUs were found, so this must be a UP system.  Setup
  327                  * the variables to represent a system with a single CPU
  328                  * with an id of 0.
  329                  */
  330                 mp_ncpus = 1;
  331                 return (0);
  332         }
  333 
  334         /* At least one CPU was found. */
  335         if (mp_ncpus == 1) {
  336                 /*
  337                  * One CPU was found, so this must be a UP system with
  338                  * an I/O APIC.
  339                  */
  340                 return (0);
  341         }
  342 
  343         /* At least two CPUs were found. */
  344         return (1);
  345 }
  346 
  347 /*
   348  * Initialize the IPI handlers and start up the APs.
  349  */
  350 void
  351 cpu_mp_start(void)
  352 {
  353         int i;
  354         u_int threads_per_cache, p[4];
  355 
  356         POSTCODE(MP_START_POST);
  357 
  358         /* Initialize the logical ID to APIC ID table. */
  359         for (i = 0; i < MAXCPU; i++) {
  360                 cpu_apic_ids[i] = -1;
  361                 cpu_ipi_pending[i] = 0;
  362         }
  363 
  364         /* Install an inter-CPU IPI for TLB invalidation */
  365         setidt(IPI_INVLTLB, IDTVEC(invltlb),
  366                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  367         setidt(IPI_INVLPG, IDTVEC(invlpg),
  368                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  369         setidt(IPI_INVLRNG, IDTVEC(invlrng),
  370                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  371         
  372         /* Install an inter-CPU IPI for lazy pmap release */
  373         setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
  374                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  375 
  376         /* Install an inter-CPU IPI for all-CPU rendezvous */
  377         setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
  378                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  379 
  380         /* Install generic inter-CPU IPI handler */
  381         setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
  382                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  383 
  384         /* Install an inter-CPU IPI for CPU stop/restart */
  385         setidt(IPI_STOP, IDTVEC(cpustop),
  386                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  387 
  388 
  389         /* Set boot_cpu_id if needed. */
  390         if (boot_cpu_id == -1) {
  391                 boot_cpu_id = PCPU_GET(apic_id);
  392                 cpu_info[boot_cpu_id].cpu_bsp = 1;
  393         } else
  394                 KASSERT(boot_cpu_id == PCPU_GET(apic_id),
  395                     ("BSP's APIC ID doesn't match boot_cpu_id"));
  396         cpu_apic_ids[0] = boot_cpu_id;
  397 
  398         /* Start each Application Processor */
  399         start_all_aps();
  400 
  401         /* Setup the initial logical CPUs info. */
  402         logical_cpus = logical_cpus_mask = 0;
  403         if (cpu_feature & CPUID_HTT)
  404                 logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
  405 
  406         /*
  407          * Work out if hyperthreading is *really* enabled.  This
  408          * is made really ugly by the fact that processors lie: Dual
  409          * core processors claim to be hyperthreaded even when they're
  410          * not, presumably because they want to be treated the same
  411          * way as HTT with respect to per-cpu software licensing.
  412          * At the time of writing (May 12, 2005) the only hyperthreaded
  413          * cpus are from Intel, and Intel's dual-core processors can be
  414          * identified via the "deterministic cache parameters" cpuid
  415          * calls.
  416          */
  417         /*
  418          * First determine if this is an Intel processor which claims
  419          * to have hyperthreading support.
  420          */
  421         if ((cpu_feature & CPUID_HTT) &&
  422             (strcmp(cpu_vendor, "GenuineIntel") == 0)) {
  423                 /*
  424                  * If the "deterministic cache parameters" cpuid calls
  425                  * are available, use them.
  426                  */
  427                 if (cpu_high >= 4) {
  428                         /* Ask the processor about up to 32 caches. */
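                               /*
                                * CPUID leaf 4 reports, per cache, the
                                * maximum number of threads sharing the
                                * cache minus one in EAX[25:14] (hence
                                * the +1 below) and the cache type in
                                * EAX[4:0], where 0 means no more caches.
                                */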
  429                         for (i = 0; i < 32; i++) {
  430                                 cpuid_count(4, i, p);
  431                                 threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
  432                                 if (hyperthreading_cpus < threads_per_cache)
  433                                         hyperthreading_cpus = threads_per_cache;
  434                                 if ((p[0] & 0x1f) == 0)
  435                                         break;
  436                         }
  437                 }
  438 
  439                 /*
  440                  * If the deterministic cache parameters are not
  441                  * available, or if no caches were reported to exist,
  442                  * just accept what the HTT flag indicated.
  443                  */
  444                 if (hyperthreading_cpus == 0)
  445                         hyperthreading_cpus = logical_cpus;
  446         }
  447 
  448         set_logical_apic_ids();
  449 }
  450 
  451 
  452 /*
  453  * Print various information about the SMP system hardware and setup.
  454  */
  455 void
  456 cpu_mp_announce(void)
  457 {
  458         int i, x;
  459 
  460         POSTCODE(MP_ANNOUNCE_POST);
  461 
  462         /* List CPUs */
  463         printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
  464         for (i = 1, x = 0; x < MAXCPU; x++) {
  465                 if (cpu_info[x].cpu_present && !cpu_info[x].cpu_bsp) {
  466                         KASSERT(i < mp_ncpus,
  467                             ("mp_ncpus and actual cpus are out of whack"));
  468                         printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
  469                 }
  470         }
  471 }
  472 
  473 /*
   474  * AP CPUs call this to initialize themselves.
  475  */
  476 void
  477 init_secondary(void)
  478 {
  479         int     gsel_tss;
  480         int     x, myid;
  481         u_int   cr0;
  482 
  483         /* bootAP is set in start_ap() to our ID. */
  484         myid = bootAP;
  485         gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
  486         gdt_segs[GPROC0_SEL].ssd_base =
  487                 (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
  488         SMP_prvspace[myid].pcpu.pc_prvspace =
  489                 &SMP_prvspace[myid].pcpu;
  490 
  491         for (x = 0; x < NGDT; x++) {
  492                 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
  493         }
  494 
  495         r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
  496         r_gdt.rd_base = (int) &gdt[myid * NGDT];
  497         lgdt(&r_gdt);                   /* does magic intra-segment return */
  498 
  499         lidt(&r_idt);
  500 
  501         lldt(_default_ldt);
  502         PCPU_SET(currentldt, _default_ldt);
  503 
  504         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
  505         gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
  506         PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
  507         PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
  508         PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
  509         PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
  510         PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
  511         ltr(gsel_tss);
  512 
  513         /*
  514          * Set to a known state:
  515          * Set by mpboot.s: CR0_PG, CR0_PE
  516          * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
  517          */
  518         cr0 = rcr0();
  519         cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
  520         load_cr0(cr0);
  521         CHECK_WRITE(0x38, 5);
  522         
  523         /* Disable local APIC just to be sure. */
  524         lapic_disable();
  525 
  526         /* signal our startup to the BSP. */
  527         mp_naps++;
  528         CHECK_WRITE(0x39, 6);
  529 
   530         /* Spin until the BSP releases the APs. */
  531         while (!aps_ready)
  532                 ia32_pause();
  533 
  534         /* BSP may have changed PTD while we were waiting */
  535         invltlb();
  536         pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
  537 
  538 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
  539         lidt(&r_idt);
  540 #endif
  541 
  542         /* set up CPU registers and state */
  543         cpu_setregs();
  544 
  545         /* set up FPU state on the AP */
  546         npxinit(__INITIAL_NPXCW__);
  547 
  548         /* set up SSE registers */
  549         enable_sse();
  550 
  551         /* A quick check from sanity claus */
  552         if (PCPU_GET(apic_id) != lapic_id()) {
  553                 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
  554                 printf("SMP: actual apic_id = %d\n", lapic_id());
  555                 printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
  556                 printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]);
  557                 panic("cpuid mismatch! boom!!");
  558         }
  559 
  560         mtx_lock_spin(&ap_boot_mtx);
  561 
  562         /* Init local apic for irq's */
  563         lapic_setup();
  564 
  565         /* Set memory range attributes for this CPU to match the BSP */
  566         mem_range_AP_init();
  567 
  568         smp_cpus++;
  569 
  570         CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
  571         printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
  572 
  573         /* Determine if we are a logical CPU. */
  574         if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
  575                 logical_cpus_mask |= PCPU_GET(cpumask);
  576         
  577         /* Determine if we are a hyperthread. */
  578         if (hyperthreading_cpus > 1 &&
  579             PCPU_GET(apic_id) % hyperthreading_cpus != 0)
  580                 hyperthreading_cpus_mask |= PCPU_GET(cpumask);
  581 
  582         /* Build our map of 'other' CPUs. */
  583         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  584 
  585         if (bootverbose)
  586                 lapic_dump("AP");
  587 
  588         if (smp_cpus == mp_ncpus) {
  589                 /* enable IPI's, tlb shootdown, freezes etc */
  590                 atomic_store_rel_int(&smp_started, 1);
  591                 smp_active = 1;  /* historic */
  592         }
  593 
  594         mtx_unlock_spin(&ap_boot_mtx);
  595 
   596         /* Wait until all the APs are up. */
  597         while (smp_started == 0)
  598                 ia32_pause();
  599 
  600         /* ok, now grab sched_lock and enter the scheduler */
  601         mtx_lock_spin(&sched_lock);
  602 
  603         binuptime(PCPU_PTR(switchtime));
  604         PCPU_SET(switchticks, ticks);
  605 
  606         cpu_throw(NULL, choosethread());        /* doesn't return */
  607 
  608         panic("scheduler returned us to %s", __func__);
  609         /* NOTREACHED */
  610 }
  611 
  612 /*******************************************************************
  613  * local functions and data
  614  */
  615 
  616 /*
  617  * Set the APIC logical IDs.
  618  *
   619  * We want to cluster logical CPUs within the same APIC ID cluster.
   620  * Since logical CPUs are aligned, simply filling in the clusters in
   621  * APIC ID order works fine.  Note that this does not try to balance
   622  * the number of CPUs in each cluster. (XXX?)
  623  */
  624 static void
  625 set_logical_apic_ids(void)
  626 {
  627         u_int apic_id, cluster, cluster_id;
  628 
  629         /* Force us to allocate cluster 0 at the start. */
  630         cluster = -1;
  631         cluster_id = APIC_MAX_INTRACLUSTER_ID;
  632         for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
  633                 if (!cpu_info[apic_id].cpu_present)
  634                         continue;
  635                 if (cluster_id == APIC_MAX_INTRACLUSTER_ID) {
  636                         cluster = ioapic_next_logical_cluster();
  637                         cluster_id = 0;
  638                 } else
  639                         cluster_id++;
  640                 if (bootverbose)
  641                         printf("APIC ID: physical %u, logical %u:%u\n",
  642                             apic_id, cluster, cluster_id);
  643                 lapic_set_logical_id(apic_id, cluster, cluster_id);
  644         }
  645 }
  646 
  647 /*
  648  * start each AP in our list
  649  */
  650 static int
  651 start_all_aps(void)
  652 {
  653 #ifndef PC98
  654         u_char mpbiosreason;
  655 #endif
  656         u_long mpbioswarmvec;
  657         struct pcpu *pc;
  658         char *stack;
  659         uintptr_t kptbase;
  660         int i, pg, apic_id, cpu;
  661 
  662         POSTCODE(START_ALL_APS_POST);
  663 
  664         mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
  665 
  666         /* install the AP 1st level boot code */
  667         install_ap_tramp();
  668 
  669         /* save the current value of the warm-start vector */
  670         mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
  671 #ifndef PC98
  672         outb(CMOS_REG, BIOS_RESET);
  673         mpbiosreason = inb(CMOS_DATA);
  674 #endif
  675 
  676         /* set up temporary P==V mapping for AP boot */
  677         /* XXX this is a hack, we should boot the AP on its own stack/PTD */
  678         kptbase = (uintptr_t)(void *)KPTphys;
  679         for (i = 0; i < NKPT; i++)
  680                 PTD[i] = (pd_entry_t)(PG_V | PG_RW |
  681                     ((kptbase + i * PAGE_SIZE) & PG_FRAME));
  682         invltlb();
  683 
  684         /* start each AP */
  685         for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
  686                 if (!cpu_info[apic_id].cpu_present ||
  687                     cpu_info[apic_id].cpu_bsp)
  688                         continue;
  689                 cpu++;
  690 
  691                 /* save APIC ID for this logical ID */
  692                 cpu_apic_ids[cpu] = apic_id;
  693 
  694                 /* first page of AP's private space */
  695                 pg = cpu * i386_btop(sizeof(struct privatespace));
  696 
  697                 /* allocate a new private data page */
  698                 pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);
  699 
  700                 /* wire it into the private page table page */
  701                 SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));
  702 
  703                 /* allocate and set up an idle stack data page */
  704                 stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
  705                 for (i = 0; i < KSTACK_PAGES; i++)
  706                         SMPpt[pg + 1 + i] = (pt_entry_t)
  707                             (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
  708 
  709                 /* prime data page for it to use */
  710                 pcpu_init(pc, cpu, sizeof(struct pcpu));
  711                 pc->pc_apic_id = apic_id;
  712 
  713                 /* setup a vector to our boot code */
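                /*
                 * The warm-boot vector holds a real-mode segment:offset
                 * pair, so the page-aligned trampoline address is stored
                 * as segment boot_address >> 4 with offset
                 * WARMBOOT_TARGET (0).
                 */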
  714                 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
  715                 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
  716 #ifndef PC98
  717                 outb(CMOS_REG, BIOS_RESET);
  718                 outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
  719 #endif
  720 
  721                 bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
  722                     PAGE_SIZE];
  723                 bootAP = cpu;
  724 
  725                 /* attempt to start the Application Processor */
  726                 CHECK_INIT(99); /* setup checkpoints */
  727                 if (!start_ap(apic_id)) {
  728                         printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
  729                         CHECK_PRINT("trace");   /* show checkpoints */
  730                         /* better panic as the AP may be running loose */
  731                         printf("panic y/n? [y] ");
  732                         if (cngetc() != 'n')
  733                                 panic("bye-bye");
  734                 }
  735                 CHECK_PRINT("trace");           /* show checkpoints */
  736 
  737                 all_cpus |= (1 << cpu);         /* record AP in CPU map */
  738         }
  739 
  740         /* build our map of 'other' CPUs */
  741         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  742 
  743         /* restore the warmstart vector */
  744         *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
  745 #ifndef PC98
  746         outb(CMOS_REG, BIOS_RESET);
  747         outb(CMOS_DATA, mpbiosreason);
  748 #endif
  749 
  750         /*
   751          * Set up the idle context for the BSP.  This is similar to the AP
   752          * setup above, except that some of it was done by locore, some by
   753          * pmap.c, and some is implicit because the BSP is cpu#0, the page
   754          * is initially zero, and we can refer to variables by name on the BSP.
  755          */
  756 
  757         /* Allocate and setup BSP idle stack */
  758         stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
  759         for (i = 0; i < KSTACK_PAGES; i++)
  760                 SMPpt[1 + i] = (pt_entry_t)
  761                     (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
  762 
  763         for (i = 0; i < NKPT; i++)
  764                 PTD[i] = 0;
  765         pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
  766 
  767         /* number of APs actually started */
  768         return mp_naps;
  769 }
  770 
  771 /*
  772  * load the 1st level AP boot code into base memory.
  773  */
  774 
  775 /* targets for relocation */
  776 extern void bigJump(void);
  777 extern void bootCodeSeg(void);
  778 extern void bootDataSeg(void);
  779 extern void MPentry(void);
  780 extern u_int MP_GDT;
  781 extern u_int mp_gdtbase;
  782 
  783 static void
  784 install_ap_tramp(void)
  785 {
  786         int     x;
  787         int     size = *(int *) ((u_long) & bootMP_size);
  788         vm_offset_t va = boot_address + KERNBASE;
  789         u_char *src = (u_char *) ((u_long) bootMP);
  790         u_char *dst = (u_char *) va;
  791         u_int   boot_base = (u_int) bootMP;
  792         u_int8_t *dst8;
  793         u_int16_t *dst16;
  794         u_int32_t *dst32;
  795 
  796         POSTCODE(INSTALL_AP_TRAMP_POST);
  797 
  798         KASSERT (size <= PAGE_SIZE,
  799             ("'size' do not fit into PAGE_SIZE, as expected."));
  800         pmap_kenter(va, boot_address);
  801         pmap_invalidate_page (kernel_pmap, va);
  802         for (x = 0; x < size; ++x)
  803                 *dst++ = *src++;
  804 
   805         /*
   806          * Modify addresses in the code we just moved to basemem.
   807          * Unfortunately we need fairly detailed info about mpboot.s for
   808          * this to work; changes to mpboot.s might require changes here.
   809          */
  810 
  811         /* boot code is located in KERNEL space */
  812         dst = (u_char *) va;
  813 
  814         /* modify the lgdt arg */
  815         dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
  816         *dst32 = boot_address + ((u_int) & MP_GDT - boot_base);
  817 
  818         /* modify the ljmp target for MPentry() */
  819         dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
  820         *dst32 = ((u_int) MPentry - KERNBASE);
  821 
  822         /* modify the target for boot code segment */
  823         dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
  824         dst8 = (u_int8_t *) (dst16 + 1);
  825         *dst16 = (u_int) boot_address & 0xffff;
  826         *dst8 = ((u_int) boot_address >> 16) & 0xff;
  827 
  828         /* modify the target for boot data segment */
  829         dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
  830         dst8 = (u_int8_t *) (dst16 + 1);
  831         *dst16 = (u_int) boot_address & 0xffff;
  832         *dst8 = ((u_int) boot_address >> 16) & 0xff;
  833 }
  834 
  835 /*
  836  * This function starts the AP (application processor) identified
  837  * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
  838  * to accomplish this.  This is necessary because of the nuances
  839  * of the different hardware we might encounter.  It isn't pretty,
  840  * but it seems to work.
  841  */
  842 static int
  843 start_ap(int apic_id)
  844 {
  845         int vector, ms;
  846         int cpus;
  847 
  848         POSTCODE(START_AP_POST);
  849 
  850         /* calculate the vector */
  851         vector = (boot_address >> 12) & 0xff;
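        /*
         * The 8-bit vector is the 4K page number of the trampoline: a
         * STARTUP IPI starts the AP in real mode at CS:IP =
         * (vector << 8):0, i.e. at physical address vector << 12.
         */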
  852 
  853         /* used as a watchpoint to signal AP startup */
  854         cpus = mp_naps;
  855 
   856         /*
   857          * First we do an INIT IPI: this IPI might be run, resetting and
   858          * running the target CPU; OR this INIT IPI might be latched (P5
   859          * bug), with the CPU waiting for a STARTUP IPI; OR this INIT IPI
   860          * might be ignored.
   861          */
  862 
  863         /* do an INIT IPI: assert RESET */
  864         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  865             APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
  866 
  867         /* wait for pending status end */
  868         lapic_ipi_wait(-1);
  869 
  870         /* do an INIT IPI: deassert RESET */
  871         lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
  872             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
  873 
  874         /* wait for pending status end */
   875         DELAY(10000);           /* wait ~10ms */
  876         lapic_ipi_wait(-1);
  877 
   878         /*
   879          * Next we do a STARTUP IPI: the previous INIT IPI might still be
   880          * latched (P5 bug), in which case this 1st STARTUP IPI would
   881          * terminate immediately and the previously started INIT IPI would
   882          * continue; OR the previous INIT IPI has already run, and this
   883          * STARTUP IPI will run; OR the previous INIT IPI was ignored, and
   884          * this STARTUP IPI will run.
   885          */
  886 
  887         /* do a STARTUP IPI */
  888         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  889             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  890             vector, apic_id);
  891         lapic_ipi_wait(-1);
   892         DELAY(200);             /* wait ~200us */
  893 
   894         /*
   895          * Finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should
   896          * run IF the previous STARTUP IPI was cancelled by a latched INIT
   897          * IPI; otherwise it will be ignored, as only ONE STARTUP IPI is
   898          * recognized after a hardware RESET or INIT IPI.
   899          */
  900 
  901         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  902             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  903             vector, apic_id);
  904         lapic_ipi_wait(-1);
   905         DELAY(200);             /* wait ~200us */
  906 
  907         /* Wait up to 5 seconds for it to start. */
  908         for (ms = 0; ms < 5000; ms++) {
  909                 if (mp_naps > cpus)
  910                         return 1;       /* return SUCCESS */
  911                 DELAY(1000);
  912         }
  913         return 0;               /* return FAILURE */
  914 }
  915 
  916 #ifdef COUNT_XINVLTLB_HITS
  917 u_int xhits_gbl[MAXCPU];
  918 u_int xhits_pg[MAXCPU];
  919 u_int xhits_rng[MAXCPU];
  920 SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
  921 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
  922     sizeof(xhits_gbl), "IU", "");
  923 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
  924     sizeof(xhits_pg), "IU", "");
  925 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
  926     sizeof(xhits_rng), "IU", "");
  927 
  928 u_int ipi_global;
  929 u_int ipi_page;
  930 u_int ipi_range;
  931 u_int ipi_range_size;
  932 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
  933 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
  934 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
  935 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
  936     0, "");
  937 
  938 u_int ipi_masked_global;
  939 u_int ipi_masked_page;
  940 u_int ipi_masked_range;
  941 u_int ipi_masked_range_size;
  942 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
  943     &ipi_masked_global, 0, "");
  944 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
  945     &ipi_masked_page, 0, "");
  946 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
  947     &ipi_masked_range, 0, "");
  948 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
  949     &ipi_masked_range_size, 0, "");
  950 #endif /* COUNT_XINVLTLB_HITS */
  951 
  952 /*
   953  * Flush the TLB on all other CPUs.
  954  */
  955 static void
  956 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  957 {
  958         u_int ncpu;
  959 
  960         ncpu = mp_ncpus - 1;    /* does not shootdown self */
  961         if (ncpu < 1)
  962                 return;         /* no other cpus */
  963         mtx_assert(&smp_rv_mtx, MA_OWNED);
  964         smp_tlb_addr1 = addr1;
  965         smp_tlb_addr2 = addr2;
  966         atomic_store_rel_int(&smp_tlb_wait, 0);
  967         ipi_all_but_self(vector);
  968         while (smp_tlb_wait < ncpu)
  969                 ia32_pause();
  970 }
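
       /*
        * The spin above relies on the IPI handlers (Xinvltlb and friends
        * in apic_vector.s) performing the invalidation and then atomically
        * incrementing smp_tlb_wait; once every other CPU has checked in,
        * the initiator may continue.
        */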
  971 
  972 /*
  973  * This is about as magic as it gets.  fortune(1) has got similar code
  974  * for reversing bits in a word.  Who thinks up this stuff??
  975  *
  976  * Yes, it does appear to be consistently faster than:
  977  * while (i = ffs(m)) {
  978  *      m >>= i;
  979  *      bits++;
  980  * }
  981  * and
  982  * while (lsb = (m & -m)) {     // This is magic too
  983  *      m &= ~lsb;              // or: m ^= lsb
  984  *      bits++;
  985  * }
  986  * Both of these latter forms do some very strange things on gcc-3.1 with
  987  * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
  988  * There is probably an SSE or MMX popcnt instruction.
  989  *
  990  * I wonder if this should be in libkern?
  991  *
  992  * XXX Stop the presses!  Another one:
  993  * static __inline u_int32_t
  994  * popcnt1(u_int32_t v)
  995  * {
  996  *      v -= ((v >> 1) & 0x55555555);
  997  *      v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
  998  *      v = (v + (v >> 4)) & 0x0F0F0F0F;
  999  *      return (v * 0x01010101) >> 24;
 1000  * }
 1001  * The downside is that it has a multiply.  With a pentium3 with
 1002  * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
 1003  * an imull, and in that case it is faster.  In most other cases
 1004  * it appears slightly slower.
 1005  *
 1006  * Another variant (also from fortune):
 1007  * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
 1008  * #define  BX_(x)     ((x) - (((x)>>1)&0x77777777)            \
 1009  *                          - (((x)>>2)&0x33333333)            \
 1010  *                          - (((x)>>3)&0x11111111))
 1011  */
 1012 static __inline u_int32_t
 1013 popcnt(u_int32_t m)
 1014 {
 1015 
 1016         m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
 1017         m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
 1018         m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
 1019         m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
 1020         m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
 1021         return m;
 1022 }
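
       /*
        * A naive reference version (a userland sketch for comparison only,
        * not used by the kernel):
        *
        * static u_int32_t
        * popcnt_naive(u_int32_t m)
        * {
        *      u_int32_t bits = 0;
        *
        *      while (m != 0) {
        *              bits += m & 1;
        *              m >>= 1;
        *      }
        *      return (bits);
        * }
        *
        * Each step of popcnt() above folds adjacent fields together: 16
        * two-bit sums, then 8 four-bit sums, then 4 byte sums, and so on
        * until a single 32-bit count remains.
        */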
 1023 
 1024 static void
 1025 smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 1026 {
 1027         int ncpu, othercpus;
 1028 
 1029         othercpus = mp_ncpus - 1;
 1030         if (mask == (u_int)-1) {
 1031                 ncpu = othercpus;
 1032                 if (ncpu < 1)
 1033                         return;
 1034         } else {
 1035                 mask &= ~PCPU_GET(cpumask);
 1036                 if (mask == 0)
 1037                         return;
 1038                 ncpu = popcnt(mask);
 1039                 if (ncpu > othercpus) {
 1040                         /* XXX this should be a panic offence */
 1041                         printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
 1042                             ncpu, othercpus);
 1043                         ncpu = othercpus;
 1044                 }
 1045                 /* XXX should be a panic, implied by mask == 0 above */
 1046                 if (ncpu < 1)
 1047                         return;
 1048         }
 1049         mtx_assert(&smp_rv_mtx, MA_OWNED);
 1050         smp_tlb_addr1 = addr1;
 1051         smp_tlb_addr2 = addr2;
 1052         atomic_store_rel_int(&smp_tlb_wait, 0);
 1053         if (mask == (u_int)-1)
 1054                 ipi_all_but_self(vector);
 1055         else
 1056                 ipi_selected(mask, vector);
 1057         while (smp_tlb_wait < ncpu)
 1058                 ia32_pause();
 1059 }
 1060 
 1061 void
 1062 smp_invltlb(void)
 1063 {
 1064         if (smp_started) {
 1065                 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
 1066 #ifdef COUNT_XINVLTLB_HITS
 1067                 ipi_global++;
 1068 #endif
 1069         }
 1070 }
 1071 
 1072 void
 1073 smp_invlpg(vm_offset_t addr)
 1074 {
 1075         if (smp_started) {
 1076                 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
 1077 #ifdef COUNT_XINVLTLB_HITS
 1078                 ipi_page++;
 1079 #endif
 1080         }
 1081 }
 1082 
 1083 void
 1084 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
 1085 {
 1086         if (smp_started) {
 1087                 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
 1088 #ifdef COUNT_XINVLTLB_HITS
 1089                 ipi_range++;
 1090                 ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
 1091 #endif
 1092         }
 1093 }
 1094 
 1095 void
 1096 smp_masked_invltlb(u_int mask)
 1097 {
 1098         if (smp_started) {
 1099                 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
 1100 #ifdef COUNT_XINVLTLB_HITS
 1101                 ipi_masked_global++;
 1102 #endif
 1103         }
 1104 }
 1105 
 1106 void
 1107 smp_masked_invlpg(u_int mask, vm_offset_t addr)
 1108 {
 1109         if (smp_started) {
 1110                 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
 1111 #ifdef COUNT_XINVLTLB_HITS
 1112                 ipi_masked_page++;
 1113 #endif
 1114         }
 1115 }
 1116 
 1117 void
 1118 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
 1119 {
 1120         if (smp_started) {
 1121                 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
 1122 #ifdef COUNT_XINVLTLB_HITS
 1123                 ipi_masked_range++;
 1124                 ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
 1125 #endif
 1126         }
 1127 }
 1128 
 1129 
 1130 /*
  1131  * For statclock, we send an IPI to all CPUs to have them call this
 1132  * function.
 1133  */
 1134 
 1135 void
 1136 forward_statclock(void)
 1137 {
 1138         int map;
 1139 
 1140         CTR0(KTR_SMP, "forward_statclock");
 1141 
 1142         if (!smp_started || cold || panicstr)
 1143                 return;
 1144 
 1145         map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
 1146         if (map != 0)
 1147                 ipi_selected(map, IPI_STATCLOCK);
 1148 }
 1149 
 1150 /*
  1151  * For each hardclock(), we send an IPI to all other CPUs to have them
  1152  * execute this function.  It would be nice to reduce contention on
  1153  * sched_lock if we could simply peek at the CPU to determine the user/kernel
  1154  * state and call hardclock_process() on the CPU receiving the clock interrupt
  1155  * and then just use a simple IPI to handle any ASTs if needed.
 1156  */
 1157 
 1158 void 
 1159 forward_hardclock(void)
 1160 {
 1161         u_int map;
 1162 
 1163         CTR0(KTR_SMP, "forward_hardclock");
 1164 
 1165         if (!smp_started || cold || panicstr)
 1166                 return;
 1167 
 1168         map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
 1169         if (map != 0)
 1170                 ipi_selected(map, IPI_HARDCLOCK);
 1171 }
 1172 
 1173 void
 1174 ipi_bitmap_handler(struct clockframe frame)
 1175 {
 1176         int cpu = PCPU_GET(cpuid);
 1177         u_int ipi_bitmap;
 1178         struct thread *td;
 1179 
 1180         ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
 1181 
 1182         critical_enter();
 1183 
 1184         /* Nothing to do for AST */
 1185 
 1186         if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
 1187                 td = curthread; 
 1188                 td->td_intr_nesting_level++;
 1189                 hardclock_process(&frame);
 1190                 td->td_intr_nesting_level--;    
 1191         }
 1192 
 1193         if (ipi_bitmap & (1 << IPI_STATCLOCK)) {
 1194                 CTR0(KTR_SMP, "forwarded_statclock");
 1195 
 1196                 td = curthread;
 1197                 td->td_intr_nesting_level++;
 1198                 if (profprocs != 0)
 1199                         profclock(&frame);
 1200                 if (pscnt == psdiv)
 1201                         statclock(&frame);
 1202                 td->td_intr_nesting_level--;
 1203         }
 1204 
 1205         critical_exit();
 1206 }
 1207 
 1208 /*
 1209  * send an IPI to a set of cpus.
 1210  */
 1211 void
 1212 ipi_selected(u_int32_t cpus, u_int ipi)
 1213 {
 1214         int cpu;
 1215         u_int bitmap = 0;
 1216         u_int old_pending;
 1217         u_int new_pending;
 1218 
 1219         if (IPI_IS_BITMAPED(ipi)) { 
 1220                 bitmap = 1 << ipi;
 1221                 ipi = IPI_BITMAP_VECTOR;
 1222         }
 1223 
 1224         CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 1225         while ((cpu = ffs(cpus)) != 0) {
 1226                 cpu--;
 1227                 cpus &= ~(1 << cpu);
 1228 
 1229                 KASSERT(cpu_apic_ids[cpu] != -1,
 1230                     ("IPI to non-existent CPU %d", cpu));
 1231 
 1232                 if (bitmap) {
 1233                         do {
 1234                                 old_pending = cpu_ipi_pending[cpu];
 1235                                 new_pending = old_pending | bitmap;
  1236                         } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old_pending, new_pending));
 1237 
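                               /*
                                * If other bitmapped IPIs were already
                                * pending for this CPU, a vector is
                                * already on its way and its handler will
                                * see our new bit; skip sending another
                                * IPI.
                                */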
 1238                         if (old_pending)
 1239                                 continue;
 1240                 }
 1241 
 1242                 lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
 1243         }
 1244 
 1245 }
 1246 
 1247 /*
 1248  * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
 1249  */
 1250 void
 1251 ipi_all(u_int ipi)
 1252 {
 1253 
 1254         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1255         lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
 1256 }
 1257 
 1258 /*
 1259  * send an IPI to all CPUs EXCEPT myself
 1260  */
 1261 void
 1262 ipi_all_but_self(u_int ipi)
 1263 {
 1264 
 1265         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1266         lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 1267 }
 1268 
 1269 /*
 1270  * send an IPI to myself
 1271  */
 1272 void
 1273 ipi_self(u_int ipi)
 1274 {
 1275 
 1276         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1277         lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
 1278 }
 1279 
 1280 /*
 1281  * This is called once the rest of the system is up and running and we're
  1282  * ready to let the APs out of the pen.
 1283  */
 1284 static void
 1285 release_aps(void *dummy __unused)
 1286 {
 1287 
 1288         if (mp_ncpus == 1) 
 1289                 return;
 1290         mtx_lock_spin(&sched_lock);
 1291         atomic_store_rel_int(&aps_ready, 1);
 1292         while (smp_started == 0)
 1293                 ia32_pause();
 1294         mtx_unlock_spin(&sched_lock);
 1295 }
 1296 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 1297 
 1298 static int
 1299 sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
 1300 {
 1301         u_int mask;
 1302         int error;
 1303 
 1304         mask = hlt_cpus_mask;
 1305         error = sysctl_handle_int(oidp, &mask, 0, req);
 1306         if (error || !req->newptr)
 1307                 return (error);
 1308 
 1309         if (logical_cpus_mask != 0 &&
 1310             (mask & logical_cpus_mask) == logical_cpus_mask)
 1311                 hlt_logical_cpus = 1;
 1312         else
 1313                 hlt_logical_cpus = 0;
 1314 
 1315         if (! hyperthreading_allowed)
 1316                 mask |= hyperthreading_cpus_mask;
 1317 
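        /* Refuse to halt every CPU at once; keep CPU 0 (the BSP) running. */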
 1318         if ((mask & all_cpus) == all_cpus)
 1319                 mask &= ~(1<<0);
 1320         hlt_cpus_mask = mask;
 1321         return (error);
 1322 }
 1323 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
 1324     0, 0, sysctl_hlt_cpus, "IU",
 1325     "Bitmap of CPUs to halt.  101 (binary) will halt CPUs 0 and 2.");
 1326 
 1327 static int
 1328 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
 1329 {
 1330         int disable, error;
 1331 
 1332         disable = hlt_logical_cpus;
 1333         error = sysctl_handle_int(oidp, &disable, 0, req);
 1334         if (error || !req->newptr)
 1335                 return (error);
 1336 
 1337         if (disable)
 1338                 hlt_cpus_mask |= logical_cpus_mask;
 1339         else
 1340                 hlt_cpus_mask &= ~logical_cpus_mask;
 1341 
 1342         if (! hyperthreading_allowed)
 1343                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1344 
 1345         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1346                 hlt_cpus_mask &= ~(1<<0);
 1347 
 1348         hlt_logical_cpus = disable;
 1349         return (error);
 1350 }
 1351 
 1352 static int
 1353 sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
 1354 {
 1355         int allowed, error;
 1356 
 1357         allowed = hyperthreading_allowed;
 1358         error = sysctl_handle_int(oidp, &allowed, 0, req);
 1359         if (error || !req->newptr)
 1360                 return (error);
 1361 
 1362         if (allowed)
 1363                 hlt_cpus_mask &= ~hyperthreading_cpus_mask;
 1364         else
 1365                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1366 
 1367         if (logical_cpus_mask != 0 &&
 1368             (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
 1369                 hlt_logical_cpus = 1;
 1370         else
 1371                 hlt_logical_cpus = 0;
 1372 
 1373         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1374                 hlt_cpus_mask &= ~(1<<0);
 1375 
 1376         hyperthreading_allowed = allowed;
 1377         return (error);
 1378 }
 1379 
 1380 static void
 1381 cpu_hlt_setup(void *dummy __unused)
 1382 {
 1383 
 1384         if (logical_cpus_mask != 0) {
 1385                 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
 1386                     &hlt_logical_cpus);
 1387                 sysctl_ctx_init(&logical_cpu_clist);
 1388                 SYSCTL_ADD_PROC(&logical_cpu_clist,
 1389                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1390                     "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
 1391                     sysctl_hlt_logical_cpus, "IU", "");
 1392                 SYSCTL_ADD_UINT(&logical_cpu_clist,
 1393                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1394                     "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
 1395                     &logical_cpus_mask, 0, "");
 1396 
 1397                 if (hlt_logical_cpus)
 1398                         hlt_cpus_mask |= logical_cpus_mask;
 1399 
 1400                 /*
 1401                  * If necessary for security purposes, force
 1402                  * hyperthreading off, regardless of the value
 1403                  * of hlt_logical_cpus.
 1404                  */
 1405                 if (hyperthreading_cpus_mask) {
 1406                         TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
 1407                             &hyperthreading_allowed);
 1408                         SYSCTL_ADD_PROC(&logical_cpu_clist,
 1409                             SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1410                             "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
 1411                             0, 0, sysctl_hyperthreading_allowed, "IU", "");
 1412                         if (! hyperthreading_allowed)
 1413                                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1414                 }
 1415         }
 1416 }
 1417 SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
 1418 
 1419 int
 1420 mp_grab_cpu_hlt(void)
 1421 {
 1422         u_int mask = PCPU_GET(cpumask);
 1423 #ifdef MP_WATCHDOG
 1424         u_int cpuid = PCPU_GET(cpuid);
 1425 #endif
 1426         int retval;
 1427 
 1428 #ifdef MP_WATCHDOG
 1429         ap_watchdog(cpuid);
 1430 #endif
 1431 
 1432         retval = mask & hlt_cpus_mask;
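        /*
         * "sti; hlt" enables interrupts and halts as a unit (STI takes
         * effect after the following instruction), so the CPU sleeps until
         * the next interrupt and then re-checks hlt_cpus_mask.
         */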
 1433         while (mask & hlt_cpus_mask)
 1434                 __asm __volatile("sti; hlt" : : : "memory");
 1435         return (retval);
 1436 }
