
FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/mp_machdep.c


/*-
 * Copyright (c) 1996, by Steve Passe
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/6.1/sys/i386/i386/mp_machdep.c 158108 2006-04-28 06:54:34Z cperciva $");

#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_kdb.h"
#include "opt_kstack_pages.h"
#include "opt_mp_watchdog.h"
#include "opt_sched.h"

#if !defined(lint)
#if !defined(SMP)
#error How did you get here?
#endif

#ifndef DEV_APIC
#error The apic device is required for SMP, add "device apic" to your config file.
#endif
#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
#error SMP not supported with CPU_DISABLE_CMPXCHG
#endif
#endif /* not lint */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cons.h>   /* cngetc() */
#ifdef GPROF
#include <sys/gmon.h>
#endif
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#include <machine/apicreg.h>
#include <machine/clock.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/smptests.h>   /** COUNT_XINVLTLB_HITS */
#include <machine/specialreg.h>
#include <machine/privatespace.h>

#define WARMBOOT_TARGET         0
#define WARMBOOT_OFF            (KERNBASE + 0x0467)
#define WARMBOOT_SEG            (KERNBASE + 0x0469)

#define CMOS_REG                (0x70)
#define CMOS_DATA               (0x71)
#define BIOS_RESET              (0x0f)
#define BIOS_WARM               (0x0a)

/*
 * This code MUST be enabled both here and in mpboot.s.
 * It follows the very early stages of AP boot by placing values in CMOS RAM.
 * It NORMALLY will never be needed, hence the primitive method for
 * enabling it.
 *
#define CHECK_POINTS
 */

#if defined(CHECK_POINTS) && !defined(PC98)
#define CHECK_READ(A)    (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

#define CHECK_INIT(D)                           \
        CHECK_WRITE(0x34, (D));                 \
        CHECK_WRITE(0x35, (D));                 \
        CHECK_WRITE(0x36, (D));                 \
        CHECK_WRITE(0x37, (D));                 \
        CHECK_WRITE(0x38, (D));                 \
        CHECK_WRITE(0x39, (D));

#define CHECK_PRINT(S)                          \
        printf("%s: %d, %d, %d, %d, %d, %d\n",  \
           (S),                                 \
           CHECK_READ(0x34),                    \
           CHECK_READ(0x35),                    \
           CHECK_READ(0x36),                    \
           CHECK_READ(0x37),                    \
           CHECK_READ(0x38),                    \
           CHECK_READ(0x39));

#else                           /* CHECK_POINTS */

#define CHECK_INIT(D)
#define CHECK_PRINT(S)
#define CHECK_WRITE(A, D)

#endif                          /* CHECK_POINTS */

/*
 * Values to send to the POST hardware.
 */
#define MP_BOOTADDRESS_POST     0x10
#define MP_PROBE_POST           0x11
#define MPTABLE_PASS1_POST      0x12

#define MP_START_POST           0x13
#define MP_ENABLE_POST          0x14
#define MPTABLE_PASS2_POST      0x15

#define START_ALL_APS_POST      0x16
#define INSTALL_AP_TRAMP_POST   0x17
#define START_AP_POST           0x18

#define MP_ANNOUNCE_POST        0x19

/* lock region used by kernel profiling */
int     mcount_lock;

/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int     current_postcode;

int     mp_naps;                /* # of application processors */
int     boot_cpu_id = -1;       /* designated BSP */
extern  int nkpt;

/*
 * CPU topology map data structures for HTT.
 */
static struct cpu_group mp_groups[MAXCPU];
static struct cpu_top mp_top;

/* AP uses this during bootstrap.  Do not staticize.  */
char *bootSTK;
static int bootAP;

/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;

/* SMP page table page */
extern pt_entry_t *SMPpt;

struct pcb stoppcbs[MAXCPU];

/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;

#ifdef KDB_STOP_NMI
volatile cpumask_t ipi_nmi_pending;
#endif

#ifdef COUNT_IPIS
/* Interrupt counts. */
#ifdef IPI_PREEMPTION
static u_long *ipi_preempt_counts[MAXCPU];
#endif
static u_long *ipi_ast_counts[MAXCPU];
u_long *ipi_invltlb_counts[MAXCPU];
u_long *ipi_invlrng_counts[MAXCPU];
u_long *ipi_invlpg_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
u_long *ipi_lazypmap_counts[MAXCPU];
#endif

/*
 * Local data and functions.
 */

static u_int logical_cpus;

/* used to hold the APs until we are ready to release them */
static struct mtx ap_boot_mtx;

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;

/*
 * Store data from cpu_add() until later in the boot when we actually set up
 * the APs.
 */
struct cpu_info {
        int     cpu_present:1;
        int     cpu_bsp:1;
        int     cpu_disabled:1;
} static cpu_info[MAXCPU];
static int cpu_apic_ids[MAXCPU];

/* Holds pending bitmap based IPIs per CPU */
static volatile u_int cpu_ipi_pending[MAXCPU];

static u_int boot_address;

static void     set_interrupt_apic_ids(void);
static int      start_all_aps(void);
static void     install_ap_tramp(void);
static int      start_ap(int apic_id);
static void     release_aps(void *dummy);

static int      hlt_logical_cpus;
static u_int    hyperthreading_cpus;
static cpumask_t        hyperthreading_cpus_mask;
static int      hyperthreading_allowed;
static struct   sysctl_ctx_list logical_cpu_clist;

static void
mem_range_AP_init(void)
{
        if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
                mem_range_softc.mr_op->initAP(&mem_range_softc);
}

void
mp_topology(void)
{
        struct cpu_group *group;
        int logical_cpus;
        int apic_id;
        int groups;
        int cpu;

        /* Build the smp_topology map. */
        /* Nothing to do if there is no HTT support. */
        if ((cpu_feature & CPUID_HTT) == 0)
                return;
        logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
        if (logical_cpus <= 1)
                return;
        group = &mp_groups[0];
        groups = 1;
        for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
                if (!cpu_info[apic_id].cpu_present)
                        continue;
                /*
                 * If the current group has members and we're not a logical
                 * cpu, create a new group.
                 */
                if (group->cg_count != 0 && (apic_id % logical_cpus) == 0) {
                        group++;
                        groups++;
                }
                group->cg_count++;
                group->cg_mask |= 1 << cpu;
                cpu++;
        }

        mp_top.ct_count = groups;
        mp_top.ct_group = mp_groups;
        smp_topology = &mp_top;
}
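
/*
 * A worked example of the grouping above, assuming a two-package HTT box
 * with contiguous APIC IDs 0-3 and logical_cpus == 2:
 *
 *      mp_groups[0]: cg_count = 2, cg_mask = 0x3   (cpus 0 and 1)
 *      mp_groups[1]: cg_count = 2, cg_mask = 0xc   (cpus 2 and 3)
 *
 * i.e. one cpu_group per physical package, so the scheduler can spread
 * work across packages before doubling up on hyperthread siblings.
 */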

/*
 * Calculate usable address in base memory for AP trampoline code.
 */
u_int
mp_bootaddress(u_int basemem)
{
        POSTCODE(MP_BOOTADDRESS_POST);

        boot_address = trunc_page(basemem);     /* round down to 4k boundary */
        if ((basemem - boot_address) < bootMP_size)
                boot_address -= PAGE_SIZE;      /* not enough, lower by 4k */

        return boot_address;
}
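
/*
 * Worked example, in the byte units the arithmetic above implies: with
 * basemem = 0x9fc00 (639 KB), trunc_page() gives 0x9f000.  That leaves
 * 0xc00 bytes below the end of base memory; if bootMP_size is larger
 * than that, the trampoline is moved down one page to 0x9e000.
 */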

void
cpu_add(u_int apic_id, char boot_cpu)
{

        if (apic_id >= MAXCPU) {
                printf("SMP: CPU %d exceeds maximum CPU %d, ignoring\n",
                    apic_id, MAXCPU - 1);
                return;
        }
        KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
            apic_id));
        cpu_info[apic_id].cpu_present = 1;
        if (boot_cpu) {
                KASSERT(boot_cpu_id == -1,
                    ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
                    boot_cpu_id));
                boot_cpu_id = apic_id;
                cpu_info[apic_id].cpu_bsp = 1;
        }
        mp_ncpus++;
        if (bootverbose)
                printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
                    "AP");
}

void
cpu_mp_setmaxid(void)
{

        mp_maxid = MAXCPU - 1;
}

int
cpu_mp_probe(void)
{

        /*
         * Always record BSP in CPU map so that the mbuf init code works
         * correctly.
         */
        all_cpus = 1;
        if (mp_ncpus == 0) {
                /*
                 * No CPUs were found, so this must be a UP system.  Set up
                 * the variables to represent a system with a single CPU
                 * with an id of 0.
                 */
                mp_ncpus = 1;
                return (0);
        }

        /* At least one CPU was found. */
        if (mp_ncpus == 1) {
                /*
                 * One CPU was found, so this must be a UP system with
                 * an I/O APIC.
                 */
                return (0);
        }

        /* At least two CPUs were found. */
        return (1);
}

/*
 * Initialize the IPI handlers and start up the APs.
 */
void
cpu_mp_start(void)
{
        int i;
        u_int threads_per_cache, p[4];

        POSTCODE(MP_START_POST);

        /* Initialize the logical ID to APIC ID table. */
        for (i = 0; i < MAXCPU; i++) {
                cpu_apic_ids[i] = -1;
                cpu_ipi_pending[i] = 0;
        }

        /* Install an inter-CPU IPI for TLB invalidation */
        setidt(IPI_INVLTLB, IDTVEC(invltlb),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(IPI_INVLPG, IDTVEC(invlpg),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(IPI_INVLRNG, IDTVEC(invlrng),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

        /* Install an inter-CPU IPI for lazy pmap release */
        setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

        /* Install an inter-CPU IPI for all-CPU rendezvous */
        setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

        /* Install generic inter-CPU IPI handler */
        setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

        /* Install an inter-CPU IPI for CPU stop/restart */
        setidt(IPI_STOP, IDTVEC(cpustop),
               SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

        /* Set boot_cpu_id if needed. */
        if (boot_cpu_id == -1) {
                boot_cpu_id = PCPU_GET(apic_id);
                cpu_info[boot_cpu_id].cpu_bsp = 1;
        } else
                KASSERT(boot_cpu_id == PCPU_GET(apic_id),
                    ("BSP's APIC ID doesn't match boot_cpu_id"));
        cpu_apic_ids[0] = boot_cpu_id;

        /* Start each Application Processor */
        start_all_aps();

        /* Set up the initial logical CPUs info. */
        logical_cpus = logical_cpus_mask = 0;
        if (cpu_feature & CPUID_HTT)
                logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;

        /*
         * Work out if hyperthreading is *really* enabled.  This
         * is made really ugly by the fact that processors lie: dual
         * core processors claim to be hyperthreaded even when they're
         * not, presumably because they want to be treated the same
         * way as HTT with respect to per-cpu software licensing.
         * At the time of writing (May 12, 2005) the only hyperthreaded
         * cpus are from Intel, and Intel's dual-core processors can be
         * identified via the "deterministic cache parameters" cpuid
         * calls.
         */
        /*
         * First determine if this is an Intel processor which claims
         * to have hyperthreading support.
         */
        if ((cpu_feature & CPUID_HTT) &&
            (strcmp(cpu_vendor, "GenuineIntel") == 0)) {
                /*
                 * If the "deterministic cache parameters" cpuid calls
                 * are available, use them.
                 */
                if (cpu_high >= 4) {
                        /* Ask the processor about the L1 cache. */
                        for (i = 0; i < 1; i++) {
                                cpuid_count(4, i, p);
                                threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
                                if (hyperthreading_cpus < threads_per_cache)
                                        hyperthreading_cpus = threads_per_cache;
                                if ((p[0] & 0x1f) == 0)
                                        break;
                        }
                }

                /*
                 * If the deterministic cache parameters are not
                 * available, or if no caches were reported to exist,
                 * just accept what the HTT flag indicated.
                 */
                if (hyperthreading_cpus == 0)
                        hyperthreading_cpus = logical_cpus;
        }

        set_interrupt_apic_ids();
}
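
/*
 * A minimal standalone sketch of the leaf-4 decoding above, assuming a
 * userland build with GCC's <cpuid.h> (the kernel uses its own
 * cpuid_count()).  In the "deterministic cache parameters" leaf, EAX
 * bits 4:0 give the cache type (0 = no more caches) and EAX bits 25:14
 * hold "addressable IDs sharing this cache" minus one, which is exactly
 * what the 0x3ffc000 mask and the shift by 14 extract.
 */
#if 0   /* illustration only, not compiled */
#include <cpuid.h>

static unsigned
threads_sharing_first_cache(void)
{
        unsigned a, b, c, d;

        __cpuid_count(4, 0, a, b, c, d);        /* subleaf 0: first cache */
        if ((a & 0x1f) == 0)                    /* type 0: no cache info */
                return (0);
        return (((a & 0x3ffc000) >> 14) + 1);
}
#endif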

/*
 * Print various information about the SMP system hardware and setup.
 */
void
cpu_mp_announce(void)
{
        int i, x;

        POSTCODE(MP_ANNOUNCE_POST);

        /* List CPUs */
        printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
        for (i = 1, x = 0; x < MAXCPU; x++) {
                if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
                        continue;
                if (cpu_info[x].cpu_disabled)
                        printf("  cpu (AP): APIC ID: %2d (disabled)\n", x);
                else {
                        KASSERT(i < mp_ncpus,
                            ("mp_ncpus and actual cpus are out of whack"));
                        printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
                }
        }
}

/*
 * AP CPUs call this to initialize themselves.
 */
void
init_secondary(void)
{
        vm_offset_t addr;
        int     gsel_tss;
        int     x, myid;
        u_int   cr0;

        /* bootAP is set in start_ap() to our ID. */
        myid = bootAP;
        gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
        gdt_segs[GPROC0_SEL].ssd_base =
                (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
        SMP_prvspace[myid].pcpu.pc_prvspace =
                &SMP_prvspace[myid].pcpu;

        for (x = 0; x < NGDT; x++) {
                ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
        }

        r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
        r_gdt.rd_base = (int) &gdt[myid * NGDT];
        lgdt(&r_gdt);                   /* does magic intra-segment return */

        lidt(&r_idt);

        lldt(_default_ldt);
        PCPU_SET(currentldt, _default_ldt);

        gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
        gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
        PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
        PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
        PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
        PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
        PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
        ltr(gsel_tss);

        PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd);

        /*
         * Set to a known state:
         * Set by mpboot.s: CR0_PG, CR0_PE
         * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
         */
        cr0 = rcr0();
        cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
        load_cr0(cr0);
        CHECK_WRITE(0x38, 5);

        /* Disable local APIC just to be sure. */
        lapic_disable();

        /* signal our startup to the BSP. */
        mp_naps++;
        CHECK_WRITE(0x39, 6);

        /* Spin until the BSP releases the APs. */
        while (!aps_ready)
                ia32_pause();

        /* BSP may have changed PTD while we were waiting */
        invltlb();
        for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
                invlpg(addr);

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
        lidt(&r_idt);
#endif

        /* set up CPU registers and state */
        cpu_setregs();

        /* set up FPU state on the AP */
        npxinit(__INITIAL_NPXCW__);

        /* set up SSE registers */
        enable_sse();

        /* A quick check from sanity claus */
        if (PCPU_GET(apic_id) != lapic_id()) {
                printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
                printf("SMP: actual apic_id = %d\n", lapic_id());
                printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
                printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]);
                panic("cpuid mismatch! boom!!");
        }

        /* Initialize curthread. */
        KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
        PCPU_SET(curthread, PCPU_GET(idlethread));

        mtx_lock_spin(&ap_boot_mtx);

        /* Init local apic for irq's */
        lapic_setup();

        /* Set memory range attributes for this CPU to match the BSP */
        mem_range_AP_init();

        smp_cpus++;

        CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
        printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));

        /* Determine if we are a logical CPU. */
        if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
                logical_cpus_mask |= PCPU_GET(cpumask);

        /* Determine if we are a hyperthread. */
        if (hyperthreading_cpus > 1 &&
            PCPU_GET(apic_id) % hyperthreading_cpus != 0)
                hyperthreading_cpus_mask |= PCPU_GET(cpumask);

        /* Build our map of 'other' CPUs. */
        PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

        if (bootverbose)
                lapic_dump("AP");

        if (smp_cpus == mp_ncpus) {
                /* enable IPIs, tlb shootdown, freezes etc */
                atomic_store_rel_int(&smp_started, 1);
                smp_active = 1;  /* historic */
        }

        mtx_unlock_spin(&ap_boot_mtx);

        /* wait until all the APs are up */
        while (smp_started == 0)
                ia32_pause();

        /* ok, now grab sched_lock and enter the scheduler */
        mtx_lock_spin(&sched_lock);

        /*
         * Correct spinlock nesting.  The idle thread context that we are
         * borrowing was created so that it would start out with a single
         * spin lock (sched_lock) held in fork_trampoline().  Since we've
         * explicitly acquired locks in this function, the nesting count
         * is now 2 rather than 1.  Since we are nested, calling
         * spinlock_exit() will simply adjust the counts without allowing
         * spin lock using code to interrupt us.
         */
        spinlock_exit();
        KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));

        binuptime(PCPU_PTR(switchtime));
        PCPU_SET(switchticks, ticks);

        cpu_throw(NULL, choosethread());        /* doesn't return */

        panic("scheduler returned us to %s", __func__);
        /* NOTREACHED */
}

/*******************************************************************
 * local functions and data
 */

/*
 * We tell the I/O APIC code about all the CPUs that we want to receive
 * interrupts.  If we don't want certain CPUs to receive IRQs we
 * can simply not tell the I/O APIC code about them in this function.
 * We also do not tell it about the BSP since it tells itself about
 * the BSP internally to work with UP kernels and on UP machines.
 */
static void
set_interrupt_apic_ids(void)
{
        u_int apic_id;

        for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
                if (!cpu_info[apic_id].cpu_present)
                        continue;
                if (cpu_info[apic_id].cpu_bsp)
                        continue;

                /* Don't let hyperthreads service interrupts. */
                if (hyperthreading_cpus > 1 &&
                    apic_id % hyperthreading_cpus != 0)
                        continue;

                intr_add_cpu(apic_id);
        }
}

/*
 * start each AP in our list
 */
static int
start_all_aps(void)
{
#ifndef PC98
        u_char mpbiosreason;
#endif
        struct pcpu *pc;
        char *stack;
        uintptr_t kptbase;
        u_int32_t mpbioswarmvec;
        int apic_id, cpu, i, pg;

        POSTCODE(START_ALL_APS_POST);

        mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

        /* install the AP 1st level boot code */
        install_ap_tramp();

        /* save the current value of the warm-start vector */
        mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
#ifndef PC98
        outb(CMOS_REG, BIOS_RESET);
        mpbiosreason = inb(CMOS_DATA);
#endif

        /* set up temporary P==V mapping for AP boot */
        /* XXX this is a hack, we should boot the AP on its own stack/PTD */
        kptbase = (uintptr_t)(void *)KPTphys;
        for (i = 0; i < NKPT; i++)
                PTD[i] = (pd_entry_t)(PG_V | PG_RW |
                    ((kptbase + i * PAGE_SIZE) & PG_FRAME));
        invltlb();

        /* start each AP */
        for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {

                /* Ignore non-existent CPUs and the BSP. */
                if (!cpu_info[apic_id].cpu_present ||
                    cpu_info[apic_id].cpu_bsp)
                        continue;

                /* Don't use this CPU if it has been disabled by a tunable. */
                if (resource_disabled("lapic", apic_id)) {
                        cpu_info[apic_id].cpu_disabled = 1;
                        mp_ncpus--;
                        continue;
                }

                cpu++;

                /* save APIC ID for this logical ID */
                cpu_apic_ids[cpu] = apic_id;

                /* first page of AP's private space */
                pg = cpu * i386_btop(sizeof(struct privatespace));

                /* allocate a new private data page */
                pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);

                /* wire it into the private page table page */
                SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));

                /* allocate and set up an idle stack data page */
                stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
                for (i = 0; i < KSTACK_PAGES; i++)
                        SMPpt[pg + 1 + i] = (pt_entry_t)
                            (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

                /* prime data page for it to use */
                pcpu_init(pc, cpu, sizeof(struct pcpu));
                pc->pc_apic_id = apic_id;

                /* set up a vector to our boot code */
                *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
                *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
#ifndef PC98
                outb(CMOS_REG, BIOS_RESET);
                outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
#endif

                bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
                    PAGE_SIZE];
                bootAP = cpu;

                /* attempt to start the Application Processor */
                CHECK_INIT(99); /* setup checkpoints */
                if (!start_ap(apic_id)) {
                        printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
                        CHECK_PRINT("trace");   /* show checkpoints */
                        /* better panic as the AP may be running loose */
                        printf("panic y/n? [y] ");
                        if (cngetc() != 'n')
                                panic("bye-bye");
                }
                CHECK_PRINT("trace");           /* show checkpoints */

                all_cpus |= (1 << cpu);         /* record AP in CPU map */
        }

        /* build our map of 'other' CPUs */
        PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

        /* restore the warmstart vector */
        *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;

#ifndef PC98
        outb(CMOS_REG, BIOS_RESET);
        outb(CMOS_DATA, mpbiosreason);
#endif

        /*
         * Set up the idle context for the BSP.  Similar to above except
         * that some was done by locore, some by pmap.c and some is implicit
         * because the BSP is cpu#0, the page is initially zero and we can
         * refer to variables by name on the BSP.
         */

        /* Allocate and set up BSP idle stack */
        stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
        for (i = 0; i < KSTACK_PAGES; i++)
                SMPpt[1 + i] = (pt_entry_t)
                    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

        for (i = 0; i < NKPT; i++)
                PTD[i] = 0;
        pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);

        /* number of APs actually started */
        return mp_naps;
}

/*
 * Load the 1st level AP boot code into base memory.
 */

/* targets for relocation */
extern void bigJump(void);
extern void bootCodeSeg(void);
extern void bootDataSeg(void);
extern void MPentry(void);
extern u_int MP_GDT;
extern u_int mp_gdtbase;

static void
install_ap_tramp(void)
{
        int     x;
        int     size = *(int *) ((u_long) & bootMP_size);
        vm_offset_t va = boot_address + KERNBASE;
        u_char *src = (u_char *) ((u_long) bootMP);
        u_char *dst = (u_char *) va;
        u_int   boot_base = (u_int) bootMP;
        u_int8_t *dst8;
        u_int16_t *dst16;
        u_int32_t *dst32;

        POSTCODE(INSTALL_AP_TRAMP_POST);

        KASSERT(size <= PAGE_SIZE,
            ("'size' does not fit into PAGE_SIZE, as expected."));
        pmap_kenter(va, boot_address);
        pmap_invalidate_page(kernel_pmap, va);
        for (x = 0; x < size; ++x)
                *dst++ = *src++;

        /*
         * Modify addresses in the code we just moved to basemem.
         * Unfortunately we need fairly detailed info about mpboot.s for
         * this to work.  Changes to mpboot.s might require changes here.
         */

        /* boot code is located in KERNEL space */
        dst = (u_char *) va;

        /* modify the lgdt arg */
        dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
        *dst32 = boot_address + ((u_int) & MP_GDT - boot_base);

        /* modify the ljmp target for MPentry() */
        dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
        *dst32 = ((u_int) MPentry - KERNBASE);

        /* modify the target for boot code segment */
        dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
        dst8 = (u_int8_t *) (dst16 + 1);
        *dst16 = (u_int) boot_address & 0xffff;
        *dst8 = ((u_int) boot_address >> 16) & 0xff;

        /* modify the target for boot data segment */
        dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
        dst8 = (u_int8_t *) (dst16 + 1);
        *dst16 = (u_int) boot_address & 0xffff;
        *dst8 = ((u_int) boot_address >> 16) & 0xff;
}

/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'apic_id'.  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It isn't pretty,
 * but it seems to work.
 */
static int
start_ap(int apic_id)
{
        int vector, ms;
        int cpus;

        POSTCODE(START_AP_POST);

        /* calculate the vector */
        vector = (boot_address >> 12) & 0xff;

        /* used as a watchpoint to signal AP startup */
        cpus = mp_naps;

        /*
         * First we do an INIT/RESET IPI.  This INIT IPI might be run,
         * resetting and running the target CPU; OR it might be latched (P5
         * bug), with the CPU waiting for a STARTUP IPI; OR it might be
         * ignored.
         */

        /* do an INIT IPI: assert RESET */
        lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
            APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);

        /* wait for pending status end */
        lapic_ipi_wait(-1);

        /* do an INIT IPI: deassert RESET */
        lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
            APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);

        /* wait for pending status end */
        DELAY(10000);           /* wait ~10 ms */
        lapic_ipi_wait(-1);

        /*
         * Next we do a STARTUP IPI: the previous INIT IPI might still be
         * latched (P5 bug), in which case this first STARTUP would terminate
         * immediately and the previously started INIT IPI would continue; OR
         * the previous INIT IPI has already run, and this STARTUP IPI will
         * run; OR the previous INIT IPI was ignored, and this STARTUP IPI
         * will run.
         */

        /* do a STARTUP IPI */
        lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
            APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
            vector, apic_id);
        lapic_ipi_wait(-1);
        DELAY(200);             /* wait ~200 us */

        /*
         * Finally we do a second STARTUP IPI: this one should run IF
         * the previous STARTUP IPI was cancelled by a latched INIT IPI;
         * otherwise it will be ignored, as only ONE STARTUP IPI is
         * recognized after hardware RESET or an INIT IPI.
         */

        lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
            APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
            vector, apic_id);
        lapic_ipi_wait(-1);
        DELAY(200);             /* wait ~200 us */

        /* Wait up to 5 seconds for it to start. */
        for (ms = 0; ms < 5000; ms++) {
                if (mp_naps > cpus)
                        return 1;       /* return SUCCESS */
                DELAY(1000);
        }
        return 0;               /* return FAILURE */
}
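
/*
 * For reference, the sequence above is the classic INIT/SIPI/SIPI dance
 * from the Intel MP specification.  A worked example of the vector
 * arithmetic, assuming boot_address = 0x9e000: the STARTUP vector is
 * (0x9e000 >> 12) & 0xff = 0x9e, and the AP begins real-mode execution
 * at vector << 12 = 0x9e000, i.e. exactly at the trampoline placed
 * there by install_ap_tramp().
 */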

#ifdef COUNT_XINVLTLB_HITS
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
    sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
    sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
    sizeof(xhits_rng), "IU", "");

u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
    0, "");

u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
    &ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
    &ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
    &ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
    &ipi_masked_range_size, 0, "");
#endif /* COUNT_XINVLTLB_HITS */

/*
 * Flush the TLB on all other CPUs.
 */
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
        u_int ncpu;

        ncpu = mp_ncpus - 1;    /* we do not shoot down self */
        if (ncpu < 1)
                return;         /* no other cpus */
        mtx_assert(&smp_ipi_mtx, MA_OWNED);
        smp_tlb_addr1 = addr1;
        smp_tlb_addr2 = addr2;
        atomic_store_rel_int(&smp_tlb_wait, 0);
        ipi_all_but_self(vector);
        while (smp_tlb_wait < ncpu)
                ia32_pause();
}
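
/*
 * The handshake above, spelled out: each shootdown IPI handler performs
 * its invalidation and then atomically increments smp_tlb_wait, so the
 * initiator's spin loop completes once every other CPU has acknowledged.
 * smp_ipi_mtx must be held so that only one shootdown (and one pair of
 * smp_tlb_addr1/addr2 values) is in flight at a time.
 */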

static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
        int ncpu, othercpus;

        othercpus = mp_ncpus - 1;
        if (mask == (u_int)-1) {
                ncpu = othercpus;
                if (ncpu < 1)
                        return;
        } else {
                mask &= ~PCPU_GET(cpumask);
                if (mask == 0)
                        return;
                ncpu = bitcount32(mask);
                if (ncpu > othercpus) {
                        /* XXX this should be a panic offence */
                        printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
                            ncpu, othercpus);
                        ncpu = othercpus;
                }
                /* XXX should be a panic, implied by mask == 0 above */
                if (ncpu < 1)
                        return;
        }
        mtx_assert(&smp_ipi_mtx, MA_OWNED);
        smp_tlb_addr1 = addr1;
        smp_tlb_addr2 = addr2;
        atomic_store_rel_int(&smp_tlb_wait, 0);
        if (mask == (u_int)-1)
                ipi_all_but_self(vector);
        else
                ipi_selected(mask, vector);
        while (smp_tlb_wait < ncpu)
                ia32_pause();
}

void
smp_invltlb(void)
{

        if (smp_started) {
                smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
                ipi_global++;
#endif
        }
}

void
smp_invlpg(vm_offset_t addr)
{

        if (smp_started) {
                smp_tlb_shootdown(IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
                ipi_page++;
#endif
        }
}

void
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{

        if (smp_started) {
                smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
                ipi_range++;
                ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
        }
}

void
smp_masked_invltlb(u_int mask)
{

        if (smp_started) {
                smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
                ipi_masked_global++;
#endif
        }
}

void
smp_masked_invlpg(u_int mask, vm_offset_t addr)
{

        if (smp_started) {
                smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
                ipi_masked_page++;
#endif
        }
}

void
smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
{

        if (smp_started) {
                smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
                ipi_masked_range++;
                ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
        }
}

void
ipi_bitmap_handler(struct clockframe frame)
{
        int cpu = PCPU_GET(cpuid);
        u_int ipi_bitmap;

        ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);

#ifdef IPI_PREEMPTION
        if (ipi_bitmap & IPI_PREEMPT) {
#ifdef COUNT_IPIS
                (*ipi_preempt_counts[cpu])++;
#endif
                mtx_lock_spin(&sched_lock);
                /* Don't preempt the idle thread */
                if (curthread->td_priority < PRI_MIN_IDLE) {
                        struct thread *running_thread = curthread;
                        if (running_thread->td_critnest > 1)
                                running_thread->td_owepreempt = 1;
                        else
                                mi_switch(SW_INVOL | SW_PREEMPT, NULL);
                }
                mtx_unlock_spin(&sched_lock);
        }
#endif

        if (ipi_bitmap & IPI_AST) {
#ifdef COUNT_IPIS
                (*ipi_ast_counts[cpu])++;
#endif
                /* Nothing to do for AST */
        }
}

/*
 * Send an IPI to a set of CPUs.
 */
void
ipi_selected(u_int32_t cpus, u_int ipi)
{
        int cpu;
        u_int bitmap = 0;
        u_int old_pending;
        u_int new_pending;

        if (IPI_IS_BITMAPED(ipi)) {
                bitmap = 1 << ipi;
                ipi = IPI_BITMAP_VECTOR;
        }

        CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
        while ((cpu = ffs(cpus)) != 0) {
                cpu--;
                cpus &= ~(1 << cpu);

                KASSERT(cpu_apic_ids[cpu] != -1,
                    ("IPI to non-existent CPU %d", cpu));

                if (bitmap) {
                        do {
                                old_pending = cpu_ipi_pending[cpu];
                                new_pending = old_pending | bitmap;
                        } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
                            old_pending, new_pending));

                        if (old_pending)
                                continue;
                }

                lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
        }
}
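
/*
 * The cmpset loop above coalesces bitmapped IPIs: if old_pending was
 * already non-zero, an IPI_BITMAP_VECTOR interrupt is still outstanding
 * for that CPU, so setting the new bit suffices and no second IPI is
 * sent.  A minimal userland model of the same idea, assuming C11
 * atomics in place of the kernel's atomic_cmpset_int():
 */
#if 0   /* illustration only, not compiled */
#include <stdatomic.h>
#include <stdbool.h>

static _Atomic unsigned pending;        /* stands in for cpu_ipi_pending[cpu] */

/* Set 'bit'; return true if the caller must actually send an IPI. */
static bool
post_bit(unsigned bit)
{
        unsigned old = atomic_load(&pending);

        /* On failure, 'old' is reloaded and the merge is retried. */
        while (!atomic_compare_exchange_weak(&pending, &old, old | bit))
                ;
        return (old == 0);
}
#endif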

/*
 * Send an IPI containing 'ipi' to all CPUs, including myself.
 */
void
ipi_all(u_int ipi)
{

        CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
        lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
}

/*
 * Send an IPI to all CPUs EXCEPT myself.
 */
void
ipi_all_but_self(u_int ipi)
{

        CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
        lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}

/*
 * Send an IPI to myself.
 */
void
ipi_self(u_int ipi)
{

        CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
        lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
}

#ifdef KDB_STOP_NMI
/*
 * Send an NMI IPI to selected CPUs.
 */

#define BEFORE_SPIN     1000000

void
ipi_nmi_selected(u_int32_t cpus)
{
        int cpu;
        register_t icrlo;

        icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT
                | APIC_TRIGMOD_EDGE;

        CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);

        atomic_set_int(&ipi_nmi_pending, cpus);

        while ((cpu = ffs(cpus)) != 0) {
                cpu--;
                cpus &= ~(1 << cpu);

                KASSERT(cpu_apic_ids[cpu] != -1,
                    ("IPI NMI to non-existent CPU %d", cpu));

                /* Wait for an earlier IPI to finish. */
                if (!lapic_ipi_wait(BEFORE_SPIN))
                        panic("ipi_nmi_selected: previous IPI has not cleared");

                lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
        }
}

int
ipi_nmi_handler(void)
{
        int cpu = PCPU_GET(cpuid);

        if (!(atomic_load_acq_int(&ipi_nmi_pending) & (1 << cpu)))
                return 1;

        atomic_clear_int(&ipi_nmi_pending, 1 << cpu);

        savectx(&stoppcbs[cpu]);

        /* Indicate that we are stopped */
        atomic_set_int(&stopped_cpus, 1 << cpu);

        /* Wait for restart */
        while (!(atomic_load_acq_int(&started_cpus) & (1 << cpu)))
                ia32_pause();

        atomic_clear_int(&started_cpus, 1 << cpu);
        atomic_clear_int(&stopped_cpus, 1 << cpu);

        if (cpu == 0 && cpustop_restartfunc != NULL)
                cpustop_restartfunc();

        return 0;
}

#endif /* KDB_STOP_NMI */

/*
 * This is called once the rest of the system is up and running and we're
 * ready to let the APs out of the pen.
 */
static void
release_aps(void *dummy __unused)
{

        if (mp_ncpus == 1)
                return;
        mtx_lock_spin(&sched_lock);
        atomic_store_rel_int(&aps_ready, 1);
        while (smp_started == 0)
                ia32_pause();
        mtx_unlock_spin(&sched_lock);
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);

static int
sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
{
        u_int mask;
        int error;

        mask = hlt_cpus_mask;
        error = sysctl_handle_int(oidp, &mask, 0, req);
        if (error || !req->newptr)
                return (error);

        if (logical_cpus_mask != 0 &&
            (mask & logical_cpus_mask) == logical_cpus_mask)
                hlt_logical_cpus = 1;
        else
                hlt_logical_cpus = 0;

        if (!hyperthreading_allowed)
                mask |= hyperthreading_cpus_mask;

        if ((mask & all_cpus) == all_cpus)
                mask &= ~(1<<0);
        hlt_cpus_mask = mask;
        return (error);
}
SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
    0, 0, sysctl_hlt_cpus, "IU",
    "Bitmap of CPUs to halt.  101 (binary) will halt CPUs 0 and 2.");
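
/*
 * Usage example: on a 4-CPU box, "sysctl machdep.hlt_cpus=5" (binary
 * 0101) halts CPUs 0 and 2.  Note the safety net above: if the new mask
 * would halt every CPU in all_cpus, CPU 0 is silently cleared from it.
 */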

static int
sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
{
        int disable, error;

        disable = hlt_logical_cpus;
        error = sysctl_handle_int(oidp, &disable, 0, req);
        if (error || !req->newptr)
                return (error);

        if (disable)
                hlt_cpus_mask |= logical_cpus_mask;
        else
                hlt_cpus_mask &= ~logical_cpus_mask;

        if (!hyperthreading_allowed)
                hlt_cpus_mask |= hyperthreading_cpus_mask;

        if ((hlt_cpus_mask & all_cpus) == all_cpus)
                hlt_cpus_mask &= ~(1<<0);

        hlt_logical_cpus = disable;
        return (error);
}

static int
sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
{
        int allowed, error;

        allowed = hyperthreading_allowed;
        error = sysctl_handle_int(oidp, &allowed, 0, req);
        if (error || !req->newptr)
                return (error);

        if (allowed)
                hlt_cpus_mask &= ~hyperthreading_cpus_mask;
        else
                hlt_cpus_mask |= hyperthreading_cpus_mask;

        if (logical_cpus_mask != 0 &&
            (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
                hlt_logical_cpus = 1;
        else
                hlt_logical_cpus = 0;

        if ((hlt_cpus_mask & all_cpus) == all_cpus)
                hlt_cpus_mask &= ~(1<<0);

        hyperthreading_allowed = allowed;
        return (error);
}

static void
cpu_hlt_setup(void *dummy __unused)
{

        if (logical_cpus_mask != 0) {
                TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
                    &hlt_logical_cpus);
                sysctl_ctx_init(&logical_cpu_clist);
                SYSCTL_ADD_PROC(&logical_cpu_clist,
                    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
                    "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
                    sysctl_hlt_logical_cpus, "IU", "");
                SYSCTL_ADD_UINT(&logical_cpu_clist,
                    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
                    "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
                    &logical_cpus_mask, 0, "");

                if (hlt_logical_cpus)
                        hlt_cpus_mask |= logical_cpus_mask;

                /*
                 * If necessary for security purposes, force
                 * hyperthreading off, regardless of the value
                 * of hlt_logical_cpus.
                 */
                if (hyperthreading_cpus_mask) {
                        TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
                            &hyperthreading_allowed);
                        SYSCTL_ADD_PROC(&logical_cpu_clist,
                            SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
                            "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
                            0, 0, sysctl_hyperthreading_allowed, "IU", "");
                        if (!hyperthreading_allowed)
                                hlt_cpus_mask |= hyperthreading_cpus_mask;
                }
        }
}
SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);

int
mp_grab_cpu_hlt(void)
{
        u_int mask = PCPU_GET(cpumask);
#ifdef MP_WATCHDOG
        u_int cpuid = PCPU_GET(cpuid);
#endif
        int retval;

#ifdef MP_WATCHDOG
        ap_watchdog(cpuid);
#endif

        retval = mask & hlt_cpus_mask;
        while (mask & hlt_cpus_mask)
                __asm __volatile("sti; hlt" : : : "memory");
        return (retval);
}
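
/*
 * Note on the "sti; hlt" pair above: STI inhibits interrupts for one
 * more instruction, so no interrupt can slip in between enabling
 * interrupts and halting.  The CPU wakes from HLT on the next interrupt
 * and re-checks hlt_cpus_mask, halting again until its bit is cleared.
 */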

#ifdef COUNT_IPIS
/*
 * Set up interrupt counters for IPI handlers.
 */
static void
mp_ipi_intrcnt(void *dummy)
{
        char buf[64];
        int i;

        for (i = 0; i <= mp_maxid; i++) {
                if (CPU_ABSENT(i))
                        continue;
                snprintf(buf, sizeof(buf), "cpu%d: invltlb", i);
                intrcnt_add(buf, &ipi_invltlb_counts[i]);
                snprintf(buf, sizeof(buf), "cpu%d: invlrng", i);
                intrcnt_add(buf, &ipi_invlrng_counts[i]);
                snprintf(buf, sizeof(buf), "cpu%d: invlpg", i);
                intrcnt_add(buf, &ipi_invlpg_counts[i]);
#ifdef IPI_PREEMPTION
                snprintf(buf, sizeof(buf), "cpu%d: preempt", i);
                intrcnt_add(buf, &ipi_preempt_counts[i]);
#endif
                snprintf(buf, sizeof(buf), "cpu%d: ast", i);
                intrcnt_add(buf, &ipi_ast_counts[i]);
                snprintf(buf, sizeof(buf), "cpu%d: rendezvous", i);
                intrcnt_add(buf, &ipi_rendezvous_counts[i]);
                snprintf(buf, sizeof(buf), "cpu%d: lazypmap", i);
                intrcnt_add(buf, &ipi_lazypmap_counts[i]);
        }
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
#endif



This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.