The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/mp_machdep.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1996, by Steve Passe
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. The name of the developer may NOT be used to endorse or promote products
   11  *    derived from this software without specific prior written permission.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  */
   25 
   26 #include <sys/cdefs.h>
   27 __FBSDID("$FreeBSD: releng/6.0/sys/i386/i386/mp_machdep.c 150932 2005-10-04 15:15:22Z jhb $");
   28 
   29 #include "opt_apic.h"
   30 #include "opt_cpu.h"
   31 #include "opt_kdb.h"
   32 #include "opt_kstack_pages.h"
   33 #include "opt_mp_watchdog.h"
   34 #include "opt_sched.h"
   35 
   36 #if !defined(lint)
   37 #if !defined(SMP)
   38 #error How did you get here?
   39 #endif
   40 
   41 #ifndef DEV_APIC
   42 #error The apic device is required for SMP, add "device apic" to your config file.
   43 #endif
   44 #if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
   45 #error SMP not supported with CPU_DISABLE_CMPXCHG
   46 #endif
   47 #endif /* not lint */
   48 
   49 #include <sys/param.h>
   50 #include <sys/systm.h>
   51 #include <sys/bus.h>
   52 #include <sys/cons.h>   /* cngetc() */
   53 #ifdef GPROF 
   54 #include <sys/gmon.h>
   55 #endif
   56 #include <sys/kernel.h>
   57 #include <sys/ktr.h>
   58 #include <sys/lock.h>
   59 #include <sys/malloc.h>
   60 #include <sys/memrange.h>
   61 #include <sys/mutex.h>
   62 #include <sys/pcpu.h>
   63 #include <sys/proc.h>
   64 #include <sys/smp.h>
   65 #include <sys/sysctl.h>
   66 
   67 #include <vm/vm.h>
   68 #include <vm/vm_param.h>
   69 #include <vm/pmap.h>
   70 #include <vm/vm_kern.h>
   71 #include <vm/vm_extern.h>
   72 
   73 #include <machine/apicreg.h>
   74 #include <machine/clock.h>
   75 #include <machine/md_var.h>
   76 #include <machine/mp_watchdog.h>
   77 #include <machine/pcb.h>
   78 #include <machine/smp.h>
   79 #include <machine/smptests.h>   /** COUNT_XINVLTLB_HITS */
   80 #include <machine/specialreg.h>
   81 #include <machine/privatespace.h>
   82 
      /*
       * Warm-start vector used to steer an AP into the boot trampoline.
       * start_all_aps() saves the 32-bit value at WARMBOOT_OFF, stores
       * WARMBOOT_TARGET at WARMBOOT_OFF and (boot_address >> 4) at WARMBOOT_SEG
       * before starting each AP, and restores the saved value afterwards.
       */
   83 #define WARMBOOT_TARGET         0
   84 #define WARMBOOT_OFF            (KERNBASE + 0x0467)
   85 #define WARMBOOT_SEG            (KERNBASE + 0x0469)
   86 
      /*
       * CMOS access as used in this file: an index is written to CMOS_REG and
       * the byte is then read from or written to CMOS_DATA.  BIOS_RESET is the
       * index that start_all_aps() saves and restores; BIOS_WARM is the value
       * it stores there ('warm-start') while the APs are being started.
       */
   87 #define CMOS_REG                (0x70)
   88 #define CMOS_DATA               (0x71)
   89 #define BIOS_RESET              (0x0f)
   90 #define BIOS_WARM               (0x0a)
   91 
   92 /*
   93  * this code MUST be enabled here and in mpboot.s.
   94  * it follows the very early stages of AP boot by placing values in CMOS ram.
   95  * it NORMALLY will never be needed and thus the primitive method for enabling.
   96  *
   97 #define CHECK_POINTS
   98  */
   99 
  100 #if defined(CHECK_POINTS) && !defined(PC98)
  101 #define CHECK_READ(A)    (outb(CMOS_REG, (A)), inb(CMOS_DATA))
  102 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
  103 
  104 #define CHECK_INIT(D);                          \
  105         CHECK_WRITE(0x34, (D));                 \
  106         CHECK_WRITE(0x35, (D));                 \
  107         CHECK_WRITE(0x36, (D));                 \
  108         CHECK_WRITE(0x37, (D));                 \
  109         CHECK_WRITE(0x38, (D));                 \
  110         CHECK_WRITE(0x39, (D));
  111 
  112 #define CHECK_PRINT(S);                         \
  113         printf("%s: %d, %d, %d, %d, %d, %d\n",  \
  114            (S),                                 \
  115            CHECK_READ(0x34),                    \
  116            CHECK_READ(0x35),                    \
  117            CHECK_READ(0x36),                    \
  118            CHECK_READ(0x37),                    \
  119            CHECK_READ(0x38),                    \
  120            CHECK_READ(0x39));
  121 
  122 #else                           /* CHECK_POINTS */
  123 
  124 #define CHECK_INIT(D)
  125 #define CHECK_PRINT(S)
  126 #define CHECK_WRITE(A, D)
  127 
  128 #endif                          /* CHECK_POINTS */
  129 
  130 /*
  131  * Values to send to the POST hardware.
       * Routines in this file pass their own code to POSTCODE() on entry
       * (for example cpu_mp_start() sends MP_START_POST).
  132  */
  133 #define MP_BOOTADDRESS_POST     0x10
  134 #define MP_PROBE_POST           0x11
  135 #define MPTABLE_PASS1_POST      0x12
  136 
  137 #define MP_START_POST           0x13
  138 #define MP_ENABLE_POST          0x14
  139 #define MPTABLE_PASS2_POST      0x15
  140 
  141 #define START_ALL_APS_POST      0x16
  142 #define INSTALL_AP_TRAMP_POST   0x17
  143 #define START_AP_POST           0x18
  144 
  145 #define MP_ANNOUNCE_POST        0x19
  146 
  147 /* lock region used by kernel profiling */
  148 int     mcount_lock;
  149 
  150 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
  151 int     current_postcode;
  152 
  153 int     mp_naps;                /* # of Application Processors that signalled startup */
  154 int     boot_cpu_id = -1;       /* APIC ID of the designated BSP, -1 until known */
  155 extern  int nkpt;
  156 
  157 /*
  158  * CPU topology map data structures for HTT; filled in by mp_topology().
  159  */
  160 static struct cpu_group mp_groups[MAXCPU];
  161 static struct cpu_top mp_top;
  162 
  163 /*
       * AP uses this during bootstrap.  Do not staticize.
       * start_all_aps() points it at the end of the idle stack of the AP that
       * is about to be started.
       */
  164 char *bootSTK;
      /* Logical CPU id of the AP being started; read by init_secondary(). */
  165 static int bootAP;
  166 
  167 /* Hotwire a 0->4MB V==P mapping */
  168 extern pt_entry_t *KPTphys;
  169 
  170 /* SMP page table page */
  171 extern pt_entry_t *SMPpt;
  172 
  173 struct pcb stoppcbs[MAXCPU];
  174 
  175 /* Variables needed for SMP tlb shootdown. */
  176 vm_offset_t smp_tlb_addr1;
  177 vm_offset_t smp_tlb_addr2;
  178 volatile int smp_tlb_wait;
  179 
  180 #ifdef KDB_STOP_NMI
  181 volatile cpumask_t ipi_nmi_pending;
  182 #endif 
  183 
  184 #ifdef COUNT_IPIS
  185 /* Interrupt counts, one pointer per CPU for each IPI type. */
  186 #ifdef IPI_PREEMPTION
  187 static u_long *ipi_preempt_counts[MAXCPU];
  188 #endif
  189 static u_long *ipi_ast_counts[MAXCPU];
  190 u_long *ipi_invltlb_counts[MAXCPU];
  191 u_long *ipi_invlrng_counts[MAXCPU];
  192 u_long *ipi_invlpg_counts[MAXCPU];
  193 u_long *ipi_rendezvous_counts[MAXCPU];
  194 u_long *ipi_lazypmap_counts[MAXCPU];
  195 #endif
  196 
  197 /*
  198  * Local data and functions.
  199  */
  200 
      /*
       * Logical CPU count taken from cpu_procinfo when CPUID_HTT is set,
       * otherwise 0; assigned in cpu_mp_start().
       */
  201 static u_int logical_cpus;
  202 
  203 /*
       * used to hold the AP's until we are ready to release them
       * (init_secondary() takes it around the per-AP bring-up section)
       */
  204 static struct mtx ap_boot_mtx;
  205 
  206 /*
       * Set to 1 once we're ready to let the APs out of the pen.
       * init_secondary() spins on it before continuing.
       */
  207 static volatile int aps_ready = 0;
  208 
  209 /*
  210  * Store data from cpu_add() until later in the boot when we actually setup
  211  * the APs.  The array is indexed by APIC ID.
       *
       * The flags are unsigned one-bit fields so that storing 1 reads back as
       * 1; a signed one-bit field can only represent 0 and -1.
  212  */
  213 static struct cpu_info {
  214         u_int   cpu_present:1;  /* registered through cpu_add() */
  215         u_int   cpu_bsp:1;      /* this CPU is the boot processor */
  216         u_int   cpu_disabled:1; /* marked disabled by start_all_aps() */
  217 } cpu_info[MAXCPU];
  218 static int cpu_apic_ids[MAXCPU];        /* logical CPU id -> APIC ID, -1 if unassigned */
  219 
  220 /* Holds pending bitmap based IPIs per CPU */
  221 static volatile u_int cpu_ipi_pending[MAXCPU];
  222 
  223 static u_int boot_address;      /* AP trampoline address computed by mp_bootaddress() */
  224 
  225 static void     set_logical_apic_ids(void);
  226 static int      start_all_aps(void);
  227 static void     install_ap_tramp(void);
  228 static int      start_ap(int apic_id);
  229 static void     release_aps(void *dummy);
  230 
  231 static int      hlt_logical_cpus;
      /* Threads per cache from CPUID leaf 4, or logical_cpus; see cpu_mp_start(). */
  232 static u_int    hyperthreading_cpus;
      /* CPUs that init_secondary() classified as hyperthreads. */
  233 static cpumask_t        hyperthreading_cpus_mask;
  234 static int      hyperthreading_allowed;
  235 static struct   sysctl_ctx_list logical_cpu_clist;
  236 
      /*
       * Call the memory-range driver's initAP hook with mem_range_softc, if
       * both the ops table and the hook are set.  init_secondary() calls this
       * on each AP to match the BSP's memory range attributes.
       */
  237 static void
  238 mem_range_AP_init(void)
  239 {
  240         if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
  241                 mem_range_softc.mr_op->initAP(&mem_range_softc);
  242 }
  243 
      /*
       * Build the CPU topology map for HTT in mp_groups[] / mp_top and publish
       * it through smp_topology.
       *
       * Returns without touching smp_topology when the CPU does not report
       * CPUID_HTT or reports at most one logical CPU.  Otherwise the present
       * CPUs are walked in APIC ID order: a CPU whose APIC ID is a multiple of
       * the logical CPU count opens a new group once the current group has
       * members, and every CPU contributes the next bit of cg_mask.
       *
       * NOTE(review): the walk counts every present CPU, including ones that
       * start_all_aps() marks cpu_disabled, so the mask bits presumably match
       * logical CPU ids only when no CPU has been disabled -- confirm.
       */
  244 void
  245 mp_topology(void)
  246 {
  247         struct cpu_group *group;
  248         int htt_cpus;   /* named so it does not shadow the static logical_cpus */
  249         int apic_id;
  250         int groups;
  251         int cpu;
  252 
  253         /* Build the smp_topology map. */
  254         /* Nothing to do if there is no HTT support. */
  255         if ((cpu_feature & CPUID_HTT) == 0)
  256                 return;
  257         htt_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
  258         if (htt_cpus <= 1)
  259                 return;
  260         group = &mp_groups[0];
  261         groups = 1;
  262         for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
  263                 if (!cpu_info[apic_id].cpu_present)
  264                         continue;
  265                 /*
  266                  * If the current group has members and we're not a logical
  267                  * cpu, create a new group.
  268                  */
  269                 if (group->cg_count != 0 && (apic_id % htt_cpus) == 0) {
  270                         group++;
  271                         groups++;
  272                 }
  273                 group->cg_count++;
  274                 group->cg_mask |= 1 << cpu;
  275                 cpu++;
  276         }
  277 
  278         mp_top.ct_count = groups;
  279         mp_top.ct_group = mp_groups;
  280         smp_topology = &mp_top;
  281 }
  282 
  283 
  284 /*
  285  * Calculate usable address in base memory for AP trampoline code.
       *
       * basemem is truncated to a page boundary; if the space between that
       * boundary and basemem is smaller than bootMP_size, the address is
       * lowered by one more page.  The result is stored in boot_address and
       * returned.
  286  */
  287 u_int
  288 mp_bootaddress(u_int basemem)
  289 {
  290         POSTCODE(MP_BOOTADDRESS_POST);
  291 
  292         boot_address = trunc_page(basemem);     /* round down to 4k boundary */
  293         if ((basemem - boot_address) < bootMP_size)
  294                 boot_address -= PAGE_SIZE;      /* not enough, lower by 4k */
  295 
  296         return boot_address;
  297 }
  298 
      /*
       * Record a CPU found during enumeration.
       *
       * apic_id:  APIC ID of the CPU.  IDs >= MAXCPU are reported and ignored.
       * boot_cpu: non-zero if this CPU is the BSP.
       *
       * Marks the CPU present in cpu_info[], records the BSP's APIC ID in
       * boot_cpu_id, and increments mp_ncpus.  apic_id is unsigned and is
       * therefore printed with %u, as set_logical_apic_ids() does.
       */
  299 void
  300 cpu_add(u_int apic_id, char boot_cpu)
  301 {
  302 
  303         if (apic_id >= MAXCPU) {
  304                 printf("SMP: CPU %u exceeds maximum CPU %d, ignoring\n",
  305                     apic_id, MAXCPU - 1);
  306                 return;
  307         }
  308         KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %u added twice",
  309             apic_id));
  310         cpu_info[apic_id].cpu_present = 1;
  311         if (boot_cpu) {
  312                 KASSERT(boot_cpu_id == -1,
  313                     ("CPU %u claims to be BSP, but CPU %d already is", apic_id,
  314                     boot_cpu_id));
  315                 boot_cpu_id = apic_id;
  316                 cpu_info[apic_id].cpu_bsp = 1;
  317         }
  318         mp_ncpus++;
  319         if (bootverbose)
  320                 printf("SMP: Added CPU %u (%s)\n", apic_id, boot_cpu ? "BSP" :
  321                     "AP");
  322         
  323 }
  324 
      /*
       * Set mp_maxid to the largest CPU id this kernel can use, MAXCPU - 1.
       */
  325 void
  326 cpu_mp_setmaxid(void)
  327 {
  328 
  329         mp_maxid = MAXCPU - 1;
  330 }
  331 
      /*
       * Report whether more than one CPU was registered through cpu_add().
       *
       * Always sets all_cpus to 1 (the BSP).  Returns 1 when mp_ncpus is at
       * least 2, and 0 otherwise; when no CPU was registered at all, mp_ncpus
       * is forced to 1 first.
       */
  332 int
  333 cpu_mp_probe(void)
  334 {
  335 
  336         /*
  337          * Always record BSP in CPU map so that the mbuf init code works
  338          * correctly.
  339          */
  340         all_cpus = 1;
  341         if (mp_ncpus == 0) {
  342                 /*
  343                  * No CPUs were found, so this must be a UP system.  Setup
  344                  * the variables to represent a system with a single CPU
  345                  * with an id of 0.
  346                  */
  347                 mp_ncpus = 1;
  348                 return (0);
  349         }
  350 
  351         /* At least one CPU was found. */
  352         if (mp_ncpus == 1) {
  353                 /*
  354                  * One CPU was found, so this must be a UP system with
  355                  * an I/O APIC.
  356                  */
  357                 return (0);
  358         }
  359 
  360         /* At least two CPUs were found. */
  361         return (1);
  362 }
  363 
  364 /*
  365  * Initialize the IPI handlers and start up the AP's.
       *
       * In order: reset the logical-id/APIC-id and pending-IPI tables, install
       * the IPI vectors in the IDT, settle boot_cpu_id, start the APs, work
       * out logical_cpus and hyperthreading_cpus, and assign the APIC logical
       * IDs.
  366  */
  367 void
  368 cpu_mp_start(void)
  369 {
  370         int i;
  371         u_int threads_per_cache, p[4];
  372 
  373         POSTCODE(MP_START_POST);
  374 
  375         /* Initialize the logical ID to APIC ID table. */
  376         for (i = 0; i < MAXCPU; i++) {
  377                 cpu_apic_ids[i] = -1;
  378                 cpu_ipi_pending[i] = 0;
  379         }
  380 
  381         /* Install an inter-CPU IPI for TLB invalidation */
  382         setidt(IPI_INVLTLB, IDTVEC(invltlb),
  383                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  384         setidt(IPI_INVLPG, IDTVEC(invlpg),
  385                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  386         setidt(IPI_INVLRNG, IDTVEC(invlrng),
  387                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  388         
  389         /* Install an inter-CPU IPI for lazy pmap release */
  390         setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
  391                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  392 
  393         /* Install an inter-CPU IPI for all-CPU rendezvous */
  394         setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
  395                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  396 
  397         /* Install generic inter-CPU IPI handler */
  398         setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
  399                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  400 
  401         /* Install an inter-CPU IPI for CPU stop/restart */
  402         setidt(IPI_STOP, IDTVEC(cpustop),
  403                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  404 
  405 
  406         /* Set boot_cpu_id if needed. */
  407         if (boot_cpu_id == -1) {
  408                 boot_cpu_id = PCPU_GET(apic_id);
  409                 cpu_info[boot_cpu_id].cpu_bsp = 1;
  410         } else
  411                 KASSERT(boot_cpu_id == PCPU_GET(apic_id),
  412                     ("BSP's APIC ID doesn't match boot_cpu_id"));
  413         cpu_apic_ids[0] = boot_cpu_id;
  414 
  415         /* Start each Application Processor */
  416         start_all_aps();
  417 
  418         /* Setup the initial logical CPUs info. */
  419         logical_cpus = logical_cpus_mask = 0;
  420         if (cpu_feature & CPUID_HTT)
  421                 logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
  422 
  423         /*
  424          * Work out if hyperthreading is *really* enabled.  This
  425          * is made really ugly by the fact that processors lie: Dual
  426          * core processors claim to be hyperthreaded even when they're
  427          * not, presumably because they want to be treated the same
  428          * way as HTT with respect to per-cpu software licensing.
  429          * At the time of writing (May 12, 2005) the only hyperthreaded
  430          * cpus are from Intel, and Intel's dual-core processors can be
  431          * identified via the "deterministic cache parameters" cpuid
  432          * calls.
  433          */
  434         /*
  435          * First determine if this is an Intel processor which claims
  436          * to have hyperthreading support.
  437          */
  438         if ((cpu_feature & CPUID_HTT) &&
  439             (strcmp(cpu_vendor, "GenuineIntel") == 0)) {
  440                 /*
  441                  * If the "deterministic cache parameters" cpuid calls
  442                  * are available, use them.
  443                  */
  444                 if (cpu_high >= 4) {
  445                         /* Ask the processor about up to 32 caches. */
  446                         for (i = 0; i < 32; i++) {
  447                                 cpuid_count(4, i, p);
                                      /*
                                       * A cache type of 0 ends the list.
                                       * Stop before counting that entry so
                                       * a CPU reporting no caches leaves
                                       * hyperthreading_cpus at 0 and takes
                                       * the fallback below.
                                       */
  448                                 if ((p[0] & 0x1f) == 0)
  449                                         break;
  450                                 threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
  451                                 if (hyperthreading_cpus < threads_per_cache)
  452                                         hyperthreading_cpus = threads_per_cache;
  453                         }
  454                 }
  455 
  456                 /*
  457                  * If the deterministic cache parameters are not
  458                  * available, or if no caches were reported to exist,
  459                  * just accept what the HTT flag indicated.
  460                  */
  461                 if (hyperthreading_cpus == 0)
  462                         hyperthreading_cpus = logical_cpus;
  463         }
  464 
  465         set_logical_apic_ids();
  466 }
  467 
  468 
  469 /*
  470  * Print various information about the SMP system hardware and setup.
       *
       * Prints the BSP as cpu0, then every other present CPU in APIC ID
       * order.  Enabled APs are numbered from 1; disabled ones are listed
       * without a logical id.
  471  */
  472 void
  473 cpu_mp_announce(void)
  474 {
  475         int i, x;
  476 
  477         POSTCODE(MP_ANNOUNCE_POST);
  478 
  479         /* List CPUs */
  480         printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
              /* i is the next logical id to print, x walks the APIC IDs. */
  481         for (i = 1, x = 0; x < MAXCPU; x++) {
  482                 if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
  483                         continue;
  484                 if (cpu_info[x].cpu_disabled)
  485                         printf("  cpu (AP): APIC ID: %2d (disabled)\n", x);
  486                 else {
  487                         KASSERT(i < mp_ncpus,
  488                             ("mp_ncpus and actual cpus are out of whack"));
  489                         printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
  490                 }
  491         }
  492 }
  493 
  494 /*
  495  * AP CPU's call this to initialize themselves.
       *
       * Outline: build and load this CPU's GDT slice, IDT, LDT and TSS; put
       * CR0 into a known state; report in through mp_naps and spin until
       * aps_ready is set; flush the TLB; set up CPU, FPU and SSE state; check
       * the APIC ID; then, under ap_boot_mtx, set up the local APIC and the
       * CPU masks.  The last AP to arrive sets smp_started.  Finally the AP
       * takes sched_lock and enters the scheduler through cpu_throw(), which
       * does not return.
  496  */
  497 void
  498 init_secondary(void)
  499 {
  500         vm_offset_t addr;
  501         int     gsel_tss;
  502         int     x, myid;
  503         u_int   cr0;
  504 
  505         /* bootAP is set in start_all_aps() to our logical CPU id. */
  506         myid = bootAP;
              /* Point the private-space and TSS descriptors at this CPU's area. */
  507         gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
  508         gdt_segs[GPROC0_SEL].ssd_base =
  509                 (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
  510         SMP_prvspace[myid].pcpu.pc_prvspace =
  511                 &SMP_prvspace[myid].pcpu;
  512 
              /* Fill this CPU's NGDT-entry slice of the shared gdt[] array. */
  513         for (x = 0; x < NGDT; x++) {
  514                 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
  515         }
  516 
  517         r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
  518         r_gdt.rd_base = (int) &gdt[myid * NGDT];
  519         lgdt(&r_gdt);                   /* does magic intra-segment return */
  520 
  521         lidt(&r_idt);
  522 
  523         lldt(_default_ldt);
  524         PCPU_SET(currentldt, _default_ldt);
  525 
              /* Set up and load the task state segment for this CPU. */
  526         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
  527         gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
  528         PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
  529         PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
  530         PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
  531         PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
  532         PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
  533         ltr(gsel_tss);
  534 
  535         PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd);
  536 
  537         /*
  538          * Set to a known state:
  539          * Set by mpboot.s: CR0_PG, CR0_PE
  540          * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
  541          */
  542         cr0 = rcr0();
  543         cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
  544         load_cr0(cr0);
  545         CHECK_WRITE(0x38, 5);
  546         
  547         /* Disable local APIC just to be sure. */
  548         lapic_disable();
  549 
  550         /* signal our startup to the BSP. */
  551         mp_naps++;
  552         CHECK_WRITE(0x39, 6);
  553 
  554         /* Spin until the BSP releases the AP's. */
  555         while (!aps_ready)
  556                 ia32_pause();
  557 
  558         /* BSP may have changed PTD while we were waiting */
  559         invltlb();
  560         for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
  561                 invlpg(addr);
  562 
  563 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
  564         lidt(&r_idt);
  565 #endif
  566 
  567         /* set up CPU registers and state */
  568         cpu_setregs();
  569 
  570         /* set up FPU state on the AP */
  571         npxinit(__INITIAL_NPXCW__);
  572 
  573         /* set up SSE registers */
  574         enable_sse();
  575 
  576         /* A quick check from sanity claus */
  577         if (PCPU_GET(apic_id) != lapic_id()) {
  578                 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
  579                 printf("SMP: actual apic_id = %d\n", lapic_id());
  580                 printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
  581                 printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]);
  582                 panic("cpuid mismatch! boom!!");
  583         }
  584 
  585         /* Initialize curthread. */
  586         KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
  587         PCPU_SET(curthread, PCPU_GET(idlethread));
  588 
              /* Everything up to the matching unlock runs under ap_boot_mtx. */
  589         mtx_lock_spin(&ap_boot_mtx);
  590 
  591         /* Init local apic for irq's */
  592         lapic_setup();
  593 
  594         /* Set memory range attributes for this CPU to match the BSP */
  595         mem_range_AP_init();
  596 
  597         smp_cpus++;
  598 
  599         CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
  600         printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
  601 
  602         /* Determine if we are a logical CPU. */
  603         if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
  604                 logical_cpus_mask |= PCPU_GET(cpumask);
  605         
  606         /* Determine if we are a hyperthread. */
  607         if (hyperthreading_cpus > 1 &&
  608             PCPU_GET(apic_id) % hyperthreading_cpus != 0)
  609                 hyperthreading_cpus_mask |= PCPU_GET(cpumask);
  610 
  611         /* Build our map of 'other' CPUs. */
  612         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  613 
  614         if (bootverbose)
  615                 lapic_dump("AP");
  616 
              /* The last AP to arrive flips smp_started for everybody. */
  617         if (smp_cpus == mp_ncpus) {
  618                 /* enable IPI's, tlb shootdown, freezes etc */
  619                 atomic_store_rel_int(&smp_started, 1);
  620                 smp_active = 1;  /* historic */
  621         }
  622 
  623         mtx_unlock_spin(&ap_boot_mtx);
  624 
  625         /* wait until all the AP's are up */
  626         while (smp_started == 0)
  627                 ia32_pause();
  628 
  629         /* ok, now grab sched_lock and enter the scheduler */
  630         mtx_lock_spin(&sched_lock);
  631 
  632         /*
  633          * Correct spinlock nesting.  The idle thread context that we are
  634          * borrowing was created so that it would start out with a single
  635          * spin lock (sched_lock) held in fork_trampoline().  Since we've
  636          * explicitly acquired locks in this function, the nesting count
  637          * is now 2 rather than 1.  Since we are nested, calling
  638          * spinlock_exit() will simply adjust the counts without allowing
  639          * spin lock using code to interrupt us.
  640          */
  641         spinlock_exit();
  642         KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
  643 
  644         binuptime(PCPU_PTR(switchtime));
  645         PCPU_SET(switchticks, ticks);
  646 
  647         cpu_throw(NULL, choosethread());        /* doesn't return */
  648 
  649         panic("scheduler returned us to %s", __func__);
  650         /* NOTREACHED */
  651 }
  652 
  653 /*******************************************************************
  654  * local functions and data
  655  */
  656 
  657 /*
  658  * Set the APIC logical IDs.
  659  *
  660  * We want to cluster logical CPU's within the same APIC ID cluster.
  661  * Since logical CPU's are aligned simply filling in the clusters in
  662  * APIC ID order works fine.  Note that this does not try to balance
  663  * the number of CPU's in each cluster. (XXX?)
       *
       * Every present CPU gets the next intra-cluster id; when the id reaches
       * APIC_MAX_INTRACLUSTER_ID a new cluster is requested from
       * ioapic_next_logical_cluster() and the id restarts at 0.
  664  */
  665 static void
  666 set_logical_apic_ids(void)
  667 {
  668         u_int apic_id, cluster, cluster_id;
  669 
  670         /*
               * Force us to allocate cluster 0 at the start: cluster_id begins
               * at the maximum, so the first present CPU takes the
               * "new cluster" branch and cluster is overwritten before use.
               */
  671         cluster = -1;
  672         cluster_id = APIC_MAX_INTRACLUSTER_ID;
  673         for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
  674                 if (!cpu_info[apic_id].cpu_present)
  675                         continue;
  676                 if (cluster_id == APIC_MAX_INTRACLUSTER_ID) {
  677                         cluster = ioapic_next_logical_cluster();
  678                         cluster_id = 0;
  679                 } else
  680                         cluster_id++;
  681                 if (bootverbose)
  682                         printf("APIC ID: physical %u, logical %u:%u\n",
  683                             apic_id, cluster, cluster_id);
  684                 lapic_set_logical_id(apic_id, cluster, cluster_id);
  685         }
  686 }
  687 
  688 /*
  689  * start each AP in our list
       *
       * Installs the trampoline, saves the warm-start vector and the CMOS
       * reset byte, and sets up a temporary P==V mapping.  Then, for every
       * present CPU that is neither the BSP nor disabled, it assigns the next
       * logical id, allocates and wires the per-CPU page and idle stack, and
       * starts the CPU through start_ap().  Afterwards the saved state is
       * restored, the BSP idle stack is set up and the temporary mapping is
       * removed.
       *
       * Returns mp_naps, the count that init_secondary() increments.
  690  */
  691 static int
  692 start_all_aps(void)
  693 {
  694 #ifndef PC98
  695         u_char mpbiosreason;
  696 #endif
  697         struct pcpu *pc;
  698         char *stack;
  699         uintptr_t kptbase;
  700         u_int32_t mpbioswarmvec;
  701         int apic_id, cpu, i, pg;
  702 
  703         POSTCODE(START_ALL_APS_POST);
  704 
  705         mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
  706 
  707         /* install the AP 1st level boot code */
  708         install_ap_tramp();
  709 
  710         /* save the current value of the warm-start vector */
  711         mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
  712 #ifndef PC98
              /* ... and the CMOS reset byte, restored after the loop */
  713         outb(CMOS_REG, BIOS_RESET);
  714         mpbiosreason = inb(CMOS_DATA);
  715 #endif
  716 
  717         /* set up temporary P==V mapping for AP boot */
  718         /* XXX this is a hack, we should boot the AP on its own stack/PTD */
  719         kptbase = (uintptr_t)(void *)KPTphys;
  720         for (i = 0; i < NKPT; i++)
  721                 PTD[i] = (pd_entry_t)(PG_V | PG_RW |
  722                     ((kptbase + i * PAGE_SIZE) & PG_FRAME));
  723         invltlb();
  724 
  725         /* start each AP; cpu is the last logical id handed out (BSP is 0) */
  726         for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
  727 
  728                 /* Ignore non-existent CPUs and the BSP. */
  729                 if (!cpu_info[apic_id].cpu_present ||
  730                     cpu_info[apic_id].cpu_bsp)
  731                         continue;
  732 
  733                 /* Don't use this CPU if it has been disabled by a tunable. */
  734                 if (resource_disabled("lapic", apic_id)) {
  735                         cpu_info[apic_id].cpu_disabled = 1;
  736                         mp_ncpus--;
  737                         continue;
  738                 }
  739 
  740                 cpu++;
  741 
  742                 /* save APIC ID for this logical ID */
  743                 cpu_apic_ids[cpu] = apic_id;
  744 
  745                 /* first page of AP's private space */
  746                 pg = cpu * i386_btop(sizeof(struct privatespace));
  747 
  748                 /* allocate a new private data page */
  749                 pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);
  750 
  751                 /* wire it into the private page table page */
  752                 SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));
  753 
  754                 /* allocate and set up an idle stack data page */
  755                 stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
  756                 for (i = 0; i < KSTACK_PAGES; i++)
  757                         SMPpt[pg + 1 + i] = (pt_entry_t)
  758                             (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
  759 
  760                 /* prime data page for it to use */
  761                 pcpu_init(pc, cpu, sizeof(struct pcpu));
  762                 pc->pc_apic_id = apic_id;
  763 
  764                 /* setup a vector to our boot code */
  765                 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
  766                 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
  767 #ifndef PC98
  768                 outb(CMOS_REG, BIOS_RESET);
  769                 outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
  770 #endif
  771 
                      /* hand the AP its stack end and logical id; init_secondary() reads bootAP */
  772                 bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
  773                     PAGE_SIZE];
  774                 bootAP = cpu;
  775 
  776                 /* attempt to start the Application Processor */
  777                 CHECK_INIT(99); /* setup checkpoints */
  778                 if (!start_ap(apic_id)) {
  779                         printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
  780                         CHECK_PRINT("trace");   /* show checkpoints */
  781                         /* better panic as the AP may be running loose */
  782                         printf("panic y/n? [y] ");
  783                         if (cngetc() != 'n')
  784                                 panic("bye-bye");
  785                 }
  786                 CHECK_PRINT("trace");           /* show checkpoints */
  787 
  788                 all_cpus |= (1 << cpu);         /* record AP in CPU map */
  789         }
  790 
  791         /* build our map of 'other' CPUs */
  792         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  793 
  794         /* restore the warmstart vector */
  795         *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
  796 
  797 #ifndef PC98
  798         outb(CMOS_REG, BIOS_RESET);
  799         outb(CMOS_DATA, mpbiosreason);
  800 #endif
  801 
  802         /*
  803          * Set up the idle context for the BSP.  Similar to above except
  804          * that some was done by locore, some by pmap.c and some is implicit
  805          * because the BSP is cpu#0 and the page is initially zero and also
  806          * because we can refer to variables by name on the BSP..
  807          */
  808 
  809         /* Allocate and setup BSP idle stack */
  810         stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
  811         for (i = 0; i < KSTACK_PAGES; i++)
  812                 SMPpt[1 + i] = (pt_entry_t)
  813                     (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
  814 
              /* tear down the temporary P==V mapping installed above */
  815         for (i = 0; i < NKPT; i++)
  816                 PTD[i] = 0;
  817         pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
  818 
  819         /* number of APs actually started */
  820         return mp_naps;
  821 }
  822 
  823 /*
  824  * load the 1st level AP boot code into base memory.
  825  */
  826 
  827 /* targets for relocation */
  828 extern void bigJump(void);
  829 extern void bootCodeSeg(void);
  830 extern void bootDataSeg(void);
  831 extern void MPentry(void);
  832 extern u_int MP_GDT;
  833 extern u_int mp_gdtbase;
  834 
  835 static void
  836 install_ap_tramp(void)
  837 {
  838         int     x;
  839         int     size = *(int *) ((u_long) & bootMP_size);
  840         vm_offset_t va = boot_address + KERNBASE;
  841         u_char *src = (u_char *) ((u_long) bootMP);
  842         u_char *dst = (u_char *) va;
  843         u_int   boot_base = (u_int) bootMP;
  844         u_int8_t *dst8;
  845         u_int16_t *dst16;
  846         u_int32_t *dst32;
  847 
  848         POSTCODE(INSTALL_AP_TRAMP_POST);
  849 
  850         KASSERT (size <= PAGE_SIZE,
  851             ("'size' do not fit into PAGE_SIZE, as expected."));
  852         pmap_kenter(va, boot_address);
  853         pmap_invalidate_page (kernel_pmap, va);
  854         for (x = 0; x < size; ++x)
  855                 *dst++ = *src++;
  856 
  857         /*
  858          * modify addresses in code we just moved to basemem. unfortunately we
  859          * need fairly detailed info about mpboot.s for this to work.  changes
  860          * to mpboot.s might require changes here.
  861          */
  862 
  863         /* boot code is located in KERNEL space */
  864         dst = (u_char *) va;
  865 
  866         /* modify the lgdt arg */
  867         dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
  868         *dst32 = boot_address + ((u_int) & MP_GDT - boot_base);
  869 
  870         /* modify the ljmp target for MPentry() */
  871         dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
  872         *dst32 = ((u_int) MPentry - KERNBASE);
  873 
  874         /* modify the target for boot code segment */
  875         dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
  876         dst8 = (u_int8_t *) (dst16 + 1);
  877         *dst16 = (u_int) boot_address & 0xffff;
  878         *dst8 = ((u_int) boot_address >> 16) & 0xff;
  879 
  880         /* modify the target for boot data segment */
  881         dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
  882         dst8 = (u_int8_t *) (dst16 + 1);
  883         *dst16 = (u_int) boot_address & 0xffff;
  884         *dst8 = ((u_int) boot_address >> 16) & 0xff;
  885 }
  886 
  887 /*
  888  * This function starts the AP (application processor) identified
  889  * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
  890  * to accomplish this.  This is necessary because of the nuances
  891  * of the different hardware we might encounter.  It isn't pretty,
  892  * but it seems to work.
  893  */
  894 static int
  895 start_ap(int apic_id)
  896 {
  897         int vector, ms;
  898         int cpus;
  899 
  900         POSTCODE(START_AP_POST);
  901 
  902         /* calculate the vector */
  903         vector = (boot_address >> 12) & 0xff;
  904 
  905         /* used as a watchpoint to signal AP startup */
  906         cpus = mp_naps;
  907 
  908         /*
  909          * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
  910          * and running the target CPU. OR this INIT IPI might be latched (P5
  911          * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
  912          * ignored.
  913          */
  914 
  915         /* do an INIT IPI: assert RESET */
  916         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  917             APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
  918 
  919         /* wait for pending status end */
  920         lapic_ipi_wait(-1);
  921 
  922         /* do an INIT IPI: deassert RESET */
  923         lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
  924             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
  925 
  926         /* wait for pending status end */
  927         DELAY(10000);           /* wait ~10mS */
  928         lapic_ipi_wait(-1);
  929 
  930         /*
  931          * next we do a STARTUP IPI: the previous INIT IPI might still be
  932          * latched, (P5 bug) this 1st STARTUP would then terminate
  933          * immediately, and the previously started INIT IPI would continue. OR
  934          * the previous INIT IPI has already run. and this STARTUP IPI will
  935          * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
  936          * will run.
  937          */
  938 
  939         /* do a STARTUP IPI */
  940         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  941             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  942             vector, apic_id);
  943         lapic_ipi_wait(-1);
  944         DELAY(200);             /* wait ~200uS */
  945 
  946         /*
  947          * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
  948          * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
  949          * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
  950          * recognized after hardware RESET or INIT IPI.
  951          */
  952 
  953         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  954             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  955             vector, apic_id);
  956         lapic_ipi_wait(-1);
  957         DELAY(200);             /* wait ~200uS */
  958 
  959         /* Wait up to 5 seconds for it to start. */
  960         for (ms = 0; ms < 5000; ms++) {
  961                 if (mp_naps > cpus)
  962                         return 1;       /* return SUCCESS */
  963                 DELAY(1000);
  964         }
  965         return 0;               /* return FAILURE */
  966 }
  967 
  968 #ifdef COUNT_XINVLTLB_HITS
  969 u_int xhits_gbl[MAXCPU];
  970 u_int xhits_pg[MAXCPU];
  971 u_int xhits_rng[MAXCPU];
  972 SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
  973 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
  974     sizeof(xhits_gbl), "IU", "");
  975 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
  976     sizeof(xhits_pg), "IU", "");
  977 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
  978     sizeof(xhits_rng), "IU", "");
  979 
  980 u_int ipi_global;
  981 u_int ipi_page;
  982 u_int ipi_range;
  983 u_int ipi_range_size;
  984 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
  985 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
  986 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
  987 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
  988     0, "");
  989 
  990 u_int ipi_masked_global;
  991 u_int ipi_masked_page;
  992 u_int ipi_masked_range;
  993 u_int ipi_masked_range_size;
  994 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
  995     &ipi_masked_global, 0, "");
  996 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
  997     &ipi_masked_page, 0, "");
  998 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
  999     &ipi_masked_range, 0, "");
 1000 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
 1001     &ipi_masked_range_size, 0, "");
 1002 #endif /* COUNT_XINVLTLB_HITS */
 1003 
 1004 /*
 1005  * Flush the TLB on all other CPU's
 1006  */
 1007 static void
 1008 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 1009 {
 1010         u_int ncpu;
 1011 
 1012         ncpu = mp_ncpus - 1;    /* does not shootdown self */
 1013         if (ncpu < 1)
 1014                 return;         /* no other cpus */
 1015         mtx_assert(&smp_ipi_mtx, MA_OWNED);
 1016         smp_tlb_addr1 = addr1;
 1017         smp_tlb_addr2 = addr2;
 1018         atomic_store_rel_int(&smp_tlb_wait, 0);
 1019         ipi_all_but_self(vector);
 1020         while (smp_tlb_wait < ncpu)
 1021                 ia32_pause();
 1022 }
 1023 
 1024 static void
 1025 smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 1026 {
 1027         int ncpu, othercpus;
 1028 
 1029         othercpus = mp_ncpus - 1;
 1030         if (mask == (u_int)-1) {
 1031                 ncpu = othercpus;
 1032                 if (ncpu < 1)
 1033                         return;
 1034         } else {
 1035                 mask &= ~PCPU_GET(cpumask);
 1036                 if (mask == 0)
 1037                         return;
 1038                 ncpu = bitcount32(mask);
 1039                 if (ncpu > othercpus) {
 1040                         /* XXX this should be a panic offence */
 1041                         printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
 1042                             ncpu, othercpus);
 1043                         ncpu = othercpus;
 1044                 }
 1045                 /* XXX should be a panic, implied by mask == 0 above */
 1046                 if (ncpu < 1)
 1047                         return;
 1048         }
 1049         mtx_assert(&smp_ipi_mtx, MA_OWNED);
 1050         smp_tlb_addr1 = addr1;
 1051         smp_tlb_addr2 = addr2;
 1052         atomic_store_rel_int(&smp_tlb_wait, 0);
 1053         if (mask == (u_int)-1)
 1054                 ipi_all_but_self(vector);
 1055         else
 1056                 ipi_selected(mask, vector);
 1057         while (smp_tlb_wait < ncpu)
 1058                 ia32_pause();
 1059 }
 1060 
 1061 void
 1062 smp_invltlb(void)
 1063 {
 1064 
 1065         if (smp_started) {
 1066                 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
 1067 #ifdef COUNT_XINVLTLB_HITS
 1068                 ipi_global++;
 1069 #endif
 1070         }
 1071 }
 1072 
 1073 void
 1074 smp_invlpg(vm_offset_t addr)
 1075 {
 1076 
 1077         if (smp_started) {
 1078                 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
 1079 #ifdef COUNT_XINVLTLB_HITS
 1080                 ipi_page++;
 1081 #endif
 1082         }
 1083 }
 1084 
 1085 void
 1086 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
 1087 {
 1088 
 1089         if (smp_started) {
 1090                 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
 1091 #ifdef COUNT_XINVLTLB_HITS
 1092                 ipi_range++;
 1093                 ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
 1094 #endif
 1095         }
 1096 }
 1097 
 1098 void
 1099 smp_masked_invltlb(u_int mask)
 1100 {
 1101 
 1102         if (smp_started) {
 1103                 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
 1104 #ifdef COUNT_XINVLTLB_HITS
 1105                 ipi_masked_global++;
 1106 #endif
 1107         }
 1108 }
 1109 
 1110 void
 1111 smp_masked_invlpg(u_int mask, vm_offset_t addr)
 1112 {
 1113 
 1114         if (smp_started) {
 1115                 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
 1116 #ifdef COUNT_XINVLTLB_HITS
 1117                 ipi_masked_page++;
 1118 #endif
 1119         }
 1120 }
 1121 
 1122 void
 1123 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
 1124 {
 1125 
 1126         if (smp_started) {
 1127                 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
 1128 #ifdef COUNT_XINVLTLB_HITS
 1129                 ipi_masked_range++;
 1130                 ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
 1131 #endif
 1132         }
 1133 }
 1134 
 1135 
 1136 void
 1137 ipi_bitmap_handler(struct clockframe frame)
 1138 {
 1139         int cpu = PCPU_GET(cpuid);
 1140         u_int ipi_bitmap;
 1141 
 1142         ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
 1143 
 1144 #ifdef IPI_PREEMPTION
 1145         if (ipi_bitmap & IPI_PREEMPT) {
 1146 #ifdef COUNT_IPIS
 1147                 *ipi_preempt_counts[cpu]++;
 1148 #endif
 1149                 mtx_lock_spin(&sched_lock);
 1150                 /* Don't preempt the idle thread */
 1151                 if (curthread->td_priority <  PRI_MIN_IDLE) {
 1152                         struct thread *running_thread = curthread;
 1153                         if (running_thread->td_critnest > 1) 
 1154                                 running_thread->td_owepreempt = 1;
 1155                         else            
 1156                                 mi_switch(SW_INVOL | SW_PREEMPT, NULL);
 1157                 }
 1158                 mtx_unlock_spin(&sched_lock);
 1159         }
 1160 #endif
 1161 
 1162         if (ipi_bitmap & IPI_AST) {
 1163 #ifdef COUNT_IPIS
 1164                 *ipi_ast_counts[cpu]++;
 1165 #endif
 1166                 /* Nothing to do for AST */
 1167         }
 1168 }
 1169 
 1170 /*
 1171  * send an IPI to a set of cpus.
 1172  */
 1173 void
 1174 ipi_selected(u_int32_t cpus, u_int ipi)
 1175 {
 1176         int cpu;
 1177         u_int bitmap = 0;
 1178         u_int old_pending;
 1179         u_int new_pending;
 1180 
 1181         if (IPI_IS_BITMAPED(ipi)) { 
 1182                 bitmap = 1 << ipi;
 1183                 ipi = IPI_BITMAP_VECTOR;
 1184         }
 1185 
 1186         CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 1187         while ((cpu = ffs(cpus)) != 0) {
 1188                 cpu--;
 1189                 cpus &= ~(1 << cpu);
 1190 
 1191                 KASSERT(cpu_apic_ids[cpu] != -1,
 1192                     ("IPI to non-existent CPU %d", cpu));
 1193 
 1194                 if (bitmap) {
 1195                         do {
 1196                                 old_pending = cpu_ipi_pending[cpu];
 1197                                 new_pending = old_pending | bitmap;
 1198                         } while  (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending));  
 1199 
 1200                         if (old_pending)
 1201                                 continue;
 1202                 }
 1203 
 1204                 lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
 1205         }
 1206 
 1207 }
 1208 
 1209 /*
 1210  * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
 1211  */
 1212 void
 1213 ipi_all(u_int ipi)
 1214 {
 1215 
 1216         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1217         lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
 1218 }
 1219 
 1220 /*
 1221  * send an IPI to all CPUs EXCEPT myself
 1222  */
 1223 void
 1224 ipi_all_but_self(u_int ipi)
 1225 {
 1226 
 1227         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1228         lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 1229 }
 1230 
 1231 /*
 1232  * send an IPI to myself
 1233  */
 1234 void
 1235 ipi_self(u_int ipi)
 1236 {
 1237 
 1238         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1239         lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
 1240 }
 1241 
 1242 #ifdef KDB_STOP_NMI
 1243 /*
 1244  * send NMI IPI to selected CPUs
 1245  */
 1246 
 1247 #define BEFORE_SPIN     1000000
 1248 
 1249 void
 1250 ipi_nmi_selected(u_int32_t cpus)
 1251 {
 1252 
 1253         int cpu;
 1254         register_t icrlo;
 1255 
 1256         icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 
 1257                 | APIC_TRIGMOD_EDGE; 
 1258         
 1259         CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);
 1260 
 1261 
 1262         atomic_set_int(&ipi_nmi_pending, cpus);
 1263 
 1264 
 1265         while ((cpu = ffs(cpus)) != 0) {
 1266                 cpu--;
 1267                 cpus &= ~(1 << cpu);
 1268 
 1269                 KASSERT(cpu_apic_ids[cpu] != -1,
 1270                     ("IPI NMI to non-existent CPU %d", cpu));
 1271                 
 1272                 /* Wait for an earlier IPI to finish. */
 1273                 if (!lapic_ipi_wait(BEFORE_SPIN))
 1274                         panic("ipi_nmi_selected: previous IPI has not cleared");
 1275 
 1276                 lapic_ipi_raw(icrlo,cpu_apic_ids[cpu]);
 1277         }
 1278 }
 1279 
 1280 
 1281 int
 1282 ipi_nmi_handler()
 1283 {
 1284         int cpu  = PCPU_GET(cpuid);
 1285 
 1286         if(!(atomic_load_acq_int(&ipi_nmi_pending) & (1 << cpu)))
 1287                 return 1;
 1288 
 1289         atomic_clear_int(&ipi_nmi_pending,1 << cpu);
 1290 
 1291         savectx(&stoppcbs[cpu]);
 1292 
 1293         /* Indicate that we are stopped */
 1294         atomic_set_int(&stopped_cpus,1 << cpu);
 1295 
 1296 
 1297         /* Wait for restart */
 1298         while(!(atomic_load_acq_int(&started_cpus) & (1 << cpu)))
 1299             ia32_pause();
 1300 
 1301         atomic_clear_int(&started_cpus,1 << cpu);
 1302         atomic_clear_int(&stopped_cpus,1 << cpu);
 1303 
 1304         if(cpu == 0 && cpustop_restartfunc != NULL)
 1305                 cpustop_restartfunc();
 1306 
 1307         return 0;
 1308 }
 1309      
 1310 #endif /* KDB_STOP_NMI */
 1311 
 1312 /*
 1313  * This is called once the rest of the system is up and running and we're
 1314  * ready to let the AP's out of the pen.
 1315  */
 1316 static void
 1317 release_aps(void *dummy __unused)
 1318 {
 1319 
 1320         if (mp_ncpus == 1) 
 1321                 return;
 1322         mtx_lock_spin(&sched_lock);
 1323         atomic_store_rel_int(&aps_ready, 1);
 1324         while (smp_started == 0)
 1325                 ia32_pause();
 1326         mtx_unlock_spin(&sched_lock);
 1327 }
 1328 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 1329 
 1330 static int
 1331 sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
 1332 {
 1333         u_int mask;
 1334         int error;
 1335 
 1336         mask = hlt_cpus_mask;
 1337         error = sysctl_handle_int(oidp, &mask, 0, req);
 1338         if (error || !req->newptr)
 1339                 return (error);
 1340 
 1341         if (logical_cpus_mask != 0 &&
 1342             (mask & logical_cpus_mask) == logical_cpus_mask)
 1343                 hlt_logical_cpus = 1;
 1344         else
 1345                 hlt_logical_cpus = 0;
 1346 
 1347         if (! hyperthreading_allowed)
 1348                 mask |= hyperthreading_cpus_mask;
 1349 
 1350         if ((mask & all_cpus) == all_cpus)
 1351                 mask &= ~(1<<0);
 1352         hlt_cpus_mask = mask;
 1353         return (error);
 1354 }
 1355 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
 1356     0, 0, sysctl_hlt_cpus, "IU",
 1357     "Bitmap of CPUs to halt.  101 (binary) will halt CPUs 0 and 2.");
 1358 
 1359 static int
 1360 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
 1361 {
 1362         int disable, error;
 1363 
 1364         disable = hlt_logical_cpus;
 1365         error = sysctl_handle_int(oidp, &disable, 0, req);
 1366         if (error || !req->newptr)
 1367                 return (error);
 1368 
 1369         if (disable)
 1370                 hlt_cpus_mask |= logical_cpus_mask;
 1371         else
 1372                 hlt_cpus_mask &= ~logical_cpus_mask;
 1373 
 1374         if (! hyperthreading_allowed)
 1375                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1376 
 1377         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1378                 hlt_cpus_mask &= ~(1<<0);
 1379 
 1380         hlt_logical_cpus = disable;
 1381         return (error);
 1382 }
 1383 
 1384 static int
 1385 sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
 1386 {
 1387         int allowed, error;
 1388 
 1389         allowed = hyperthreading_allowed;
 1390         error = sysctl_handle_int(oidp, &allowed, 0, req);
 1391         if (error || !req->newptr)
 1392                 return (error);
 1393 
 1394         if (allowed)
 1395                 hlt_cpus_mask &= ~hyperthreading_cpus_mask;
 1396         else
 1397                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1398 
 1399         if (logical_cpus_mask != 0 &&
 1400             (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
 1401                 hlt_logical_cpus = 1;
 1402         else
 1403                 hlt_logical_cpus = 0;
 1404 
 1405         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 1406                 hlt_cpus_mask &= ~(1<<0);
 1407 
 1408         hyperthreading_allowed = allowed;
 1409         return (error);
 1410 }
 1411 
 1412 static void
 1413 cpu_hlt_setup(void *dummy __unused)
 1414 {
 1415 
 1416         if (logical_cpus_mask != 0) {
 1417                 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
 1418                     &hlt_logical_cpus);
 1419                 sysctl_ctx_init(&logical_cpu_clist);
 1420                 SYSCTL_ADD_PROC(&logical_cpu_clist,
 1421                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1422                     "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
 1423                     sysctl_hlt_logical_cpus, "IU", "");
 1424                 SYSCTL_ADD_UINT(&logical_cpu_clist,
 1425                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1426                     "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
 1427                     &logical_cpus_mask, 0, "");
 1428 
 1429                 if (hlt_logical_cpus)
 1430                         hlt_cpus_mask |= logical_cpus_mask;
 1431 
 1432                 /*
 1433                  * If necessary for security purposes, force
 1434                  * hyperthreading off, regardless of the value
 1435                  * of hlt_logical_cpus.
 1436                  */
 1437                 if (hyperthreading_cpus_mask) {
 1438                         TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
 1439                             &hyperthreading_allowed);
 1440                         SYSCTL_ADD_PROC(&logical_cpu_clist,
 1441                             SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1442                             "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
 1443                             0, 0, sysctl_hyperthreading_allowed, "IU", "");
 1444                         if (! hyperthreading_allowed)
 1445                                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 1446                 }
 1447         }
 1448 }
 1449 SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
 1450 
 1451 int
 1452 mp_grab_cpu_hlt(void)
 1453 {
 1454         u_int mask = PCPU_GET(cpumask);
 1455 #ifdef MP_WATCHDOG
 1456         u_int cpuid = PCPU_GET(cpuid);
 1457 #endif
 1458         int retval;
 1459 
 1460 #ifdef MP_WATCHDOG
 1461         ap_watchdog(cpuid);
 1462 #endif
 1463 
 1464         retval = mask & hlt_cpus_mask;
 1465         while (mask & hlt_cpus_mask)
 1466                 __asm __volatile("sti; hlt" : : : "memory");
 1467         return (retval);
 1468 }
 1469 
 1470 #ifdef COUNT_IPIS
 1471 /*
 1472  * Setup interrupt counters for IPI handlers.
 1473  */
 1474 static void
 1475 mp_ipi_intrcnt(void *dummy)
 1476 {
 1477         char buf[64];
 1478         int i;
 1479 
 1480         for (i = 0; i < mp_maxid; i++) {
 1481                 if (CPU_ABSENT(i))
 1482                         continue;
 1483                 snprintf(buf, sizeof(buf), "cpu%d: invltlb", i);
 1484                 intrcnt_add(buf, &ipi_invltlb_counts[i]);
 1485                 snprintf(buf, sizeof(buf), "cpu%d: invlrng", i);
 1486                 intrcnt_add(buf, &ipi_invlrng_counts[i]);
 1487                 snprintf(buf, sizeof(buf), "cpu%d: invlpg", i);
 1488                 intrcnt_add(buf, &ipi_invlpg_counts[i]);
 1489 #ifdef IPI_PREEMPTION
 1490                 snprintf(buf, sizeof(buf), "cpu%d: preempt", i);
 1491                 intrcnt_add(buf, &ipi_preempt_counts[i]);
 1492 #endif
 1493                 snprintf(buf, sizeof(buf), "cpu%d: ast", i);
 1494                 intrcnt_add(buf, &ipi_ast_counts[i]);
 1495                 snprintf(buf, sizeof(buf), "cpu%d: rendezvous", i);
 1496                 intrcnt_add(buf, &ipi_rendezvous_counts[i]);
 1497                 snprintf(buf, sizeof(buf), "cpu%d: lazypmap", i);
 1498                 intrcnt_add(buf, &ipi_lazypmap_counts[i]);
 1499         }               
 1500 }
 1501 SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL)
 1502 #endif

Cache object: e452af6c86c8ff13d48cfc0ba7d235f1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.