FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/mp_machdep.c


    1 /*-
    2  * Copyright (c) 1996, by Steve Passe
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. The name of the developer may NOT be used to endorse or promote products
   11  *    derived from this software without specific prior written permission.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  */
   25 
   26 #include <sys/cdefs.h>
   27 __FBSDID("$FreeBSD: releng/5.2/sys/i386/i386/mp_machdep.c 123126 2003-12-03 14:57:26Z jhb $");
   28 
   29 #include "opt_apic.h"
   30 #include "opt_cpu.h"
   31 #include "opt_kstack_pages.h"
   32 
   33 #if !defined(lint)
   34 #if !defined(SMP)
   35 #error How did you get here?
   36 #endif
   37 
   38 #if defined(I386_CPU) && !defined(COMPILING_LINT)
   39 #error SMP not supported with I386_CPU
   40 #endif
   41 #ifndef DEV_APIC
   42 #error The apic device is required for SMP, add "device apic" to your config file.
   43 #endif
   44 #if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
   45 #error SMP not supported with CPU_DISABLE_CMPXCHG
   46 #endif
   47 #endif /* not lint */
   48 
   49 #include <sys/param.h>
   50 #include <sys/systm.h>
   51 #include <sys/bus.h>
   52 #include <sys/cons.h>   /* cngetc() */
   53 #ifdef GPROF 
   54 #include <sys/gmon.h>
   55 #endif
   56 #include <sys/kernel.h>
   57 #include <sys/ktr.h>
   58 #include <sys/lock.h>
   59 #include <sys/malloc.h>
   60 #include <sys/memrange.h>
   61 #include <sys/mutex.h>
   62 #include <sys/pcpu.h>
   63 #include <sys/proc.h>
   64 #include <sys/smp.h>
   65 #include <sys/sysctl.h>
   66 
   67 #include <vm/vm.h>
   68 #include <vm/vm_param.h>
   69 #include <vm/pmap.h>
   70 #include <vm/vm_kern.h>
   71 #include <vm/vm_extern.h>
   72 
   73 #include <machine/apicreg.h>
   74 #include <machine/clock.h>
   75 #include <machine/md_var.h>
   76 #include <machine/pcb.h>
   77 #include <machine/smp.h>
   78 #include <machine/smptests.h>   /** COUNT_XINVLTLB_HITS */
   79 #include <machine/specialreg.h>
   80 #include <machine/privatespace.h>
   81 
   82 #define WARMBOOT_TARGET         0
   83 #define WARMBOOT_OFF            (KERNBASE + 0x0467)
   84 #define WARMBOOT_SEG            (KERNBASE + 0x0469)
   85 
   86 #define CMOS_REG                (0x70)
   87 #define CMOS_DATA               (0x71)
   88 #define BIOS_RESET              (0x0f)
   89 #define BIOS_WARM               (0x0a)
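
      /*
       * Writing BIOS_WARM into the CMOS shutdown status byte (BIOS_RESET)
       * tells the BIOS POST to skip its self-test on the next reset and
       * instead jump through the warm-boot vector at 40:67 -- the location
       * patched via WARMBOOT_OFF/WARMBOOT_SEG in start_all_aps() below to
       * point a freshly reset AP at our trampoline code.
       */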
   90 
    91 /*
    92  * This code MUST be enabled both here and in mpboot.s.
    93  * It traces the very early stages of AP boot by placing values in CMOS RAM.
    94  * It will NORMALLY never be needed, hence the primitive method of enabling
    95  * it: uncomment the define below.
    96 #define CHECK_POINTS
    97  */
   98 
   99 #if defined(CHECK_POINTS) && !defined(PC98)
  100 #define CHECK_READ(A)    (outb(CMOS_REG, (A)), inb(CMOS_DATA))
  101 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
  102 
  103 #define CHECK_INIT(D);                          \
  104         CHECK_WRITE(0x34, (D));                 \
  105         CHECK_WRITE(0x35, (D));                 \
  106         CHECK_WRITE(0x36, (D));                 \
  107         CHECK_WRITE(0x37, (D));                 \
  108         CHECK_WRITE(0x38, (D));                 \
  109         CHECK_WRITE(0x39, (D));
  110 
  111 #define CHECK_PRINT(S);                         \
  112         printf("%s: %d, %d, %d, %d, %d, %d\n",  \
  113            (S),                                 \
  114            CHECK_READ(0x34),                    \
  115            CHECK_READ(0x35),                    \
  116            CHECK_READ(0x36),                    \
  117            CHECK_READ(0x37),                    \
  118            CHECK_READ(0x38),                    \
  119            CHECK_READ(0x39));
  120 
  121 #else                           /* CHECK_POINTS */
  122 
  123 #define CHECK_INIT(D)
  124 #define CHECK_PRINT(S)
  125 #define CHECK_WRITE(A, D)
  126 
  127 #endif                          /* CHECK_POINTS */
  128 
  129 /*
  130  * Values to send to the POST hardware.
  131  */
  132 #define MP_BOOTADDRESS_POST     0x10
  133 #define MP_PROBE_POST           0x11
  134 #define MPTABLE_PASS1_POST      0x12
  135 
  136 #define MP_START_POST           0x13
  137 #define MP_ENABLE_POST          0x14
  138 #define MPTABLE_PASS2_POST      0x15
  139 
  140 #define START_ALL_APS_POST      0x16
  141 #define INSTALL_AP_TRAMP_POST   0x17
  142 #define START_AP_POST           0x18
  143 
  144 #define MP_ANNOUNCE_POST        0x19
  145 
  146 /* lock region used by kernel profiling */
  147 int     mcount_lock;
  148 
  149 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
  150 int     current_postcode;
  151 
   152 int     mp_naps;                /* # of Application processors */
  153 int     boot_cpu_id = -1;       /* designated BSP */
  154 extern  int nkpt;
  155 
  156 /*
   157  * CPU topology map data structures for HTT. (XXX)
  158  */
  159 struct cpu_group mp_groups[MAXCPU];
  160 struct cpu_top mp_top;
  161 struct cpu_top *smp_topology;
  162 
  163 /* AP uses this during bootstrap.  Do not staticize.  */
  164 char *bootSTK;
  165 static int bootAP;
  166 
  167 /* Hotwire a 0->4MB V==P mapping */
  168 extern pt_entry_t *KPTphys;
  169 
  170 /* SMP page table page */
  171 extern pt_entry_t *SMPpt;
  172 
  173 struct pcb stoppcbs[MAXCPU];
  174 
  175 /* Variables needed for SMP tlb shootdown. */
  176 vm_offset_t smp_tlb_addr1;
  177 vm_offset_t smp_tlb_addr2;
  178 volatile int smp_tlb_wait;
  179 struct mtx smp_tlb_mtx;
  180 
  181 /*
  182  * Local data and functions.
  183  */
  184 
  185 static u_int logical_cpus;
  186 static u_int logical_cpus_mask;
  187 
  188 /* used to hold the AP's until we are ready to release them */
  189 static struct mtx ap_boot_mtx;
  190 
  191 /* Set to 1 once we're ready to let the APs out of the pen. */
  192 static volatile int aps_ready = 0;
  193 
  194 /*
  195  * Store data from cpu_add() until later in the boot when we actually setup
  196  * the APs.
  197  */
  198 struct cpu_info {
  199         int     cpu_present:1;
  200         int     cpu_bsp:1;
  201 } static cpu_info[MAXCPU];
  202 static int cpu_apic_ids[MAXCPU];
  203 
  204 static u_int boot_address;
  205 
  206 static void     set_logical_apic_ids(void);
  207 static int      start_all_aps(void);
  208 static void     install_ap_tramp(void);
  209 static int      start_ap(int apic_id);
  210 static void     release_aps(void *dummy);
  211 
  212 static int      hlt_cpus_mask;
  213 static int      hlt_logical_cpus;
  214 static struct   sysctl_ctx_list logical_cpu_clist;
  215 
  216 /*
  217  * Calculate usable address in base memory for AP trampoline code.
  218  */
  219 u_int
  220 mp_bootaddress(u_int basemem)
  221 {
  222         POSTCODE(MP_BOOTADDRESS_POST);
  223 
  224         boot_address = trunc_page(basemem);     /* round down to 4k boundary */
  225         if ((basemem - boot_address) < bootMP_size)
  226                 boot_address -= PAGE_SIZE;      /* not enough, lower by 4k */
  227 
  228         return boot_address;
  229 }
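
      /*
       * The trampoline must sit page-aligned in base (real-mode addressable)
       * memory: start_ap() encodes its page number (boot_address >> 12) as
       * the 8-bit vector of the STARTUP IPI, so the AP begins execution in
       * real mode at boot_address.
       */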
  230 
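      /*
       * Record a processor discovered during early enumeration (the MP table
       * or ACPI MADT parsing code); this runs long before the APs themselves
       * are started.
       */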
  231 void
  232 cpu_add(u_int apic_id, char boot_cpu)
  233 {
  234 
   235         if (apic_id >= MAXCPU) {
   236                 printf("SMP: CPU %d exceeds maximum CPU %d, ignoring\n",
   237                     apic_id, MAXCPU - 1);
  238                 return;
  239         }
  240         KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
  241             apic_id));
  242         cpu_info[apic_id].cpu_present = 1;
  243         if (boot_cpu) {
  244                 KASSERT(boot_cpu_id == -1,
  245                     ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
  246                     boot_cpu_id));
  247                 boot_cpu_id = apic_id;
  248                 cpu_info[apic_id].cpu_bsp = 1;
  249         }
  250         mp_ncpus++;
  251         if (bootverbose)
  252                 printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
  253                     "AP");
  254         
  255 }
  256 
  257 void
  258 cpu_mp_setmaxid(void)
  259 {
  260 
  261         mp_maxid = MAXCPU - 1;
  262 }
  263 
  264 int
  265 cpu_mp_probe(void)
  266 {
  267 
  268         /*
  269          * Always record BSP in CPU map so that the mbuf init code works
  270          * correctly.
  271          */
  272         all_cpus = 1;
  273         if (mp_ncpus == 0) {
  274                 /*
  275                  * No CPUs were found, so this must be a UP system.  Setup
  276                  * the variables to represent a system with a single CPU
  277                  * with an id of 0.
  278                  */
  279                 mp_ncpus = 1;
  280                 return (0);
  281         }
  282 
  283         /* At least one CPU was found. */
  284         if (mp_ncpus == 1) {
  285                 /*
  286                  * One CPU was found, so this must be a UP system with
  287                  * an I/O APIC.
  288                  */
  289                 return (0);
  290         }
  291 
  292         /* At least two CPUs were found. */
  293         return (1);
  294 }
  295 
  296 /*
  297  * Initialize the IPI handlers and start up the AP's.
  298  */
  299 void
  300 cpu_mp_start(void)
  301 {
  302         int i;
  303 
  304         POSTCODE(MP_START_POST);
  305 
  306         /* Initialize the logical ID to APIC ID table. */
  307         for (i = 0; i < MAXCPU; i++)
  308                 cpu_apic_ids[i] = -1;
  309 
  310         /* Install an inter-CPU IPI for TLB invalidation */
  311         setidt(IPI_INVLTLB, IDTVEC(invltlb),
  312                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  313         setidt(IPI_INVLPG, IDTVEC(invlpg),
  314                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  315         setidt(IPI_INVLRNG, IDTVEC(invlrng),
  316                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  317 
  318         /* Install an inter-CPU IPI for forwarding hardclock() */
  319         setidt(IPI_HARDCLOCK, IDTVEC(hardclock),
  320                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  321         
  322         /* Install an inter-CPU IPI for forwarding statclock() */
  323         setidt(IPI_STATCLOCK, IDTVEC(statclock),
  324                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  325         
  326         /* Install an inter-CPU IPI for lazy pmap release */
  327         setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
  328                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  329 
  330         /* Install an inter-CPU IPI for all-CPU rendezvous */
  331         setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
  332                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  333 
  334         /* Install an inter-CPU IPI for forcing an additional software trap */
  335         setidt(IPI_AST, IDTVEC(cpuast),
  336                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  337 
  338         /* Install an inter-CPU IPI for CPU stop/restart */
  339         setidt(IPI_STOP, IDTVEC(cpustop),
  340                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  341 
  342         mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
  343 
  344         /* Set boot_cpu_id if needed. */
  345         if (boot_cpu_id == -1) {
  346                 boot_cpu_id = PCPU_GET(apic_id);
  347                 cpu_info[boot_cpu_id].cpu_bsp = 1;
  348         } else
  349                 KASSERT(boot_cpu_id == PCPU_GET(apic_id),
  350                     ("BSP's APIC ID doesn't match boot_cpu_id"));
  351         cpu_apic_ids[0] = boot_cpu_id;
  352 
  353         /* Start each Application Processor */
  354         start_all_aps();
  355 
  356         /* Setup the initial logical CPUs info. */
  357         logical_cpus = logical_cpus_mask = 0;
  358         if (cpu_feature & CPUID_HTT)
  359                 logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
  360 
  361         set_logical_apic_ids();
  362 }
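
      /*
       * A note on the HTT probe above: CPUID_HTT in cpu_feature advertises
       * Hyper-Threading, and CPUID_HTT_CORES masks bits 23:16 of cpu_procinfo
       * (CPUID leaf 1, EBX), the number of logical processors per physical
       * package -- hence the shift by 16.
       */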
  363 
  364 
  365 /*
  366  * Print various information about the SMP system hardware and setup.
  367  */
  368 void
  369 cpu_mp_announce(void)
  370 {
  371         int i, x;
  372 
  373         POSTCODE(MP_ANNOUNCE_POST);
  374 
  375         /* List CPUs */
  376         printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
  377         for (i = 1, x = 0; x < MAXCPU; x++) {
  378                 if (cpu_info[x].cpu_present && !cpu_info[x].cpu_bsp) {
  379                         KASSERT(i < mp_ncpus,
  380                             ("mp_ncpus and actual cpus are out of whack"));
  381                         printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
  382                 }
  383         }
  384 }
  385 
  386 /*
  387  * AP CPU's call this to initialize themselves.
  388  */
  389 void
  390 init_secondary(void)
  391 {
  392         int     gsel_tss;
  393         int     x, myid;
  394         u_int   cr0;
  395 
   396         /* bootAP is set in start_all_aps() to our ID. */
  397         myid = bootAP;
  398         gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
  399         gdt_segs[GPROC0_SEL].ssd_base =
  400                 (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
  401         SMP_prvspace[myid].pcpu.pc_prvspace =
  402                 &SMP_prvspace[myid].pcpu;
  403 
  404         for (x = 0; x < NGDT; x++) {
  405                 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
  406         }
  407 
  408         r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
  409         r_gdt.rd_base = (int) &gdt[myid * NGDT];
  410         lgdt(&r_gdt);                   /* does magic intra-segment return */
  411 
  412         lidt(&r_idt);
  413 
  414         lldt(_default_ldt);
  415         PCPU_SET(currentldt, _default_ldt);
  416 
  417         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
  418         gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
  419         PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
  420         PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
  421         PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
  422         PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
  423         PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
  424         ltr(gsel_tss);
  425 
  426         /*
  427          * Set to a known state:
  428          * Set by mpboot.s: CR0_PG, CR0_PE
  429          * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
  430          */
  431         cr0 = rcr0();
  432         cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
  433         load_cr0(cr0);
  434         CHECK_WRITE(0x38, 5);
  435         
  436         /* Disable local APIC just to be sure. */
  437         lapic_disable();
  438 
  439         /* signal our startup to the BSP. */
  440         mp_naps++;
  441         CHECK_WRITE(0x39, 6);
  442 
  443         /* Spin until the BSP releases the AP's. */
  444         while (!aps_ready)
  445                 ia32_pause();
  446 
  447         /* BSP may have changed PTD while we were waiting */
  448         invltlb();
  449         pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
  450 
  451 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
   452         lidt(&r_idt);   /* reload IDT; the F00F workaround may have moved it */
  453 #endif
  454 
  455         /* set up CPU registers and state */
  456         cpu_setregs();
  457 
  458         /* set up FPU state on the AP */
  459         npxinit(__INITIAL_NPXCW__);
  460 
  461         /* set up SSE registers */
  462         enable_sse();
  463 
  464         /* A quick check from sanity claus */
  465         if (PCPU_GET(apic_id) != lapic_id()) {
  466                 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
  467                 printf("SMP: actual apic_id = %d\n", lapic_id());
  468                 printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
  469                 printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]);
  470                 panic("cpuid mismatch! boom!!");
  471         }
  472 
  473         mtx_lock_spin(&ap_boot_mtx);
  474 
  475         /* Init local apic for irq's */
  476         lapic_setup();
  477 
  478         /* Set memory range attributes for this CPU to match the BSP */
  479         mem_range_AP_init();
  480 
  481         smp_cpus++;
  482 
  483         CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
  484         printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
  485 
  486         /* Determine if we are a logical CPU. */
  487         if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
  488                 logical_cpus_mask |= PCPU_GET(cpumask);
  489         
  490         /* Build our map of 'other' CPUs. */
  491         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  492 
  493         if (bootverbose)
  494                 lapic_dump("AP");
  495 
  496         if (smp_cpus == mp_ncpus) {
  497                 /* enable IPI's, tlb shootdown, freezes etc */
  498                 atomic_store_rel_int(&smp_started, 1);
  499                 smp_active = 1;  /* historic */
  500         }
  501 
  502         mtx_unlock_spin(&ap_boot_mtx);
  503 
  504         /* wait until all the AP's are up */
  505         while (smp_started == 0)
  506                 ia32_pause();
  507 
  508         /* ok, now grab sched_lock and enter the scheduler */
  509         mtx_lock_spin(&sched_lock);
  510 
  511         binuptime(PCPU_PTR(switchtime));
  512         PCPU_SET(switchticks, ticks);
  513 
  514         cpu_throw(NULL, choosethread());        /* doesn't return */
  515 
  516         panic("scheduler returned us to %s", __func__);
  517         /* NOTREACHED */
  518 }
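
      /*
       * Each CPU gets its own copy of the NGDT descriptors above; basing
       * GPRIV_SEL at this CPU's SMP_prvspace entry is what lets the
       * PCPU_GET/PCPU_SET accessors (which go through the per-CPU segment
       * register, %fs) resolve to this CPU's private data.
       */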
  519 
  520 /*******************************************************************
  521  * local functions and data
  522  */
  523 
  524 /*
  525  * Set the APIC logical IDs.
  526  *
  527  * We want to cluster logical CPU's within the same APIC ID cluster.
   528  * Since logical CPU's are aligned, simply filling in the clusters in
  529  * APIC ID order works fine.  Note that this does not try to balance
  530  * the number of CPU's in each cluster. (XXX?)
  531  */
  532 static void
  533 set_logical_apic_ids(void)
  534 {
  535         u_int apic_id, cluster, cluster_id;
  536 
  537         /* Force us to allocate cluster 0 at the start. */
  538         cluster = -1;
  539         cluster_id = APIC_MAX_INTRACLUSTER_ID;
  540         for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
  541                 if (!cpu_info[apic_id].cpu_present)
  542                         continue;
  543                 if (cluster_id == APIC_MAX_INTRACLUSTER_ID) {
  544                         cluster = ioapic_next_logical_cluster();
  545                         cluster_id = 0;
  546                 } else
  547                         cluster_id++;
  548                 if (bootverbose)
  549                         printf("APIC ID: physical %u, logical %u:%u\n",
  550                             apic_id, cluster, cluster_id);
  551                 lapic_set_logical_id(apic_id, cluster, cluster_id);
  552         }
  553 }
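
      /*
       * In the local APIC's cluster addressing model, the 8-bit logical ID
       * combines a cluster number with an ID within that cluster, and a
       * cluster holds at most APIC_MAX_INTRACLUSTER_ID + 1 CPUs -- which is
       * why the loop above allocates a fresh cluster once the current one
       * fills up.
       */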
  554 
  555 /*
  556  * start each AP in our list
  557  */
  558 static int
  559 start_all_aps(void)
  560 {
  561 #ifndef PC98
  562         u_char mpbiosreason;
  563 #endif
  564         u_long mpbioswarmvec;
  565         struct pcpu *pc;
  566         char *stack;
  567         uintptr_t kptbase;
  568         int i, pg, apic_id, cpu;
  569 
  570         POSTCODE(START_ALL_APS_POST);
  571 
  572         mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
  573 
  574         /* install the AP 1st level boot code */
  575         install_ap_tramp();
  576 
  577         /* save the current value of the warm-start vector */
  578         mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
  579 #ifndef PC98
  580         outb(CMOS_REG, BIOS_RESET);
  581         mpbiosreason = inb(CMOS_DATA);
  582 #endif
  583 
  584         /* set up temporary P==V mapping for AP boot */
  585         /* XXX this is a hack, we should boot the AP on its own stack/PTD */
  586         kptbase = (uintptr_t)(void *)KPTphys;
  587         for (i = 0; i < NKPT; i++)
  588                 PTD[i] = (pd_entry_t)(PG_V | PG_RW |
  589                     ((kptbase + i * PAGE_SIZE) & PG_FRAME));
  590         invltlb();
  591 
  592         /* start each AP */
  593         for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
  594                 if (!cpu_info[apic_id].cpu_present ||
  595                     cpu_info[apic_id].cpu_bsp)
  596                         continue;
  597                 cpu++;
  598 
  599                 /* save APIC ID for this logical ID */
  600                 cpu_apic_ids[cpu] = apic_id;
  601 
  602                 /* first page of AP's private space */
  603                 pg = cpu * i386_btop(sizeof(struct privatespace));
  604 
  605                 /* allocate a new private data page */
  606                 pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);
  607 
  608                 /* wire it into the private page table page */
  609                 SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));
  610 
  611                 /* allocate and set up an idle stack data page */
  612                 stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
  613                 for (i = 0; i < KSTACK_PAGES; i++)
  614                         SMPpt[pg + 1 + i] = (pt_entry_t)
  615                             (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
  616 
  617                 /* prime data page for it to use */
  618                 pcpu_init(pc, cpu, sizeof(struct pcpu));
  619                 pc->pc_apic_id = apic_id;
  620 
  621                 /* setup a vector to our boot code */
  622                 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
  623                 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
  624 #ifndef PC98
  625                 outb(CMOS_REG, BIOS_RESET);
  626                 outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
  627 #endif
  628 
  629                 bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
  630                     PAGE_SIZE];
  631                 bootAP = cpu;
  632 
  633                 /* attempt to start the Application Processor */
  634                 CHECK_INIT(99); /* setup checkpoints */
  635                 if (!start_ap(apic_id)) {
  636                         printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
  637                         CHECK_PRINT("trace");   /* show checkpoints */
  638                         /* better panic as the AP may be running loose */
  639                         printf("panic y/n? [y] ");
  640                         if (cngetc() != 'n')
  641                                 panic("bye-bye");
  642                 }
  643                 CHECK_PRINT("trace");           /* show checkpoints */
  644 
  645                 all_cpus |= (1 << cpu);         /* record AP in CPU map */
  646         }
  647 
  648         /* build our map of 'other' CPUs */
  649         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
  650 
  651         /* restore the warmstart vector */
  652         *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
  653 #ifndef PC98
  654         outb(CMOS_REG, BIOS_RESET);
  655         outb(CMOS_DATA, mpbiosreason);
  656 #endif
  657 
   658         /*
   659          * Set up the idle context for the BSP.  Similar to above, except
   660          * that some of it was done by locore, some by pmap.c, and some is
   661          * implicit because the BSP is cpu#0, the page is initially zero,
   662          * and we can refer to variables by name on the BSP.
   663          */
  664 
  665         /* Allocate and setup BSP idle stack */
  666         stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
  667         for (i = 0; i < KSTACK_PAGES; i++)
  668                 SMPpt[1 + i] = (pt_entry_t)
  669                     (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
  670 
  671         for (i = 0; i < NKPT; i++)
   672                 PTD[i] = 0;     /* tear down temporary P==V mapping */
  673         pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
  674 
  675         /* number of APs actually started */
  676         return mp_naps;
  677 }
  678 
  679 /*
  680  * load the 1st level AP boot code into base memory.
  681  */
  682 
  683 /* targets for relocation */
  684 extern void bigJump(void);
  685 extern void bootCodeSeg(void);
  686 extern void bootDataSeg(void);
  687 extern void MPentry(void);
  688 extern u_int MP_GDT;
  689 extern u_int mp_gdtbase;
  690 
  691 static void
  692 install_ap_tramp(void)
  693 {
  694         int     x;
  695         int     size = *(int *) ((u_long) & bootMP_size);
  696         u_char *src = (u_char *) ((u_long) bootMP);
  697         u_char *dst = (u_char *) boot_address + KERNBASE;
  698         u_int   boot_base = (u_int) bootMP;
  699         u_int8_t *dst8;
  700         u_int16_t *dst16;
  701         u_int32_t *dst32;
  702 
  703         POSTCODE(INSTALL_AP_TRAMP_POST);
  704 
  705         pmap_kenter(boot_address + KERNBASE, boot_address);
  706         for (x = 0; x < size; ++x)
  707                 *dst++ = *src++;
  708 
  709         /*
  710          * modify addresses in code we just moved to basemem. unfortunately we
  711          * need fairly detailed info about mpboot.s for this to work.  changes
  712          * to mpboot.s might require changes here.
  713          */
  714 
  715         /* boot code is located in KERNEL space */
  716         dst = (u_char *) boot_address + KERNBASE;
  717 
  718         /* modify the lgdt arg */
  719         dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
  720         *dst32 = boot_address + ((u_int) & MP_GDT - boot_base);
  721 
  722         /* modify the ljmp target for MPentry() */
  723         dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
  724         *dst32 = ((u_int) MPentry - KERNBASE);
  725 
  726         /* modify the target for boot code segment */
  727         dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
  728         dst8 = (u_int8_t *) (dst16 + 1);
  729         *dst16 = (u_int) boot_address & 0xffff;
  730         *dst8 = ((u_int) boot_address >> 16) & 0xff;
  731 
  732         /* modify the target for boot data segment */
  733         dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
  734         dst8 = (u_int8_t *) (dst16 + 1);
  735         *dst16 = (u_int) boot_address & 0xffff;
  736         *dst8 = ((u_int) boot_address >> 16) & 0xff;
  737 }
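
      /*
       * All of this patching is necessary because the trampoline runs in
       * 16-bit real mode and cannot use kernel virtual addresses: the
       * absolute segment values and the protected-mode entry point must be
       * fixed up in the copy placed at boot_address.
       */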
  738 
  739 /*
  740  * This function starts the AP (application processor) identified
  741  * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
  742  * to accomplish this.  This is necessary because of the nuances
  743  * of the different hardware we might encounter.  It isn't pretty,
  744  * but it seems to work.
  745  */
  746 static int
  747 start_ap(int apic_id)
  748 {
  749         int vector, ms;
  750         int cpus;
  751 
  752         POSTCODE(START_AP_POST);
  753 
  754         /* calculate the vector */
  755         vector = (boot_address >> 12) & 0xff;
  756 
  757         /* used as a watchpoint to signal AP startup */
  758         cpus = mp_naps;
  759 
   760         /*
   761          * First we do an INIT/RESET IPI: this INIT IPI might be run, resetting
   762          * and running the target CPU; OR this INIT IPI might be latched (P5
   763          * bug), with the CPU waiting for a STARTUP IPI; OR this INIT IPI
   764          * might be ignored.
   765          */
  766 
  767         /* do an INIT IPI: assert RESET */
  768         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  769             APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
  770 
  771         /* wait for pending status end */
  772         lapic_ipi_wait(-1);
  773 
  774         /* do an INIT IPI: deassert RESET */
  775         lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
  776             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
  777 
  778         /* wait for pending status end */
   779         DELAY(10000);           /* wait ~10ms */
  780         lapic_ipi_wait(-1);
  781 
   782         /*
   783          * Next we do a STARTUP IPI: the previous INIT IPI might still be
   784          * latched (P5 bug), in which case this 1st STARTUP would terminate
   785          * immediately and the previously started INIT IPI would continue; OR
   786          * the previous INIT IPI has already run, and this STARTUP IPI will
   787          * run; OR the previous INIT IPI was ignored, and this STARTUP IPI
   788          * will run.
   789          */
  790 
  791         /* do a STARTUP IPI */
  792         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  793             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  794             vector, apic_id);
  795         lapic_ipi_wait(-1);
   796         DELAY(200);             /* wait ~200us */
  797 
   798         /*
   799          * Finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
   800          * the previous STARTUP IPI was cancelled by a latched INIT IPI;
   801          * otherwise this STARTUP IPI will be ignored, as only ONE STARTUP IPI
   802          * is recognized after a hardware RESET or INIT IPI.
   803          */
  804 
  805         lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
  806             APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
  807             vector, apic_id);
  808         lapic_ipi_wait(-1);
   809         DELAY(200);             /* wait ~200us */
  810 
  811         /* Wait up to 5 seconds for it to start. */
  812         for (ms = 0; ms < 5000; ms++) {
  813                 if (mp_naps > cpus)
  814                         return 1;       /* return SUCCESS */
  815                 DELAY(1000);
  816         }
  817         return 0;               /* return FAILURE */
  818 }
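
      /*
       * The INIT, INIT-deassert, STARTUP, STARTUP dance above follows the
       * "universal start-up algorithm" of the Intel MultiProcessor
       * Specification; success is detected by watching mp_naps, which each
       * AP increments in init_secondary() once it is alive.
       */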
  819 
  820 #ifdef COUNT_XINVLTLB_HITS
  821 u_int xhits_gbl[MAXCPU];
  822 u_int xhits_pg[MAXCPU];
  823 u_int xhits_rng[MAXCPU];
  824 SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
  825 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
  826     sizeof(xhits_gbl), "IU", "");
  827 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
  828     sizeof(xhits_pg), "IU", "");
  829 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
  830     sizeof(xhits_rng), "IU", "");
  831 
  832 u_int ipi_global;
  833 u_int ipi_page;
  834 u_int ipi_range;
  835 u_int ipi_range_size;
  836 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
  837 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
  838 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
  839 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
  840     0, "");
  841 
  842 u_int ipi_masked_global;
  843 u_int ipi_masked_page;
  844 u_int ipi_masked_range;
  845 u_int ipi_masked_range_size;
  846 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
  847     &ipi_masked_global, 0, "");
  848 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
  849     &ipi_masked_page, 0, "");
  850 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
  851     &ipi_masked_range, 0, "");
  852 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
  853     &ipi_masked_range_size, 0, "");
  854 #endif /* COUNT_XINVLTLB_HITS */
  855 
  856 /*
  857  * Flush the TLB on all other CPU's
  858  */
  859 static void
  860 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  861 {
  862         u_int ncpu;
  863 
   864         ncpu = mp_ncpus - 1;    /* do not shoot down self */
  865         if (ncpu < 1)
  866                 return;         /* no other cpus */
  867         mtx_assert(&smp_tlb_mtx, MA_OWNED);
  868         smp_tlb_addr1 = addr1;
  869         smp_tlb_addr2 = addr2;
  870         atomic_store_rel_int(&smp_tlb_wait, 0);
  871         ipi_all_but_self(vector);
  872         while (smp_tlb_wait < ncpu)
  873                 ia32_pause();
  874 }
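
      /*
       * The handshake above depends on the IPI handlers behind
       * IDTVEC(invltlb)/IDTVEC(invlpg)/IDTVEC(invlrng), installed in
       * cpu_mp_start(), performing the requested invalidation and then
       * atomically incrementing smp_tlb_wait; the initiator spins until
       * every other CPU has checked in.
       */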
  875 
  876 /*
  877  * This is about as magic as it gets.  fortune(1) has got similar code
  878  * for reversing bits in a word.  Who thinks up this stuff??
  879  *
  880  * Yes, it does appear to be consistently faster than:
  881  * while (i = ffs(m)) {
  882  *      m >>= i;
  883  *      bits++;
  884  * }
  885  * and
  886  * while (lsb = (m & -m)) {     // This is magic too
  887  *      m &= ~lsb;              // or: m ^= lsb
  888  *      bits++;
  889  * }
  890  * Both of these latter forms do some very strange things on gcc-3.1 with
  891  * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
  892  * There is probably an SSE or MMX popcnt instruction.
  893  *
  894  * I wonder if this should be in libkern?
  895  *
  896  * XXX Stop the presses!  Another one:
  897  * static __inline u_int32_t
  898  * popcnt1(u_int32_t v)
  899  * {
  900  *      v -= ((v >> 1) & 0x55555555);
  901  *      v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
  902  *      v = (v + (v >> 4)) & 0x0F0F0F0F;
  903  *      return (v * 0x01010101) >> 24;
  904  * }
  905  * The downside is that it has a multiply.  With a pentium3 with
  906  * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
  907  * an imull, and in that case it is faster.  In most other cases
  908  * it appears slightly slower.
  909  *
  910  * Another variant (also from fortune):
  911  * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
  912  * #define  BX_(x)     ((x) - (((x)>>1)&0x77777777)            \
  913  *                          - (((x)>>2)&0x33333333)            \
  914  *                          - (((x)>>3)&0x11111111))
  915  */
  916 static __inline u_int32_t
  917 popcnt(u_int32_t m)
  918 {
  919 
  920         m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
  921         m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
  922         m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
  923         m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
  924         m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
  925         return m;
  926 }
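
      /*
       * (For what it's worth: there is no MMX/SSE population-count
       * instruction in this era of CPUs; a dedicated POPCNT instruction
       * only appeared in much later processors, so the shift-and-mask
       * version above remains the portable choice.)
       */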
  927 
  928 static void
  929 smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
  930 {
  931         int ncpu, othercpus;
  932 
  933         othercpus = mp_ncpus - 1;
  934         if (mask == (u_int)-1) {
  935                 ncpu = othercpus;
  936                 if (ncpu < 1)
  937                         return;
  938         } else {
  939                 mask &= ~PCPU_GET(cpumask);
  940                 if (mask == 0)
  941                         return;
  942                 ncpu = popcnt(mask);
  943                 if (ncpu > othercpus) {
  944                         /* XXX this should be a panic offence */
  945                         printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
  946                             ncpu, othercpus);
  947                         ncpu = othercpus;
  948                 }
  949                 /* XXX should be a panic, implied by mask == 0 above */
  950                 if (ncpu < 1)
  951                         return;
  952         }
  953         mtx_assert(&smp_tlb_mtx, MA_OWNED);
  954         smp_tlb_addr1 = addr1;
  955         smp_tlb_addr2 = addr2;
  956         atomic_store_rel_int(&smp_tlb_wait, 0);
  957         if (mask == (u_int)-1)
  958                 ipi_all_but_self(vector);
  959         else
  960                 ipi_selected(mask, vector);
  961         while (smp_tlb_wait < ncpu)
  962                 ia32_pause();
  963 }
  964 
  965 void
  966 smp_invltlb(void)
  967 {
  968         if (smp_started) {
  969                 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
  970 #ifdef COUNT_XINVLTLB_HITS
  971                 ipi_global++;
  972 #endif
  973         }
  974 }
  975 
  976 void
  977 smp_invlpg(vm_offset_t addr)
  978 {
  979         if (smp_started) {
  980                 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
  981 #ifdef COUNT_XINVLTLB_HITS
  982                 ipi_page++;
  983 #endif
  984         }
  985 }
  986 
  987 void
  988 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
  989 {
  990         if (smp_started) {
  991                 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
  992 #ifdef COUNT_XINVLTLB_HITS
  993                 ipi_range++;
  994                 ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
  995 #endif
  996         }
  997 }
  998 
  999 void
 1000 smp_masked_invltlb(u_int mask)
 1001 {
 1002         if (smp_started) {
 1003                 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
 1004 #ifdef COUNT_XINVLTLB_HITS
 1005                 ipi_masked_global++;
 1006 #endif
 1007         }
 1008 }
 1009 
 1010 void
 1011 smp_masked_invlpg(u_int mask, vm_offset_t addr)
 1012 {
 1013         if (smp_started) {
 1014                 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
 1015 #ifdef COUNT_XINVLTLB_HITS
 1016                 ipi_masked_page++;
 1017 #endif
 1018         }
 1019 }
 1020 
 1021 void
 1022 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
 1023 {
 1024         if (smp_started) {
 1025                 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
 1026 #ifdef COUNT_XINVLTLB_HITS
 1027                 ipi_masked_range++;
 1028                 ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
 1029 #endif
 1030         }
 1031 }
 1032 
 1033 
 1034 /*
 1035  * For statclock, we send an IPI to all CPU's to have them call this
 1036  * function.
 1037  */
 1038 void
 1039 forwarded_statclock(struct clockframe frame)
 1040 {
 1041         struct thread *td;
 1042 
 1043         CTR0(KTR_SMP, "forwarded_statclock");
 1044         td = curthread;
 1045         td->td_intr_nesting_level++;
 1046         if (profprocs != 0)
 1047                 profclock(&frame);
 1048         if (pscnt == psdiv)
 1049                 statclock(&frame);
 1050         td->td_intr_nesting_level--;
 1051 }
 1052 
 1053 void
 1054 forward_statclock(void)
 1055 {
 1056         int map;
 1057 
 1058         CTR0(KTR_SMP, "forward_statclock");
 1059 
 1060         if (!smp_started || cold || panicstr)
 1061                 return;
 1062 
 1063         map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
 1064         if (map != 0)
 1065                 ipi_selected(map, IPI_STATCLOCK);
 1066 }
 1067 
 1068 /*
 1069  * For each hardclock(), we send an IPI to all other CPU's to have them
 1070  * execute this function.  It would be nice to reduce contention on
 1071  * sched_lock if we could simply peek at the CPU to determine the user/kernel
 1072  * state and call hardclock_process() on the CPU receiving the clock interrupt
 1073  * and then just use a simple IPI to handle any ast's if needed.
 1074  */
 1075 void
 1076 forwarded_hardclock(struct clockframe frame)
 1077 {
 1078         struct thread *td;
 1079 
 1080         CTR0(KTR_SMP, "forwarded_hardclock");
 1081         td = curthread;
 1082         td->td_intr_nesting_level++;
 1083         hardclock_process(&frame);
 1084         td->td_intr_nesting_level--;
 1085 }
 1086 
 1087 void 
 1088 forward_hardclock(void)
 1089 {
 1090         u_int map;
 1091 
 1092         CTR0(KTR_SMP, "forward_hardclock");
 1093 
 1094         if (!smp_started || cold || panicstr)
 1095                 return;
 1096 
 1097         map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
 1098         if (map != 0)
 1099                 ipi_selected(map, IPI_HARDCLOCK);
 1100 }
 1101 
 1102 /*
 1103  * send an IPI to a set of cpus.
 1104  */
 1105 void
 1106 ipi_selected(u_int32_t cpus, u_int ipi)
 1107 {
 1108         int cpu;
 1109 
 1110         CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 1111         while ((cpu = ffs(cpus)) != 0) {
 1112                 cpu--;
 1113                 KASSERT(cpu_apic_ids[cpu] != -1,
 1114                     ("IPI to non-existent CPU %d", cpu));
 1115                 lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
 1116                 cpus &= ~(1 << cpu);
 1117         }
 1118 }
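
      /*
       * Typical usage, as in forward_hardclock() above: build a mask of the
       * other running CPUs and deliver one vector to all of them:
       *
       *      map = PCPU_GET(other_cpus) & ~(stopped_cpus | hlt_cpus_mask);
       *      if (map != 0)
       *              ipi_selected(map, IPI_HARDCLOCK);
       */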
 1119 
 1120 /*
  1121  * send an IPI containing 'ipi' to all CPUs, including myself
 1122  */
 1123 void
 1124 ipi_all(u_int ipi)
 1125 {
 1126 
 1127         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1128         lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
 1129 }
 1130 
 1131 /*
 1132  * send an IPI to all CPUs EXCEPT myself
 1133  */
 1134 void
 1135 ipi_all_but_self(u_int ipi)
 1136 {
 1137 
 1138         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1139         lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 1140 }
 1141 
 1142 /*
 1143  * send an IPI to myself
 1144  */
 1145 void
 1146 ipi_self(u_int ipi)
 1147 {
 1148 
 1149         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 1150         lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
 1151 }
 1152 
 1153 /*
 1154  * This is called once the rest of the system is up and running and we're
 1155  * ready to let the AP's out of the pen.
 1156  */
 1157 static void
 1158 release_aps(void *dummy __unused)
 1159 {
 1160 
 1161         if (mp_ncpus == 1) 
 1162                 return;
 1163         mtx_lock_spin(&sched_lock);
 1164         atomic_store_rel_int(&aps_ready, 1);
 1165         while (smp_started == 0)
 1166                 ia32_pause();
 1167         mtx_unlock_spin(&sched_lock);
 1168 }
 1169 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 1170 
 1171 static int
 1172 sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
 1173 {
 1174         u_int mask;
 1175         int error;
 1176 
 1177         mask = hlt_cpus_mask;
 1178         error = sysctl_handle_int(oidp, &mask, 0, req);
 1179         if (error || !req->newptr)
 1180                 return (error);
 1181 
 1182         if (logical_cpus_mask != 0 &&
 1183             (mask & logical_cpus_mask) == logical_cpus_mask)
 1184                 hlt_logical_cpus = 1;
 1185         else
 1186                 hlt_logical_cpus = 0;
 1187 
 1188         if ((mask & all_cpus) == all_cpus)
  1189                 mask &= ~(1<<0);        /* never halt CPU 0 (the BSP) */
 1190         hlt_cpus_mask = mask;
 1191         return (error);
 1192 }
 1193 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
 1194     0, 0, sysctl_hlt_cpus, "IU", "");
 1195 
 1196 static int
 1197 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
 1198 {
 1199         int disable, error;
 1200 
 1201         disable = hlt_logical_cpus;
 1202         error = sysctl_handle_int(oidp, &disable, 0, req);
 1203         if (error || !req->newptr)
 1204                 return (error);
 1205 
 1206         if (disable)
 1207                 hlt_cpus_mask |= logical_cpus_mask;
 1208         else
 1209                 hlt_cpus_mask &= ~logical_cpus_mask;
 1210 
 1211         if ((hlt_cpus_mask & all_cpus) == all_cpus)
  1212                 hlt_cpus_mask &= ~(1<<0);       /* never halt CPU 0 (the BSP) */
 1213 
 1214         hlt_logical_cpus = disable;
 1215         return (error);
 1216 }
 1217 
 1218 static void
 1219 cpu_hlt_setup(void *dummy __unused)
 1220 {
 1221 
 1222         if (logical_cpus_mask != 0) {
 1223                 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
 1224                     &hlt_logical_cpus);
 1225                 sysctl_ctx_init(&logical_cpu_clist);
 1226                 SYSCTL_ADD_PROC(&logical_cpu_clist,
 1227                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1228                     "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
 1229                     sysctl_hlt_logical_cpus, "IU", "");
 1230                 SYSCTL_ADD_UINT(&logical_cpu_clist,
 1231                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 1232                     "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
 1233                     &logical_cpus_mask, 0, "");
 1234 
 1235                 if (hlt_logical_cpus)
 1236                         hlt_cpus_mask |= logical_cpus_mask;
 1237         }
 1238 }
 1239 SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
 1240 
 1241 int
 1242 mp_grab_cpu_hlt(void)
 1243 {
 1244         u_int mask = PCPU_GET(cpumask);
 1245         int retval;
 1246 
 1247         retval = mask & hlt_cpus_mask;
 1248         while (mask & hlt_cpus_mask)
 1249                 __asm __volatile("sti; hlt" : : : "memory");
 1250         return (retval);
 1251 }
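
      /*
       * The "sti; hlt" pair above is free of the lost-wakeup race: STI
       * enables interrupts only after the following instruction, so an
       * interrupt cannot be taken between the two and strand the CPU in
       * HLT; each wakeup re-checks hlt_cpus_mask before halting again.
       */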

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.