The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/mp_machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1996, by Steve Passe
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. The name of the developer may NOT be used to endorse or promote products
   11  *    derived from this software without specific prior written permission.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  *
   25  * $FreeBSD: releng/5.1/sys/i386/i386/mp_machdep.c 115016 2003-05-15 05:12:24Z alc $
   26  */
   27 
   28 #include "opt_cpu.h"
   29 #include "opt_kstack_pages.h"
   30 #include "opt_swtch.h"
   31 
   32 #ifdef SMP
   33 #include <machine/smptests.h>
   34 #else
   35 #error
   36 #endif
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/bus.h>
   41 #include <sys/cons.h>   /* cngetc() */
   42 #ifdef GPROF 
   43 #include <sys/gmon.h>
   44 #endif
   45 #include <sys/kernel.h>
   46 #include <sys/ktr.h>
   47 #include <sys/lock.h>
   48 #include <sys/malloc.h>
   49 #include <sys/memrange.h>
   50 #include <sys/mutex.h>
   51 #include <sys/pcpu.h>
   52 #include <sys/proc.h>
   53 #include <sys/smp.h>
   54 #include <sys/sysctl.h>
   55 #include <sys/user.h>
   56 
   57 #include <vm/vm.h>
   58 #include <vm/vm_param.h>
   59 #include <vm/pmap.h>
   60 #include <vm/vm_kern.h>
   61 #include <vm/vm_extern.h>
   62 #include <vm/vm_map.h>
   63 
   64 #include <machine/apic.h>
   65 #include <machine/atomic.h>
   66 #include <machine/clock.h>
   67 #include <machine/cpu.h>
   68 #include <machine/cpufunc.h>
   69 #include <machine/mpapic.h>
   70 #include <machine/psl.h>
   71 #include <machine/segments.h>
   72 #include <machine/smp.h>
   73 #include <machine/smptests.h>   /** TEST_DEFAULT_CONFIG, TEST_TEST1 */
   74 #include <machine/tss.h>
   75 #include <machine/specialreg.h>
   76 #include <machine/privatespace.h>
   77 
   78 #if defined(APIC_IO)
   79 #include <machine/md_var.h>             /* setidt() */
   80 #include <i386/isa/icu.h>               /* IPIs */
   81 #include <i386/isa/intr_machdep.h>      /* IPIs */
   82 #endif  /* APIC_IO */
   83 
   84 #if defined(TEST_DEFAULT_CONFIG)
   85 #define MPFPS_MPFB1     TEST_DEFAULT_CONFIG
   86 #else
   87 #define MPFPS_MPFB1     mpfps->mpfb1
   88 #endif  /* TEST_DEFAULT_CONFIG */
   89 
   90 #define WARMBOOT_TARGET         0
   91 #define WARMBOOT_OFF            (KERNBASE + 0x0467)
   92 #define WARMBOOT_SEG            (KERNBASE + 0x0469)
   93 
   94 #ifdef PC98
   95 #define BIOS_BASE               (0xe8000)
   96 #define BIOS_SIZE               (0x18000)
   97 #else
   98 #define BIOS_BASE               (0xf0000)
   99 #define BIOS_SIZE               (0x10000)
  100 #endif
  101 #define BIOS_COUNT              (BIOS_SIZE/4)
  102 
  103 #define CMOS_REG                (0x70)
  104 #define CMOS_DATA               (0x71)
  105 #define BIOS_RESET              (0x0f)
  106 #define BIOS_WARM               (0x0a)
  107 
  108 #define PROCENTRY_FLAG_EN       0x01
  109 #define PROCENTRY_FLAG_BP       0x02
  110 #define IOAPICENTRY_FLAG_EN     0x01
  111 
  112 
  113 /* MP Floating Pointer Structure; i386_mp_probe() points mpfps at the address where search_for_sig() matched MP_SIG */
  114 typedef struct MPFPS {
  115         char    signature[4];   /* compared as one 32-bit word against MP_SIG ("_MP_") */
  116         void   *pap;            /* read as the MP Configuration Table Header (mpcth_t) by mptable_pass1() when no default config is selected */
  117         u_char  length;
  118         u_char  spec_rev;
  119         u_char  checksum;
  120         u_char  mpfb1;          /* non-zero selects a 'default' configuration: row (mpfb1 - 1) of default_data[] */
  121         u_char  mpfb2;
  122         u_char  mpfb3;
  123         u_char  mpfb4;
  124         u_char  mpfb5;
  125 }      *mpfps_t;
  126 
  127 /* MP Configuration Table Header; the base table entries start sizeof(struct MPCTH) bytes after it */
  128 typedef struct MPCTH {
  129         char    signature[4];
  130         u_short base_table_length;
  131         u_char  spec_rev;
  132         u_char  checksum;
  133         u_char  oem_id[8];
  134         u_char  product_id[12];
  135         void   *oem_table_pointer;
  136         u_short oem_table_size;
  137         u_short entry_count;            /* number of base table entries walked by mptable_pass1() */
  138         void   *apic_address;           /* copied into cpu_apic_address by mptable_pass1() */
  139         u_short extended_table_length;
  140         u_char  extended_table_checksum;
  141         u_char  reserved;
  142 }      *mpcth_t;
  143 
  144 
  145 typedef struct PROCENTRY {       /* base table entry handled as 'case 0' (processor) in mptable_pass1() */
  146         u_char  type;            /* first byte of every base table entry; selects how the entry is interpreted */
  147         u_char  apic_id;         /* used as a bit position in the id_mask given to mptable_hyperthread_fixup() */
  148         u_char  apic_version;
  149         u_char  cpu_flags;       /* PROCENTRY_FLAG_* bits; only entries with PROCENTRY_FLAG_EN set are counted */
  150         u_long  cpu_signature;
  151         u_long  feature_flags;
  152         u_long  reserved1;
  153         u_long  reserved2;
  154 }      *proc_entry_ptr;
  155 
  156 typedef struct BUSENTRY {        /* base table entry handled as 'case 1' (bus) in mptable_pass1() */
  157         u_char  type;
  158         u_char  bus_id;
  159         char    bus_type[6];     /* NOTE(review): presumably the name resolved by lookup_bus_type() -- confirm against bus_entry() */
  160 }      *bus_entry_ptr;
  161 
  162 typedef struct IOAPICENTRY {     /* base table entry handled as 'case 2' (I/O APIC) in mptable_pass1() */
  163         u_char  type;
  164         u_char  apic_id;
  165         u_char  apic_version;
  166         u_char  apic_flags;      /* entry is only recorded when IOAPICENTRY_FLAG_EN is set */
  167         void   *apic_address;    /* stored in io_apic_address[] by mptable_pass1() */
  168 }      *io_apic_entry_ptr;
  169 
  170 typedef struct INTENTRY {        /* mptable_pass1() labels entry types 3 and 4 as int_entry; only type 3 increments nintrs */
  171         u_char  type;
  172         u_char  int_type;
  173         u_short int_flags;
  174         u_char  src_bus_id;
  175         u_char  src_bus_irq;
  176         u_char  dst_apic_id;
  177         u_char  dst_apic_int;
  178 }      *int_entry_ptr;
  179 
  180 /* descriptions of MP basetable entries; element type of basetable_entry_types[] */
  181 typedef struct BASETABLE_ENTRY {
  182         u_char  type;
  183         u_char  length;          /* byte count mptable_pass1() adds to its position to reach the next entry */
  184         char    name[16];
  185 }       basetable_entry;
  186 
  187 /*
  188  * this code MUST be enabled here and in mpboot.s.
  189  * it follows the very early stages of AP boot by placing values in CMOS ram.
  190  * it NORMALLY will never be needed and thus the primitive method for enabling.
  191  *
  192 #define CHECK_POINTS
  193  */
  194 
  #if defined(CHECK_POINTS) && !defined(PC98)
  /*
   * Access one CMOS RAM byte: write register index A to CMOS_REG, then
   * read or write the value through CMOS_DATA.
   */
  #define CHECK_READ(A)    (outb(CMOS_REG, (A)), inb(CMOS_DATA))
  #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

  /*
   * Store D in each of the six checkpoint bytes (CMOS registers 0x34-0x39).
   * Wrapped in do { } while (0) so the macro behaves as a single statement
   * and the caller supplies the terminating ';'.
   */
  #define CHECK_INIT(D)                           \
          do {                                    \
                  CHECK_WRITE(0x34, (D));         \
                  CHECK_WRITE(0x35, (D));         \
                  CHECK_WRITE(0x36, (D));         \
                  CHECK_WRITE(0x37, (D));         \
                  CHECK_WRITE(0x38, (D));         \
                  CHECK_WRITE(0x39, (D));         \
          } while (0)

  /*
   * Print the six checkpoint bytes, prefixed by the string S.
   * Also a single statement; the caller supplies the terminating ';'.
   */
  #define CHECK_PRINT(S)                          \
          do {                                    \
                  printf("%s: %d, %d, %d, %d, %d, %d\n",  \
                     (S),                         \
                     CHECK_READ(0x34),            \
                     CHECK_READ(0x35),            \
                     CHECK_READ(0x36),            \
                     CHECK_READ(0x37),            \
                     CHECK_READ(0x38),            \
                     CHECK_READ(0x39));           \
          } while (0)

  #else                           /* CHECK_POINTS */

  /* Checkpoints disabled: both macros expand to nothing. */
  #define CHECK_INIT(D)
  #define CHECK_PRINT(S)

  #endif                          /* CHECK_POINTS */
  223 
  224 /*
  225  * Values to send to the POST hardware via POSTCODE(); the function that emits each one is noted where it is visible in this file.
  226  */
  227 #define MP_BOOTADDRESS_POST     0x10    /* mp_bootaddress() */
  228 #define MP_PROBE_POST           0x11    /* i386_mp_probe() */
  229 #define MPTABLE_PASS1_POST      0x12    /* mptable_pass1() */
  230 
  231 #define MP_START_POST           0x13    /* cpu_mp_start() */
  232 #define MP_ENABLE_POST          0x14    /* mp_enable() */
  233 #define MPTABLE_PASS2_POST      0x15    /* mptable_pass2() */
  234 
  235 #define START_ALL_APS_POST      0x16
  236 #define INSTALL_AP_TRAMP_POST   0x17
  237 #define START_AP_POST           0x18
  238 
  239 #define MP_ANNOUNCE_POST        0x19    /* cpu_mp_announce() */
  240 
  241 static int need_hyperthreading_fixup;
  242 static u_int logical_cpus;
  243 static u_int logical_cpus_mask;
  244 
  245 /* used to hold the AP's until we are ready to release them */
  246 static struct mtx ap_boot_mtx;
  247 
  248 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
  249 int     current_postcode;
  250 
  251 /** XXX FIXME: what system files declare these??? */
  252 extern struct region_descriptor r_gdt, r_idt;
  253 
  254 int     bsp_apic_ready = 0;     /* flags useability of BSP apic */
  255 int     mp_naps;                /* # of Applications processors */
  256 int     mp_nbusses;             /* # of busses */
  257 int     mp_napics;              /* # of IO APICs */
  258 int     boot_cpu_id;            /* designated BSP */
  259 vm_offset_t cpu_apic_address;
  260 vm_offset_t io_apic_address[NAPICID];   /* NAPICID is more than enough */
  261 extern  int nkpt;
  262 
  263 u_int32_t cpu_apic_versions[MAXCPU];
  264 u_int32_t *io_apic_versions;
  265 
  266 #ifdef APIC_INTR_REORDER
  267 struct {
  268         volatile int *location;
  269         int bit;
  270 } apic_isrbit_location[32];
  271 #endif
  272 
  273 struct apic_intmapinfo  int_to_apicintpin[APIC_INTMAPSIZE];
  274 
  275 /*
  276  * APIC ID logical/physical mapping structures.
  277  * We oversize these to simplify boot-time config.
  278  */
  279 int     cpu_num_to_apic_id[NAPICID];
  280 int     io_num_to_apic_id[NAPICID];
  281 int     apic_id_to_logical[NAPICID];
  282 
  283 
  284 /* AP uses this during bootstrap.  Do not staticize.  */
  285 char *bootSTK;
  286 static int bootAP;
  287 
  288 /* Hotwire a 0->4MB V==P mapping */
  289 extern pt_entry_t *KPTphys;
  290 
  291 /* SMP page table page */
  292 extern pt_entry_t *SMPpt;
  293 
  294 struct pcb stoppcbs[MAXCPU];
  295 
  296 #ifdef APIC_IO
  297 /* Variables needed for SMP tlb shootdown. */
  298 vm_offset_t smp_tlb_addr1;
  299 vm_offset_t smp_tlb_addr2;
  300 volatile int smp_tlb_wait;
  301 static struct mtx smp_tlb_mtx;
  302 #endif
  303 
  304 /*
  305  * Local data and functions.
  306  */
  307 
  308 /* Set to 1 once we're ready to let the APs out of the pen. */
  309 static volatile int aps_ready = 0;
  310 
  311 static int      mp_capable;
  312 static u_int    boot_address;
  313 static u_int    base_memory;
  314 
  315 static int      picmode;                /* 0: virtual wire mode, 1: PIC mode */
  316 static mpfps_t  mpfps;
  317 static int      search_for_sig(u_int32_t target, int count);
  318 static void     mp_enable(u_int boot_addr);
  319 
  320 static void     mptable_hyperthread_fixup(u_int id_mask);
  321 static void     mptable_pass1(void);
  322 static int      mptable_pass2(void);
  323 static void     default_mp_table(int type);
  324 static void     fix_mp_table(void);
  325 static void     setup_apic_irq_mapping(void);
  326 static void     init_locks(void);
  327 static int      start_all_aps(u_int boot_addr);
  328 static void     install_ap_tramp(u_int boot_addr);
  329 static int      start_ap(int logicalCpu, u_int boot_addr);
  330 void            ap_init(void);
  331 static int      apic_int_is_bus_type(int intr, int bus_type);
  332 static void     release_aps(void *dummy);
  333 
  334 /*
  335  * initialize all the SMP locks
  336  */
  337 
  338 /* lock region used by kernel profiling */
  339 int     mcount_lock;
  340 
  341 #ifdef USE_COMLOCK
  342 /* locks com (tty) data/hardware accesses: a FASTINTR() */
  343 struct mtx              com_mtx;
  344 #endif /* USE_COMLOCK */
  345 
  /*
   * Initialize the spin mutexes defined in this file.  Each mutex only
   * exists, and is only initialized, when its kernel option is compiled in.
   */
  static void
  init_locks(void)
  {
  #if defined(USE_COMLOCK)
          /* guards com (tty) data/hardware accesses */
          mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
  #endif
  #if defined(APIC_IO)
          /* guards the SMP tlb shootdown variables */
          mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
  #endif
  }
  357 
  358 /*
  359  * Calculate usable address in base memory for AP trampoline code.
  360  */
  361 u_int
  362 mp_bootaddress(u_int basemem)
  363 {
  364         POSTCODE(MP_BOOTADDRESS_POST);
  365 
  366         base_memory = basemem * 1024;   /* convert to bytes */
  367 
  368         boot_address = base_memory & ~0xfff;    /* round down to 4k boundary */
  369         if ((base_memory - boot_address) < bootMP_size)
  370                 boot_address -= 4096;   /* not enough, lower by 4k */
  371 
  372         return boot_address;
  373 }
  374 
  375 
  376 /*
  377  * Look for an Intel MP spec table (ie, SMP capable hardware).
  378  */
  379 void
  380 i386_mp_probe(void)
  381 {
  382         int     x;
  383         u_long  segment;
  384         u_int32_t target;
  385 
  386         POSTCODE(MP_PROBE_POST);
  387 
  388         /* see if EBDA exists */
  389         if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
  390                 /* search first 1K of EBDA */
  391                 target = (u_int32_t) (segment << 4);
  392                 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
  393                         goto found;
  394         } else {
  395                 /* last 1K of base memory, effective 'top of base' passed in */
  396                 target = (u_int32_t) (base_memory - 0x400);
  397                 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
  398                         goto found;
  399         }
  400 
  401         /* search the BIOS */
  402         target = (u_int32_t) BIOS_BASE;
  403         if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
  404                 goto found;
  405 
  406         /* nothing found */
  407         mpfps = (mpfps_t)0;
  408         mp_capable = 0;
  409         return;
  410 
  411 found:
  412         /* calculate needed resources */
  413         mpfps = (mpfps_t)x;
  414         mptable_pass1();
  415 
  416         /* flag fact that we are running multiple processors */
  417         mp_capable = 1;
  418 }
  419 
  420 int
  421 cpu_mp_probe(void)
  422 {
  423         /*
  424          * Record BSP in CPU map
  425          * This is done here so that MBUF init code works correctly.
  426          */
  427         all_cpus = 1;
  428 
  429         return (mp_capable);
  430 }
  431 
  432 /*
  433  * Initialize the SMP hardware and the APIC and start up the AP's.
  434  */
  435 void
  436 cpu_mp_start(void)
  437 {
  438         POSTCODE(MP_START_POST);
  439 
  440         /* look for MP capable motherboard */
  441         if (mp_capable)
  442                 mp_enable(boot_address);
  443         else
  444                 panic("MP hardware not found!");
  445 
  446         cpu_setregs();
  447 }
  448 
  449 
  450 /*
  451  * Print various information about the SMP system hardware and setup.
  452  */
  453 void
  454 cpu_mp_announce(void)
  455 {
  456         int     x;
  457 
  458         POSTCODE(MP_ANNOUNCE_POST);
  459 
  460         printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
  461         printf(", version: 0x%08x", cpu_apic_versions[0]);
  462         printf(", at 0x%08x\n", cpu_apic_address);
  463         for (x = 1; x <= mp_naps; ++x) {
  464                 printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
  465                 printf(", version: 0x%08x", cpu_apic_versions[x]);
  466                 printf(", at 0x%08x\n", cpu_apic_address);
  467         }
  468 
  469 #if defined(APIC_IO)
  470         for (x = 0; x < mp_napics; ++x) {
  471                 printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
  472                 printf(", version: 0x%08x", io_apic_versions[x]);
  473                 printf(", at 0x%08x\n", io_apic_address[x]);
  474         }
  475 #else
  476         printf(" Warning: APIC I/O disabled\n");
  477 #endif  /* APIC_IO */
  478 }
  479 
  480 /*
  481  * AP cpu's call this to sync up protected mode.
  482  */
  483 void
  484 init_secondary(void)
  485 {
  486         int     gsel_tss;
  487         int     x, myid = bootAP;
  488         u_int   cr0;
  489 
  490         gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
  491         gdt_segs[GPROC0_SEL].ssd_base =
  492                 (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
  493         SMP_prvspace[myid].pcpu.pc_prvspace =
  494                 &SMP_prvspace[myid].pcpu;
  495 
  496         for (x = 0; x < NGDT; x++) {
  497                 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
  498         }
  499 
  500         r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
  501         r_gdt.rd_base = (int) &gdt[myid * NGDT];
  502         lgdt(&r_gdt);                   /* does magic intra-segment return */
  503 
  504         lidt(&r_idt);
  505 
  506         lldt(_default_ldt);
  507         PCPU_SET(currentldt, _default_ldt);
  508 
  509         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
  510         gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
  511         PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
  512         PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
  513         PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
  514         PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
  515         PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
  516         ltr(gsel_tss);
  517 
  518         /*
  519          * Set to a known state:
  520          * Set by mpboot.s: CR0_PG, CR0_PE
  521          * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
  522          */
  523         cr0 = rcr0();
  524         cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
  525         load_cr0(cr0);
  526 
  527         pmap_set_opt();
  528 }
  529 
  530 
  531 #if defined(APIC_IO)
  532 /*
  533  * Final configuration of the BSP's local APIC:
  534  *  - disable 'pic mode'.
  535  *  - disable 'virtual wire mode'.
  536  *  - enable NMI.
  537  */
  538 void
  539 bsp_apic_configure(void)
  540 {
  541         u_char          byte;
  542         u_int32_t       temp;
  543 
  544         /* leave 'pic mode' if necessary */
  545         if (picmode) {
  546                 outb(0x22, 0x70);       /* select IMCR */
  547                 byte = inb(0x23);       /* current contents */
  548                 byte |= 0x01;           /* mask external INTR */
  549                 outb(0x23, byte);       /* disconnect 8259s/NMI */
  550         }
  551 
  552         /* mask lint0 (the 8259 'virtual wire' connection) */
  553         temp = lapic.lvt_lint0;
  554         temp |= APIC_LVT_M;             /* set the mask */
  555         lapic.lvt_lint0 = temp;
  556 
  557         /* setup lint1 to handle NMI */
  558         temp = lapic.lvt_lint1;
  559         temp &= ~APIC_LVT_M;            /* clear the mask */
  560         lapic.lvt_lint1 = temp;
  561 
  562         if (bootverbose)
  563                 apic_dump("bsp_apic_configure()");
  564 }
  565 #endif  /* APIC_IO */
  566 
  567 
  568 /*******************************************************************
  569  * local functions and data
  570  */
  571 
  572 /*
  573  * start the SMP system
       *
       * Steps, in order: temporarily map low memory V == P and run
       * mptable_pass2(); process a default configuration if pass 2 asked
       * for one; fix up the table and build the APIC IRQ mapping; with
       * APIC_IO, record/program every IO APIC and install the IPI and
       * spurious-interrupt vectors; initialize the SMP locks; start the APs.
       *
       * boot_addr is forwarded unchanged to start_all_aps().
  574  */
  575 static void
  576 mp_enable(u_int boot_addr)
  577 {
  578         int     x;      /* mptable_pass2() result; non-zero is handed to default_mp_table() */
  579 #if defined(APIC_IO)
  580         int     apic;
  581         u_int   ux;
  582 #endif  /* APIC_IO */
  583 
  584         POSTCODE(MP_ENABLE_POST);
  585 
  586         /* turn on 4MB of V == P addressing so we can get to MP table */
  587         *(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
  588         invltlb();
  589 
  590         /* examine the MP table for needed info, uses physical addresses */
  591         x = mptable_pass2();
  592 
  593         *(int *)PTD = 0;        /* clear the same PTD slot again: the V == P mapping was only needed for pass 2 */
  594         invltlb();
  595 
  596         /* can't process default configs till the CPU APIC is pmapped */
  597         if (x)
  598                 default_mp_table(x);
  599 
  600         /* post scan cleanup */
  601         fix_mp_table();
  602         setup_apic_irq_mapping();
  603 
  604 #if defined(APIC_IO)
  605 
  606         /* fill the LOGICAL io_apic_versions table */
  607         for (apic = 0; apic < mp_napics; ++apic) {
  608                 ux = io_apic_read(apic, IOAPIC_VER);
  609                 io_apic_versions[apic] = ux;
  610                 io_apic_set_id(apic, IO_TO_ID(apic));
  611         }
  612 
  613         /* program each IO APIC in the system */
  614         for (apic = 0; apic < mp_napics; ++apic)
  615                 if (io_apic_setup(apic) < 0)
  616                         panic("IO APIC setup failure");
  617 
  618         /* install a 'Spurious INTerrupt' vector */
  619         setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
  620                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  621 
  622         /* install an inter-CPU IPI for TLB invalidation */
  623         setidt(XINVLTLB_OFFSET, Xinvltlb,
  624                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  625         setidt(XINVLPG_OFFSET, Xinvlpg,
  626                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  627         setidt(XINVLRNG_OFFSET, Xinvlrng,
  628                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  629 
  630         /* install an inter-CPU IPI for forwarding hardclock() */
  631         setidt(XHARDCLOCK_OFFSET, Xhardclock,
  632                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  633         
  634         /* install an inter-CPU IPI for forwarding statclock() */
  635         setidt(XSTATCLOCK_OFFSET, Xstatclock,
  636                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  637         
  638 #ifdef LAZY_SWITCH
  639         /* install an inter-CPU IPI for lazy pmap release */
  640         setidt(XLAZYPMAP_OFFSET, Xlazypmap,
  641                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  642 #endif
  643 
  644         /* install an inter-CPU IPI for all-CPU rendezvous */
  645         setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
  646                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  647 
  648         /* install an inter-CPU IPI for forcing an additional software trap */
  649         setidt(XCPUAST_OFFSET, Xcpuast,
  650                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  651 
  652         /* install an inter-CPU IPI for CPU stop/restart */
  653         setidt(XCPUSTOP_OFFSET, Xcpustop,
  654                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  655 
  656 #if defined(TEST_TEST1)
  657         /* install a "fake hardware INTerrupt" vector */
  658         setidt(XTEST1_OFFSET, Xtest1,
  659                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  660 #endif  /** TEST_TEST1 */
  661 
  662 #endif  /* APIC_IO */
  663 
  664         /* initialize all SMP locks */
  665         init_locks();
  666 
  667         /* start each Application Processor */
  668         start_all_aps(boot_addr);       /* NOTE(review): start_all_aps() is declared to return int; the result is discarded here */
  669 }
  670 
  671 
  672 /*
  673  * look for the MP spec signature
  674  */
  675 
  676 /* string defined by the Intel MP Spec as identifying the MP table */
  677 #define MP_SIG          0x5f504d5f      /* _MP_ */
  678 #define NEXT(X)         ((X) += 4)
  679 static int
  680 search_for_sig(u_int32_t target, int count)
  681 {
  682         int     x;
  683         u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
  684 
  685         for (x = 0; x < count; NEXT(x))
  686                 if (addr[x] == MP_SIG)
  687                         /* make array index a byte index */
  688                         return (target + (x * sizeof(u_int32_t)));
  689 
  690         return -1;
  691 }
  692 
  693 
  694 static basetable_entry basetable_entry_types[] =        /* indexed by the entry type byte in mptable_pass1() */
  695 {
  696         {0, 20, "Processor"},   /* fields: type, length in bytes (the step to the next entry), name */
  697         {1, 8, "Bus"},
  698         {2, 8, "I/O APIC"},
  699         {3, 8, "I/O INT"},
  700         {4, 8, "Local INT"}
  701 };
  702 
  703 typedef struct BUSDATA {         /* element type of bus_data[], which mptable_pass2() allocates with mp_nbusses entries */
  704         u_char  bus_id;
  705         enum busTypes bus_type;
  706 }       bus_datum;
  707 
  708 typedef struct INTDATA {         /* io_int is the element type of io_apic_ints[], allocated with nintrs + 1 entries in mptable_pass2() */
  709         u_char  int_type;
  710         u_short int_flags;
  711         u_char  src_bus_id;
  712         u_char  src_bus_irq;
  713         u_char  dst_apic_id;
  714         u_char  dst_apic_int;
  715         u_char  int_vector;      /* NOTE(review): the only field with no same-named counterpart in struct INTENTRY */
  716 }       io_int, local_int;
  717 
  718 typedef struct BUSTYPENAME {     /* element type of bus_type_table[] */
  719         u_char  type;
  720         char    name[7];         /* large enough for the 6-character names in bus_type_table[] ("CBUSII", "XPRESS") plus the NUL */
  721 }       bus_type_name;
  722 
  723 static bus_type_name bus_type_table[] =         /* pairs a bus type constant with its name string; "---" marks UNKNOWN_BUSTYPE rows */
  724 {
  725         {CBUS, "CBUS"},
  726         {CBUSII, "CBUSII"},
  727         {EISA, "EISA"},
  728         {MCA, "MCA"},
  729         {UNKNOWN_BUSTYPE, "---"},
  730         {ISA, "ISA"},
  731         {MCA, "MCA"},           /* NOTE(review): MCA already appears three rows above -- confirm the duplicate is intended */
  732         {UNKNOWN_BUSTYPE, "---"},
  733         {UNKNOWN_BUSTYPE, "---"},
  734         {UNKNOWN_BUSTYPE, "---"},
  735         {UNKNOWN_BUSTYPE, "---"},
  736         {UNKNOWN_BUSTYPE, "---"},
  737         {PCI, "PCI"},
  738         {UNKNOWN_BUSTYPE, "---"},
  739         {UNKNOWN_BUSTYPE, "---"},
  740         {UNKNOWN_BUSTYPE, "---"},
  741         {UNKNOWN_BUSTYPE, "---"},
  742         {XPRESS, "XPRESS"},
  743         {UNKNOWN_BUSTYPE, "---"}
  744 };
  745 /* from MP spec v1.4, table 5-1; mptable_pass1() reads row (MPFPS_MPFB1 - 1), column 0 to obtain mp_nbusses */
  746 static int default_data[7][5] =
  747 {
  748 /*   nbus, id0, type0, id1, type1 */
  749         {1, 0, ISA, 255, 255},
  750         {1, 0, EISA, 255, 255},
  751         {1, 0, EISA, 255, 255},
  752         {1, 0, MCA, 255, 255},
  753         {2, 0, ISA, 1, PCI},
  754         {2, 0, EISA, 1, PCI},
  755         {2, 0, MCA, 1, PCI}
  756 };
  757 
  758 
  759 /* the bus data */
  760 static bus_datum *bus_data;
  761 
  762 /* the IO INT data, one entry per possible APIC INTerrupt */
  763 static io_int  *io_apic_ints;
  764 
  765 static int nintrs;
  766 
  767 static int processor_entry(proc_entry_ptr entry, int cpu);
  768 static int bus_entry(bus_entry_ptr entry, int bus);
  769 static int io_apic_entry(io_apic_entry_ptr entry, int apic);
  770 static int int_entry(int_entry_ptr entry, int intr);
  771 static int lookup_bus_type(char *name);
  772 
  773 
  774 /*
  775  * 1st pass on motherboard's Intel MP specification table.
  776  *
  777  * initializes:
  778  *      mp_ncpus = 1
  779  *
  780  * determines:
  781  *      cpu_apic_address (common to all CPUs)
  782  *      io_apic_address[N]
  783  *      mp_naps
  784  *      mp_nbusses
  785  *      mp_napics
  786  *      nintrs
  787  */
  788 static void
  789 mptable_pass1(void)
  790 {
  791         int     x;
  792         mpcth_t cth;
  793         int     totalSize;
  794         void*   position;
  795         int     count;
  796         int     type;
  797         u_int   id_mask;
  798 
  799         POSTCODE(MPTABLE_PASS1_POST);
  800 
  801         /* clear various tables */
  802         for (x = 0; x < NAPICID; ++x) {
  803                 io_apic_address[x] = ~0;        /* IO APIC address table */
  804         }
  805 
  806         /* init everything to empty */
  807         mp_naps = 0;
  808         mp_nbusses = 0;
  809         mp_napics = 0;
  810         nintrs = 0;
  811         id_mask = 0;
  812 
  813         /* check for use of 'default' configuration */
  814         if (MPFPS_MPFB1 != 0) {
  815                 /* use default addresses */
  816                 cpu_apic_address = DEFAULT_APIC_BASE;
  817                 io_apic_address[0] = DEFAULT_IO_APIC_BASE;
  818 
  819                 /* fill in with defaults */
  820                 mp_naps = 2;            /* includes BSP */
  821                 mp_maxid = 1;
  822                 mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
  823 #if defined(APIC_IO)
  824                 mp_napics = 1;
  825                 nintrs = 16;
  826 #endif  /* APIC_IO */
  827         }
  828         else {
  829                 if ((cth = mpfps->pap) == 0)
  830                         panic("MP Configuration Table Header MISSING!");
  831 
  832                 cpu_apic_address = (vm_offset_t) cth->apic_address;
  833 
  834                 /* walk the table, recording info of interest */
  835                 totalSize = cth->base_table_length - sizeof(struct MPCTH);
  836                 position = (u_char *) cth + sizeof(struct MPCTH);
  837                 count = cth->entry_count;
  838 
  839                 while (count--) {
  840                         switch (type = *(u_char *) position) {
  841                         case 0: /* processor_entry */
  842                                 if (((proc_entry_ptr)position)->cpu_flags
  843                                     & PROCENTRY_FLAG_EN) {
  844                                         ++mp_naps;
  845                                         mp_maxid++;
  846                                         id_mask |= 1 <<
  847                                             ((proc_entry_ptr)position)->apic_id;
  848                                 }
  849                                 break;
  850                         case 1: /* bus_entry */
  851                                 ++mp_nbusses;
  852                                 break;
  853                         case 2: /* io_apic_entry */
  854                                 if (((io_apic_entry_ptr)position)->apic_flags
  855                                         & IOAPICENTRY_FLAG_EN)
  856                                         io_apic_address[mp_napics++] =
  857                                             (vm_offset_t)((io_apic_entry_ptr)
  858                                                 position)->apic_address;
  859                                 break;
  860                         case 3: /* int_entry */
  861                                 ++nintrs;
  862                                 break;
  863                         case 4: /* int_entry */
  864                                 break;
  865                         default:
  866                                 panic("mpfps Base Table HOSED!");
  867                                 /* NOTREACHED */
  868                         }
  869 
  870                         totalSize -= basetable_entry_types[type].length;
  871                         (u_char*)position += basetable_entry_types[type].length;
  872                 }
  873         }
  874 
  875         /* qualify the numbers */
  876         if (mp_naps > MAXCPU) {
  877                 printf("Warning: only using %d of %d available CPUs!\n",
  878                         MAXCPU, mp_naps);
  879                 mp_naps = MAXCPU;
  880         }
  881 
  882         /* See if we need to fixup HT logical CPUs. */
  883         mptable_hyperthread_fixup(id_mask);
  884         
  885         /*
  886          * Count the BSP.
  887          * This is also used as a counter while starting the APs.
  888          */
  889         mp_ncpus = 1;
  890 
  891         --mp_naps;      /* subtract the BSP */
  892 }
  893 
  894 
  895 /*
  896  * 2nd pass on motherboard's Intel MP specification table.
  897  *
  898  * sets:
  899  *      boot_cpu_id
  900  *      ID_TO_IO(N), phy APIC ID to log CPU/IO table
  901  *      CPU_TO_ID(N), logical CPU to APIC ID table
  902  *      IO_TO_ID(N), logical IO to APIC ID table
  903  *      bus_data[N]
  904  *      io_apic_ints[N]
  905  */
  906 static int
  907 mptable_pass2(void)
  908 {
  909         struct PROCENTRY proc;
  910         int     x;
  911         mpcth_t cth;
  912         int     totalSize;
  913         void*   position;
  914         int     count;
  915         int     type;
  916         int     apic, bus, cpu, intr;
  917         int     i, j;
  918         int     pgeflag;
  919 
  920         POSTCODE(MPTABLE_PASS2_POST);
  921 
  922         /* Initialize fake proc entry for use with HT fixup. */
  923         bzero(&proc, sizeof(proc));
  924         proc.type = 0;
  925         proc.cpu_flags = PROCENTRY_FLAG_EN;
  926 
  927         pgeflag = 0;            /* XXX - Not used under SMP yet.  */
  928 
  929         MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
  930             M_DEVBUF, M_WAITOK);
  931         MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
  932             M_DEVBUF, M_WAITOK);
  933         MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
  934             M_DEVBUF, M_WAITOK);
  935         MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
  936             M_DEVBUF, M_WAITOK);
  937 
  938         bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
  939 
  940         for (i = 0; i < mp_napics; i++) {
  941                 for (j = 0; j < mp_napics; j++) {
  942                         /* same page frame as a previous IO apic? */
  943                         if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
  944                             (io_apic_address[i] & PG_FRAME)) {
  945                                 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
  946                                         + (NPTEPG-2-j) * PAGE_SIZE
  947                                         + (io_apic_address[i] & PAGE_MASK));
  948                                 break;
  949                         }
  950                         /* use this slot if available */
  951                         if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
  952                                 SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
  953                                     pgeflag | (io_apic_address[i] & PG_FRAME));
  954                                 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
  955                                         + (NPTEPG-2-j) * PAGE_SIZE
  956                                         + (io_apic_address[i] & PAGE_MASK));
  957                                 break;
  958                         }
  959                 }
  960         }
  961 
  962         /* clear various tables */
  963         for (x = 0; x < NAPICID; ++x) {
  964                 ID_TO_IO(x) = -1;       /* phy APIC ID to log CPU/IO table */
  965                 CPU_TO_ID(x) = -1;      /* logical CPU to APIC ID table */
  966                 IO_TO_ID(x) = -1;       /* logical IO to APIC ID table */
  967         }
  968 
  969         /* clear bus data table */
  970         for (x = 0; x < mp_nbusses; ++x)
  971                 bus_data[x].bus_id = 0xff;
  972 
  973         /* clear IO APIC INT table */
  974         for (x = 0; x < (nintrs + 1); ++x) {
  975                 io_apic_ints[x].int_type = 0xff;
  976                 io_apic_ints[x].int_vector = 0xff;
  977         }
  978 
  979         /* setup the cpu/apic mapping arrays */
  980         boot_cpu_id = -1;
  981 
  982         /* record whether PIC or virtual-wire mode */
  983         picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
  984 
  985         /* check for use of 'default' configuration */
  986         if (MPFPS_MPFB1 != 0)
  987                 return MPFPS_MPFB1;     /* return default configuration type */
  988 
  989         if ((cth = mpfps->pap) == 0)
  990                 panic("MP Configuration Table Header MISSING!");
  991 
  992         /* walk the table, recording info of interest */
  993         totalSize = cth->base_table_length - sizeof(struct MPCTH);
  994         position = (u_char *) cth + sizeof(struct MPCTH);
  995         count = cth->entry_count;
  996         apic = bus = intr = 0;
  997         cpu = 1;                                /* pre-count the BSP */
  998 
  999         while (count--) {
 1000                 switch (type = *(u_char *) position) {
 1001                 case 0:
 1002                         if (processor_entry(position, cpu)) {
 1003                                 if (logical_cpus != 0 &&
 1004                                     cpu % logical_cpus != 0)
 1005                                         logical_cpus_mask |= (1 << cpu);
 1006                                 ++cpu;
 1007                         }
 1008                         if (need_hyperthreading_fixup) {
 1009                                 /*
 1010                                  * Create fake mptable processor entries
 1011                                  * and feed them to processor_entry() to
 1012                                  * enumerate the logical CPUs.
 1013                                  */
 1014                                 proc.apic_id = ((proc_entry_ptr)position)->apic_id;
 1015                                 for (i = 1; i < logical_cpus; i++) {
 1016                                         proc.apic_id++;
 1017                                         (void)processor_entry(&proc, cpu);
 1018                                         logical_cpus_mask |= (1 << cpu);
 1019                                         cpu++;
 1020                                 }
 1021                         }
 1022                         break;
 1023                 case 1:
 1024                         if (bus_entry(position, bus))
 1025                                 ++bus;
 1026                         break;
 1027                 case 2:
 1028                         if (io_apic_entry(position, apic))
 1029                                 ++apic;
 1030                         break;
 1031                 case 3:
 1032                         if (int_entry(position, intr))
 1033                                 ++intr;
 1034                         break;
 1035                 case 4:
 1036                         /* int_entry(position); */
 1037                         break;
 1038                 default:
 1039                         panic("mpfps Base Table HOSED!");
 1040                         /* NOTREACHED */
 1041                 }
 1042 
 1043                 totalSize -= basetable_entry_types[type].length;
 1044                 (u_char *) position += basetable_entry_types[type].length;
 1045         }
 1046 
 1047         if (boot_cpu_id == -1)
 1048                 panic("NO BSP found!");
 1049 
 1050         /* report fact that its NOT a default configuration */
 1051         return 0;
 1052 }
 1053 
 1054 /*
 1055  * Check if we should perform a hyperthreading "fix-up" to
 1056  * enumerate any logical CPU's that aren't already listed
 1057  * in the table.
 1058  *
 1059  * XXX: We assume that all of the physical CPUs in the
 1060  * system have the same number of logical CPUs.
 1061  *
 1062  * XXX: We assume that APIC ID's are allocated such that
 1063  * the APIC ID's for a physical processor are aligned
 1064  * with the number of logical CPU's in the processor.
 1065  */
 1066 static void
 1067 mptable_hyperthread_fixup(u_int id_mask)
 1068 {
 1069         u_int i, id;
 1070 
 1071         /* Nothing to do if there is no HTT support. */
 1072         if ((cpu_feature & CPUID_HTT) == 0)
 1073                 return;
 1074         logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
 1075         if (logical_cpus <= 1)
 1076                 return;
 1077 
 1078         /*
 1079          * For each APIC ID of a CPU that is set in the mask,
 1080          * scan the other candidate APIC ID's for this
 1081          * physical processor.  If any of those ID's are
 1082          * already in the table, then kill the fixup.
 1083          */
 1084         for (id = 0; id <= MAXCPU; id++) {
 1085                 if ((id_mask & 1 << id) == 0)
 1086                         continue;
 1087                 /* First, make sure we are on a logical_cpus boundary. */
 1088                 if (id % logical_cpus != 0)
 1089                         return;
 1090                 for (i = id + 1; i < id + logical_cpus; i++)
 1091                         if ((id_mask & 1 << i) != 0)
 1092                                 return;
 1093         }
 1094 
 1095         /*
 1096          * Ok, the ID's checked out, so enable the fixup.  We have to fixup
 1097          * mp_naps and mp_maxid right now.
 1098          */
 1099         need_hyperthreading_fixup = 1;
 1100         mp_maxid *= logical_cpus;
 1101         mp_naps *= logical_cpus;
 1102 }
 1103 
 1104 void
 1105 assign_apic_irq(int apic, int intpin, int irq)
 1106 {
 1107         int x;
 1108         
 1109         if (int_to_apicintpin[irq].ioapic != -1)
 1110                 panic("assign_apic_irq: inconsistent table");
 1111         
 1112         int_to_apicintpin[irq].ioapic = apic;
 1113         int_to_apicintpin[irq].int_pin = intpin;
 1114         int_to_apicintpin[irq].apic_address = ioapic[apic];
 1115         int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
 1116         
 1117         for (x = 0; x < nintrs; x++) {
 1118                 if ((io_apic_ints[x].int_type == 0 || 
 1119                      io_apic_ints[x].int_type == 3) &&
 1120                     io_apic_ints[x].int_vector == 0xff &&
 1121                     io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
 1122                     io_apic_ints[x].dst_apic_int == intpin)
 1123                         io_apic_ints[x].int_vector = irq;
 1124         }
 1125 }
 1126 
 1127 void
 1128 revoke_apic_irq(int irq)
 1129 {
 1130         int x;
 1131         int oldapic;
 1132         int oldintpin;
 1133         
 1134         if (int_to_apicintpin[irq].ioapic == -1)
 1135                 panic("revoke_apic_irq: inconsistent table");
 1136         
 1137         oldapic = int_to_apicintpin[irq].ioapic;
 1138         oldintpin = int_to_apicintpin[irq].int_pin;
 1139 
 1140         int_to_apicintpin[irq].ioapic = -1;
 1141         int_to_apicintpin[irq].int_pin = 0;
 1142         int_to_apicintpin[irq].apic_address = NULL;
 1143         int_to_apicintpin[irq].redirindex = 0;
 1144         
 1145         for (x = 0; x < nintrs; x++) {
 1146                 if ((io_apic_ints[x].int_type == 0 || 
 1147                      io_apic_ints[x].int_type == 3) &&
 1148                     io_apic_ints[x].int_vector != 0xff &&
 1149                     io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
 1150                     io_apic_ints[x].dst_apic_int == oldintpin)
 1151                         io_apic_ints[x].int_vector = 0xff;
 1152         }
 1153 }
 1154 
 1155 
 1156 static void
 1157 allocate_apic_irq(int intr)
 1158 {
 1159         int apic;
 1160         int intpin;
 1161         int irq;
 1162         
 1163         if (io_apic_ints[intr].int_vector != 0xff)
 1164                 return;         /* Interrupt handler already assigned */
 1165         
 1166         if (io_apic_ints[intr].int_type != 0 &&
 1167             (io_apic_ints[intr].int_type != 3 ||
 1168              (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
 1169               io_apic_ints[intr].dst_apic_int == 0)))
 1170                 return;         /* Not INT or ExtInt on != (0, 0) */
 1171         
 1172         irq = 0;
 1173         while (irq < APIC_INTMAPSIZE &&
 1174                int_to_apicintpin[irq].ioapic != -1)
 1175                 irq++;
 1176         
 1177         if (irq >= APIC_INTMAPSIZE)
 1178                 return;         /* No free interrupt handlers */
 1179         
 1180         apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
 1181         intpin = io_apic_ints[intr].dst_apic_int;
 1182         
 1183         assign_apic_irq(apic, intpin, irq);
 1184         io_apic_setup_intpin(apic, intpin);
 1185 }
 1186 
 1187 
 1188 static void
 1189 swap_apic_id(int apic, int oldid, int newid)
 1190 {
 1191         int x;
 1192         int oapic;
 1193         
 1194 
 1195         if (oldid == newid)
 1196                 return;                 /* Nothing to do */
 1197         
 1198         printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
 1199                apic, oldid, newid);
 1200         
 1201         /* Swap physical APIC IDs in interrupt entries */
 1202         for (x = 0; x < nintrs; x++) {
 1203                 if (io_apic_ints[x].dst_apic_id == oldid)
 1204                         io_apic_ints[x].dst_apic_id = newid;
 1205                 else if (io_apic_ints[x].dst_apic_id == newid)
 1206                         io_apic_ints[x].dst_apic_id = oldid;
 1207         }
 1208         
 1209         /* Swap physical APIC IDs in IO_TO_ID mappings */
 1210         for (oapic = 0; oapic < mp_napics; oapic++)
 1211                 if (IO_TO_ID(oapic) == newid)
 1212                         break;
 1213         
 1214         if (oapic < mp_napics) {
 1215                 printf("Changing APIC ID for IO APIC #%d from "
 1216                        "%d to %d in MP table\n",
 1217                        oapic, newid, oldid);
 1218                 IO_TO_ID(oapic) = oldid;
 1219         }
 1220         IO_TO_ID(apic) = newid;
 1221 }
 1222 
 1223 
 1224 static void
 1225 fix_id_to_io_mapping(void)
 1226 {
 1227         int x;
 1228 
 1229         for (x = 0; x < NAPICID; x++)
 1230                 ID_TO_IO(x) = -1;
 1231         
 1232         for (x = 0; x <= mp_naps; x++)
 1233                 if (CPU_TO_ID(x) < NAPICID)
 1234                         ID_TO_IO(CPU_TO_ID(x)) = x;
 1235         
 1236         for (x = 0; x < mp_napics; x++)
 1237                 if (IO_TO_ID(x) < NAPICID)
 1238                         ID_TO_IO(IO_TO_ID(x)) = x;
 1239 }
 1240 
 1241 
 1242 static int
 1243 first_free_apic_id(void)
 1244 {
 1245         int freeid, x;
 1246         
 1247         for (freeid = 0; freeid < NAPICID; freeid++) {
 1248                 for (x = 0; x <= mp_naps; x++)
 1249                         if (CPU_TO_ID(x) == freeid)
 1250                                 break;
 1251                 if (x <= mp_naps)
 1252                         continue;
 1253                 for (x = 0; x < mp_napics; x++)
 1254                         if (IO_TO_ID(x) == freeid)
 1255                                 break;
 1256                 if (x < mp_napics)
 1257                         continue;
 1258                 return freeid;
 1259         }
 1260         return freeid;
 1261 }
 1262 
 1263 
 1264 static int
 1265 io_apic_id_acceptable(int apic, int id)
 1266 {
 1267         int cpu;                /* Logical CPU number */
 1268         int oapic;              /* Logical IO APIC number for other IO APIC */
 1269 
 1270         if (id >= NAPICID)
 1271                 return 0;       /* Out of range */
 1272         
 1273         for (cpu = 0; cpu <= mp_naps; cpu++)
 1274                 if (CPU_TO_ID(cpu) == id)
 1275                         return 0;       /* Conflict with CPU */
 1276         
 1277         for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
 1278                 if (IO_TO_ID(oapic) == id)
 1279                         return 0;       /* Conflict with other APIC */
 1280         
 1281         return 1;               /* ID is acceptable for IO APIC */
 1282 }
 1283 
 1284 
 1285 /*
 1286  * parse an Intel MP specification table
 1287  */
 1288 static void
 1289 fix_mp_table(void)
 1290 {
 1291         int     x;
 1292         int     id;
 1293         int     bus_0 = 0;      /* Stop GCC warning */
 1294         int     bus_pci = 0;    /* Stop GCC warning */
 1295         int     num_pci_bus;
 1296         int     apic;           /* IO APIC unit number */
 1297         int     freeid;         /* Free physical APIC ID */
 1298         int     physid;         /* Current physical IO APIC ID */
 1299 
 1300         /*
 1301          * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
 1302          * did it wrong.  The MP spec says that when more than 1 PCI bus
 1303          * exists the BIOS must begin with bus entries for the PCI bus and use
 1304          * actual PCI bus numbering.  This implies that when only 1 PCI bus
 1305          * exists the BIOS can choose to ignore this ordering, and indeed many
 1306          * MP motherboards do ignore it.  This causes a problem when the PCI
 1307          * sub-system makes requests of the MP sub-system based on PCI bus
 1308          * numbers.     So here we look for the situation and renumber the
 1309          * busses and associated INTs in an effort to "make it right".
 1310          */
 1311 
 1312         /* find bus 0, PCI bus, count the number of PCI busses */
 1313         for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
 1314                 if (bus_data[x].bus_id == 0) {
 1315                         bus_0 = x;
 1316                 }
 1317                 if (bus_data[x].bus_type == PCI) {
 1318                         ++num_pci_bus;
 1319                         bus_pci = x;
 1320                 }
 1321         }
 1322         /*
 1323          * bus_0 == slot of bus with ID of 0
 1324          * bus_pci == slot of last PCI bus encountered
 1325          */
 1326 
 1327         /* check the 1 PCI bus case for sanity */
 1328         /* if it is number 0 all is well */
 1329         if (num_pci_bus == 1 &&
 1330             bus_data[bus_pci].bus_id != 0) {
 1331                 
 1332                 /* mis-numbered, swap with whichever bus uses slot 0 */
 1333 
 1334                 /* swap the bus entry types */
 1335                 bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
 1336                 bus_data[bus_0].bus_type = PCI;
 1337 
 1338                 /* swap each relavant INTerrupt entry */
 1339                 id = bus_data[bus_pci].bus_id;
 1340                 for (x = 0; x < nintrs; ++x) {
 1341                         if (io_apic_ints[x].src_bus_id == id) {
 1342                                 io_apic_ints[x].src_bus_id = 0;
 1343                         }
 1344                         else if (io_apic_ints[x].src_bus_id == 0) {
 1345                                 io_apic_ints[x].src_bus_id = id;
 1346                         }
 1347                 }
 1348         }
 1349 
 1350         /* Assign IO APIC IDs.
 1351          * 
 1352          * First try the existing ID. If a conflict is detected, try
 1353          * the ID in the MP table.  If a conflict is still detected, find
 1354          * a free id.
 1355          *
 1356          * We cannot use the ID_TO_IO table before all conflicts has been
 1357          * resolved and the table has been corrected.
 1358          */
 1359         for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
 1360                 
 1361                 /* First try to use the value set by the BIOS */
 1362                 physid = io_apic_get_id(apic);
 1363                 if (io_apic_id_acceptable(apic, physid)) {
 1364                         if (IO_TO_ID(apic) != physid)
 1365                                 swap_apic_id(apic, IO_TO_ID(apic), physid);
 1366                         continue;
 1367                 }
 1368 
 1369                 /* Then check if the value in the MP table is acceptable */
 1370                 if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
 1371                         continue;
 1372 
 1373                 /* Last resort, find a free APIC ID and use it */
 1374                 freeid = first_free_apic_id();
 1375                 if (freeid >= NAPICID)
 1376                         panic("No free physical APIC IDs found");
 1377                 
 1378                 if (io_apic_id_acceptable(apic, freeid)) {
 1379                         swap_apic_id(apic, IO_TO_ID(apic), freeid);
 1380                         continue;
 1381                 }
 1382                 panic("Free physical APIC ID not usable");
 1383         }
 1384         fix_id_to_io_mapping();
 1385 
 1386         /* detect and fix broken Compaq MP table */
 1387         if (apic_int_type(0, 0) == -1) {
 1388                 printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
 1389                 io_apic_ints[nintrs].int_type = 3;      /* ExtInt */
 1390                 io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */
 1391                 /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
 1392                 io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
 1393                 io_apic_ints[nintrs].dst_apic_int = 0;  /* Pin 0 */
 1394                 nintrs++;
 1395         }
 1396 }
 1397 
 1398 
 1399 /* Assign low level interrupt handlers */
 1400 static void
 1401 setup_apic_irq_mapping(void)
 1402 {
 1403         int     x;
 1404         int     int_vector;
 1405 
 1406         /* Clear array */
 1407         for (x = 0; x < APIC_INTMAPSIZE; x++) {
 1408                 int_to_apicintpin[x].ioapic = -1;
 1409                 int_to_apicintpin[x].int_pin = 0;
 1410                 int_to_apicintpin[x].apic_address = NULL;
 1411                 int_to_apicintpin[x].redirindex = 0;
 1412         }
 1413 
 1414         /* First assign ISA/EISA interrupts */
 1415         for (x = 0; x < nintrs; x++) {
 1416                 int_vector = io_apic_ints[x].src_bus_irq;
 1417                 if (int_vector < APIC_INTMAPSIZE &&
 1418                     io_apic_ints[x].int_vector == 0xff && 
 1419                     int_to_apicintpin[int_vector].ioapic == -1 &&
 1420                     (apic_int_is_bus_type(x, ISA) ||
 1421                      apic_int_is_bus_type(x, EISA)) &&
 1422                     io_apic_ints[x].int_type == 0) {
 1423                         assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id), 
 1424                                         io_apic_ints[x].dst_apic_int,
 1425                                         int_vector);
 1426                 }
 1427         }
 1428 
 1429         /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
 1430         for (x = 0; x < nintrs; x++) {
 1431                 if (io_apic_ints[x].dst_apic_int == 0 &&
 1432                     io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
 1433                     io_apic_ints[x].int_vector == 0xff && 
 1434                     int_to_apicintpin[0].ioapic == -1 &&
 1435                     io_apic_ints[x].int_type == 3) {
 1436                         assign_apic_irq(0, 0, 0);
 1437                         break;
 1438                 }
 1439         }
 1440         /* PCI interrupt assignment is deferred */
 1441 }
 1442 
 1443 
 1444 static int
 1445 processor_entry(proc_entry_ptr entry, int cpu)
 1446 {
 1447         /* check for usability */
 1448         if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
 1449                 return 0;
 1450 
 1451         if(entry->apic_id >= NAPICID)
 1452                 panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
 1453         /* check for BSP flag */
 1454         if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
 1455                 boot_cpu_id = entry->apic_id;
 1456                 CPU_TO_ID(0) = entry->apic_id;
 1457                 ID_TO_CPU(entry->apic_id) = 0;
 1458                 return 0;       /* its already been counted */
 1459         }
 1460 
 1461         /* add another AP to list, if less than max number of CPUs */
 1462         else if (cpu < MAXCPU) {
 1463                 CPU_TO_ID(cpu) = entry->apic_id;
 1464                 ID_TO_CPU(entry->apic_id) = cpu;
 1465                 return 1;
 1466         }
 1467 
 1468         return 0;
 1469 }
 1470 
 1471 
 1472 static int
 1473 bus_entry(bus_entry_ptr entry, int bus)
 1474 {
 1475         int     x;
 1476         char    c, name[8];
 1477 
 1478         /* encode the name into an index */
 1479         for (x = 0; x < 6; ++x) {
 1480                 if ((c = entry->bus_type[x]) == ' ')
 1481                         break;
 1482                 name[x] = c;
 1483         }
 1484         name[x] = '\0';
 1485 
 1486         if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
 1487                 panic("unknown bus type: '%s'", name);
 1488 
 1489         bus_data[bus].bus_id = entry->bus_id;
 1490         bus_data[bus].bus_type = x;
 1491 
 1492         return 1;
 1493 }
 1494 
 1495 
 1496 static int
 1497 io_apic_entry(io_apic_entry_ptr entry, int apic)
 1498 {
 1499         if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
 1500                 return 0;
 1501 
 1502         IO_TO_ID(apic) = entry->apic_id;
 1503         if (entry->apic_id < NAPICID)
 1504                 ID_TO_IO(entry->apic_id) = apic;
 1505 
 1506         return 1;
 1507 }
 1508 
 1509 
 1510 static int
 1511 lookup_bus_type(char *name)
 1512 {
 1513         int     x;
 1514 
 1515         for (x = 0; x < MAX_BUSTYPE; ++x)
 1516                 if (strcmp(bus_type_table[x].name, name) == 0)
 1517                         return bus_type_table[x].type;
 1518 
 1519         return UNKNOWN_BUSTYPE;
 1520 }
 1521 
 1522 
 1523 static int
 1524 int_entry(int_entry_ptr entry, int intr)
 1525 {
 1526         int apic;
 1527 
 1528         io_apic_ints[intr].int_type = entry->int_type;
 1529         io_apic_ints[intr].int_flags = entry->int_flags;
 1530         io_apic_ints[intr].src_bus_id = entry->src_bus_id;
 1531         io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
 1532         if (entry->dst_apic_id == 255) {
 1533                 /* This signal goes to all IO APICS.  Select an IO APIC
 1534                    with sufficient number of interrupt pins */
 1535                 for (apic = 0; apic < mp_napics; apic++)
 1536                         if (((io_apic_read(apic, IOAPIC_VER) & 
 1537                               IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >= 
 1538                             entry->dst_apic_int)
 1539                                 break;
 1540                 if (apic < mp_napics)
 1541                         io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
 1542                 else
 1543                         io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
 1544         } else
 1545                 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
 1546         io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
 1547 
 1548         return 1;
 1549 }
 1550 
 1551 
 1552 static int
 1553 apic_int_is_bus_type(int intr, int bus_type)
 1554 {
 1555         int     bus;
 1556 
 1557         for (bus = 0; bus < mp_nbusses; ++bus)
 1558                 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
 1559                     && ((int) bus_data[bus].bus_type == bus_type))
 1560                         return 1;
 1561 
 1562         return 0;
 1563 }
 1564 
 1565 
 1566 /*
 1567  * Given a traditional ISA INT mask, return an APIC mask.
 1568  */
 1569 u_int
 1570 isa_apic_mask(u_int isa_mask)
 1571 {
 1572         int isa_irq;
 1573         int apic_pin;
 1574 
 1575 #if defined(SKIP_IRQ15_REDIRECT)
 1576         if (isa_mask == (1 << 15)) {
 1577                 printf("skipping ISA IRQ15 redirect\n");
 1578                 return isa_mask;
 1579         }
 1580 #endif  /* SKIP_IRQ15_REDIRECT */
 1581 
 1582         isa_irq = ffs(isa_mask);                /* find its bit position */
 1583         if (isa_irq == 0)                       /* doesn't exist */
 1584                 return 0;
 1585         --isa_irq;                              /* make it zero based */
 1586 
 1587         apic_pin = isa_apic_irq(isa_irq);       /* look for APIC connection */
 1588         if (apic_pin == -1)
 1589                 return 0;
 1590 
 1591         return (1 << apic_pin);                 /* convert pin# to a mask */
 1592 }
 1593 
 1594 
 1595 /*
 1596  * Determine which APIC pin an ISA/EISA INT is attached to.
 1597  */
 1598 #define INTTYPE(I)      (io_apic_ints[(I)].int_type)
 1599 #define INTPIN(I)       (io_apic_ints[(I)].dst_apic_int)
 1600 #define INTIRQ(I)       (io_apic_ints[(I)].int_vector)
 1601 #define INTAPIC(I)      (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
 1602 
 1603 #define SRCBUSIRQ(I)    (io_apic_ints[(I)].src_bus_irq)
 1604 int
 1605 isa_apic_irq(int isa_irq)
 1606 {
 1607         int     intr;
 1608 
 1609         for (intr = 0; intr < nintrs; ++intr) {         /* check each record */
 1610                 if (INTTYPE(intr) == 0) {               /* standard INT */
 1611                         if (SRCBUSIRQ(intr) == isa_irq) {
 1612                                 if (apic_int_is_bus_type(intr, ISA) ||
 1613                                     apic_int_is_bus_type(intr, EISA)) {
 1614                                         if (INTIRQ(intr) == 0xff)
 1615                                                 return -1; /* unassigned */
 1616                                         return INTIRQ(intr);    /* found */
 1617                                 }
 1618                         }
 1619                 }
 1620         }
 1621         return -1;                                      /* NOT found */
 1622 }
 1623 
 1624 
 1625 /*
 1626  * Determine which APIC pin a PCI INT is attached to.
 1627  */
 1628 #define SRCBUSID(I)     (io_apic_ints[(I)].src_bus_id)
 1629 #define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
 1630 #define SRCBUSLINE(I)   (io_apic_ints[(I)].src_bus_irq & 0x03)
 1631 int
 1632 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
 1633 {
 1634         int     intr;
 1635 
 1636         --pciInt;                                       /* zero based */
 1637 
 1638         for (intr = 0; intr < nintrs; ++intr)           /* check each record */
 1639                 if ((INTTYPE(intr) == 0)                /* standard INT */
 1640                     && (SRCBUSID(intr) == pciBus)
 1641                     && (SRCBUSDEVICE(intr) == pciDevice)
 1642                     && (SRCBUSLINE(intr) == pciInt))    /* a candidate IRQ */
 1643                         if (apic_int_is_bus_type(intr, PCI)) {
 1644                                 if (INTIRQ(intr) == 0xff)
 1645                                         allocate_apic_irq(intr);
 1646                                 if (INTIRQ(intr) == 0xff)
 1647                                         return -1;      /* unassigned */
 1648                                 return INTIRQ(intr);    /* exact match */
 1649                         }
 1650 
 1651         return -1;                                      /* NOT found */
 1652 }
 1653 
 1654 int
 1655 next_apic_irq(int irq) 
 1656 {
 1657         int intr, ointr;
 1658         int bus, bustype;
 1659 
 1660         bus = 0;
 1661         bustype = 0;
 1662         for (intr = 0; intr < nintrs; intr++) {
 1663                 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
 1664                         continue;
 1665                 bus = SRCBUSID(intr);
 1666                 bustype = apic_bus_type(bus);
 1667                 if (bustype != ISA &&
 1668                     bustype != EISA &&
 1669                     bustype != PCI)
 1670                         continue;
 1671                 break;
 1672         }
 1673         if (intr >= nintrs) {
 1674                 return -1;
 1675         }
 1676         for (ointr = intr + 1; ointr < nintrs; ointr++) {
 1677                 if (INTTYPE(ointr) != 0)
 1678                         continue;
 1679                 if (bus != SRCBUSID(ointr))
 1680                         continue;
 1681                 if (bustype == PCI) {
 1682                         if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
 1683                                 continue;
 1684                         if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
 1685                                 continue;
 1686                 }
 1687                 if (bustype == ISA || bustype == EISA) {
 1688                         if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
 1689                                 continue;
 1690                 }
 1691                 if (INTPIN(intr) == INTPIN(ointr))
 1692                         continue;
 1693                 break;
 1694         }
 1695         if (ointr >= nintrs) {
 1696                 return -1;
 1697         }
 1698         return INTIRQ(ointr);
 1699 }
 1700 #undef SRCBUSLINE
 1701 #undef SRCBUSDEVICE
 1702 #undef SRCBUSID
 1703 #undef SRCBUSIRQ
 1704 
 1705 #undef INTPIN
 1706 #undef INTIRQ
 1707 #undef INTAPIC
 1708 #undef INTTYPE
 1709 
 1710 
 1711 /*
 1712  * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
 1713  *
 1714  * XXX FIXME:
 1715  *  Exactly what this means is unclear at this point.  It is a solution
 1716  *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
 1717  *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
 1718  *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
 1719  *  option.
 1720  */
 1721 int
 1722 undirect_isa_irq(int rirq)
 1723 {
 1724 #if defined(READY)
 1725         if (bootverbose)
 1726             printf("Freeing redirected ISA irq %d.\n", rirq);
 1727         /** FIXME: tickle the MB redirector chip */
 1728         return -1;
 1729 #else
 1730         if (bootverbose)
 1731             printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
 1732         return 0;
 1733 #endif  /* READY */
 1734 }
 1735 
 1736 
 1737 /*
 1738  * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
 1739  */
 1740 int
 1741 undirect_pci_irq(int rirq)
 1742 {
 1743 #if defined(READY)
 1744         if (bootverbose)
 1745                 printf("Freeing redirected PCI irq %d.\n", rirq);
 1746 
 1747         /** FIXME: tickle the MB redirector chip */
 1748         return -1;
 1749 #else
 1750         if (bootverbose)
 1751                 printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
 1752                        rirq);
 1753         return 0;
 1754 #endif  /* READY */
 1755 }
 1756 
 1757 
 1758 /*
 1759  * given a bus ID, return:
 1760  *  the bus type if found
 1761  *  -1 if NOT found
 1762  */
 1763 int
 1764 apic_bus_type(int id)
 1765 {
 1766         int     x;
 1767 
 1768         for (x = 0; x < mp_nbusses; ++x)
 1769                 if (bus_data[x].bus_id == id)
 1770                         return bus_data[x].bus_type;
 1771 
 1772         return -1;
 1773 }
 1774 
 1775 
 1776 /*
 1777  * given a LOGICAL APIC# and pin#, return:
 1778  *  the associated src bus ID if found
 1779  *  -1 if NOT found
 1780  */
 1781 int
 1782 apic_src_bus_id(int apic, int pin)
 1783 {
 1784         int     x;
 1785 
 1786         /* search each of the possible INTerrupt sources */
 1787         for (x = 0; x < nintrs; ++x)
 1788                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1789                     (pin == io_apic_ints[x].dst_apic_int))
 1790                         return (io_apic_ints[x].src_bus_id);
 1791 
 1792         return -1;              /* NOT found */
 1793 }
 1794 
 1795 
 1796 /*
 1797  * given a LOGICAL APIC# and pin#, return:
 1798  *  the associated src bus IRQ if found
 1799  *  -1 if NOT found
 1800  */
 1801 int
 1802 apic_src_bus_irq(int apic, int pin)
 1803 {
 1804         int     x;
 1805 
 1806         for (x = 0; x < nintrs; x++)
 1807                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1808                     (pin == io_apic_ints[x].dst_apic_int))
 1809                         return (io_apic_ints[x].src_bus_irq);
 1810 
 1811         return -1;              /* NOT found */
 1812 }
 1813 
 1814 
 1815 /*
 1816  * given a LOGICAL APIC# and pin#, return:
 1817  *  the associated INTerrupt type if found
 1818  *  -1 if NOT found
 1819  */
 1820 int
 1821 apic_int_type(int apic, int pin)
 1822 {
 1823         int     x;
 1824 
 1825         /* search each of the possible INTerrupt sources */
 1826         for (x = 0; x < nintrs; ++x)
 1827                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1828                     (pin == io_apic_ints[x].dst_apic_int))
 1829                         return (io_apic_ints[x].int_type);
 1830 
 1831         return -1;              /* NOT found */
 1832 }
 1833 
 1834 int 
 1835 apic_irq(int apic, int pin)
 1836 {
 1837         int x;
 1838         int res;
 1839 
 1840         for (x = 0; x < nintrs; ++x)
 1841                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1842                     (pin == io_apic_ints[x].dst_apic_int)) {
 1843                         res = io_apic_ints[x].int_vector;
 1844                         if (res == 0xff)
 1845                                 return -1;
 1846                         if (apic != int_to_apicintpin[res].ioapic)
 1847                                 panic("apic_irq: inconsistent table");
 1848                         if (pin != int_to_apicintpin[res].int_pin)
 1849                                 panic("apic_irq inconsistent table (2)");
 1850                         return res;
 1851                 }
 1852         return -1;
 1853 }
 1854 
 1855 
 1856 /*
 1857  * given a LOGICAL APIC# and pin#, return:
 1858  *  the associated trigger mode if found
 1859  *  -1 if NOT found
 1860  */
 1861 int
 1862 apic_trigger(int apic, int pin)
 1863 {
 1864         int     x;
 1865 
 1866         /* search each of the possible INTerrupt sources */
 1867         for (x = 0; x < nintrs; ++x)
 1868                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1869                     (pin == io_apic_ints[x].dst_apic_int))
 1870                         return ((io_apic_ints[x].int_flags >> 2) & 0x03);
 1871 
 1872         return -1;              /* NOT found */
 1873 }
 1874 
 1875 
 1876 /*
 1877  * given a LOGICAL APIC# and pin#, return:
 1878  *  the associated 'active' level if found
 1879  *  -1 if NOT found
 1880  */
 1881 int
 1882 apic_polarity(int apic, int pin)
 1883 {
 1884         int     x;
 1885 
 1886         /* search each of the possible INTerrupt sources */
 1887         for (x = 0; x < nintrs; ++x)
 1888                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1889                     (pin == io_apic_ints[x].dst_apic_int))
 1890                         return (io_apic_ints[x].int_flags & 0x03);
 1891 
 1892         return -1;              /* NOT found */
 1893 }
 1894 
 1895 
 1896 /*
 1897  * set data according to MP defaults
 1898  * FIXME: probably not complete yet...
 1899  */
 1900 static void
 1901 default_mp_table(int type)
 1902 {
 1903         int     ap_cpu_id;
 1904 #if defined(APIC_IO)
 1905         int     io_apic_id;
 1906         int     pin;
 1907 #endif  /* APIC_IO */
 1908 
 1909 #if 0
 1910         printf("  MP default config type: %d\n", type);
 1911         switch (type) {
 1912         case 1:
 1913                 printf("   bus: ISA, APIC: 82489DX\n");
 1914                 break;
 1915         case 2:
 1916                 printf("   bus: EISA, APIC: 82489DX\n");
 1917                 break;
 1918         case 3:
 1919                 printf("   bus: EISA, APIC: 82489DX\n");
 1920                 break;
 1921         case 4:
 1922                 printf("   bus: MCA, APIC: 82489DX\n");
 1923                 break;
 1924         case 5:
 1925                 printf("   bus: ISA+PCI, APIC: Integrated\n");
 1926                 break;
 1927         case 6:
 1928                 printf("   bus: EISA+PCI, APIC: Integrated\n");
 1929                 break;
 1930         case 7:
 1931                 printf("   bus: MCA+PCI, APIC: Integrated\n");
 1932                 break;
 1933         default:
 1934                 printf("   future type\n");
 1935                 break;
 1936                 /* NOTREACHED */
 1937         }
 1938 #endif  /* 0 */
 1939 
 1940         boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
 1941         ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
 1942 
 1943         /* BSP */
 1944         CPU_TO_ID(0) = boot_cpu_id;
 1945         ID_TO_CPU(boot_cpu_id) = 0;
 1946 
 1947         /* one and only AP */
 1948         CPU_TO_ID(1) = ap_cpu_id;
 1949         ID_TO_CPU(ap_cpu_id) = 1;
 1950 
 1951 #if defined(APIC_IO)
 1952         /* one and only IO APIC */
 1953         io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
 1954 
 1955         /*
 1956          * sanity check, refer to MP spec section 3.6.6, last paragraph
 1957          * necessary as some hardware isn't properly setting up the IO APIC
 1958          */
 1959 #if defined(REALLY_ANAL_IOAPICID_VALUE)
 1960         if (io_apic_id != 2) {
 1961 #else
 1962         if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
 1963 #endif  /* REALLY_ANAL_IOAPICID_VALUE */
 1964                 io_apic_set_id(0, 2);
 1965                 io_apic_id = 2;
 1966         }
 1967         IO_TO_ID(0) = io_apic_id;
 1968         ID_TO_IO(io_apic_id) = 0;
 1969 #endif  /* APIC_IO */
 1970 
 1971         /* fill out bus entries */
 1972         switch (type) {
 1973         case 1:
 1974         case 2:
 1975         case 3:
 1976         case 4:
 1977         case 5:
 1978         case 6:
 1979         case 7:
 1980                 bus_data[0].bus_id = default_data[type - 1][1];
 1981                 bus_data[0].bus_type = default_data[type - 1][2];
 1982                 bus_data[1].bus_id = default_data[type - 1][3];
 1983                 bus_data[1].bus_type = default_data[type - 1][4];
 1984                 break;
 1985 
 1986         /* case 4: case 7:                 MCA NOT supported */
 1987         default:                /* illegal/reserved */
 1988                 panic("BAD default MP config: %d", type);
 1989                 /* NOTREACHED */
 1990         }
 1991 
 1992 #if defined(APIC_IO)
 1993         /* general cases from MP v1.4, table 5-2 */
 1994         for (pin = 0; pin < 16; ++pin) {
 1995                 io_apic_ints[pin].int_type = 0;
 1996                 io_apic_ints[pin].int_flags = 0x05;     /* edge/active-hi */
 1997                 io_apic_ints[pin].src_bus_id = 0;
 1998                 io_apic_ints[pin].src_bus_irq = pin;    /* IRQ2 caught below */
 1999                 io_apic_ints[pin].dst_apic_id = io_apic_id;
 2000                 io_apic_ints[pin].dst_apic_int = pin;   /* 1-to-1 */
 2001         }
 2002 
 2003         /* special cases from MP v1.4, table 5-2 */
 2004         if (type == 2) {
 2005                 io_apic_ints[2].int_type = 0xff;        /* N/C */
 2006                 io_apic_ints[13].int_type = 0xff;       /* N/C */
 2007 #if !defined(APIC_MIXED_MODE)
 2008                 /** FIXME: ??? */
 2009                 panic("sorry, can't support type 2 default yet");
 2010 #endif  /* APIC_MIXED_MODE */
 2011         }
 2012         else
 2013                 io_apic_ints[2].src_bus_irq = 0;        /* ISA IRQ0 is on APIC INT 2 */
 2014 
 2015         if (type == 7)
 2016                 io_apic_ints[0].int_type = 0xff;        /* N/C */
 2017         else
 2018                 io_apic_ints[0].int_type = 3;   /* vectored 8259 */
 2019 #endif  /* APIC_IO */
 2020 }
 2021 
 2022 
 2023 /*
 2024  * start each AP in our list
 2025  */
 2026 static int
 2027 start_all_aps(u_int boot_addr)
 2028 {
 2029         int     x, i, pg;
 2030 #ifndef PC98
 2031         u_char  mpbiosreason;
 2032 #endif
 2033         u_long  mpbioswarmvec;
 2034         struct pcpu *pc;
 2035         char *stack;
 2036         uintptr_t kptbase;
 2037 
 2038         POSTCODE(START_ALL_APS_POST);
 2039 
 2040         mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 2041 
 2042         /* initialize BSP's local APIC */
 2043         apic_initialize();
 2044         bsp_apic_ready = 1;
 2045 
 2046         /* install the AP 1st level boot code */
 2047         install_ap_tramp(boot_addr);
 2048 
 2049 
 2050         /* save the current value of the warm-start vector */
 2051         mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
 2052 #ifndef PC98
 2053         outb(CMOS_REG, BIOS_RESET);
 2054         mpbiosreason = inb(CMOS_DATA);
 2055 #endif
 2056 
 2057         /* set up temporary P==V mapping for AP boot */
 2058         /* XXX this is a hack, we should boot the AP on its own stack/PTD */
 2059         kptbase = (uintptr_t)(void *)KPTphys;
 2060         for (x = 0; x < NKPT; x++)
 2061                 PTD[x] = (pd_entry_t)(PG_V | PG_RW |
 2062                     ((kptbase + x * PAGE_SIZE) & PG_FRAME));
 2063         invltlb();
 2064 
 2065         /* start each AP */
 2066         for (x = 1; x <= mp_naps; ++x) {
 2067 
 2068                 /* This is a bit verbose, it will go away soon.  */
 2069 
 2070                 /* first page of AP's private space */
 2071                 pg = x * i386_btop(sizeof(struct privatespace));
 2072 
 2073                 /* allocate a new private data page */
 2074                 pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);
 2075 
 2076                 /* wire it into the private page table page */
 2077                 SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));
 2078 
 2079                 /* allocate and set up an idle stack data page */
 2080                 stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
 2081                 for (i = 0; i < KSTACK_PAGES; i++)
 2082                         SMPpt[pg + 1 + i] = (pt_entry_t)
 2083                             (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 2084 
 2085                 /* prime data page for it to use */
 2086                 pcpu_init(pc, x, sizeof(struct pcpu));
 2087 
 2088                 /* setup a vector to our boot code */
 2089                 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
 2090                 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
 2091 #ifndef PC98
 2092                 outb(CMOS_REG, BIOS_RESET);
 2093                 outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
 2094 #endif
 2095 
 2096                 bootSTK = &SMP_prvspace[x].idlekstack[KSTACK_PAGES * PAGE_SIZE];
 2097                 bootAP = x;
 2098 
 2099                 /* attempt to start the Application Processor */
 2100                 CHECK_INIT(99); /* setup checkpoints */
 2101                 if (!start_ap(x, boot_addr)) {
 2102                         printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
 2103                         CHECK_PRINT("trace");   /* show checkpoints */
 2104                         /* better panic as the AP may be running loose */
 2105                         printf("panic y/n? [y] ");
 2106                         if (cngetc() != 'n')
 2107                                 panic("bye-bye");
 2108                 }
 2109                 CHECK_PRINT("trace");           /* show checkpoints */
 2110 
 2111                 /* record its version info */
 2112                 cpu_apic_versions[x] = cpu_apic_versions[0];
 2113 
 2114                 all_cpus |= (1 << x);           /* record AP in CPU map */
 2115         }
 2116 
 2117         /* build our map of 'other' CPUs */
 2118         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
 2119 
 2120         /* fill in our (BSP) APIC version */
 2121         cpu_apic_versions[0] = lapic.version;
 2122 
 2123         /* restore the warmstart vector */
 2124         *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
 2125 #ifndef PC98
 2126         outb(CMOS_REG, BIOS_RESET);
 2127         outb(CMOS_DATA, mpbiosreason);
 2128 #endif
 2129 
 2130         /*
 2131          * Set up the idle context for the BSP.  Similar to above except
 2132          * that some was done by locore, some by pmap.c and some is implicit
 2133          * because the BSP is cpu#0 and the page is initially zero, and also
 2134          * because we can refer to variables by name on the BSP..
 2135          */
 2136 
 2137         /* Allocate and setup BSP idle stack */
 2138         stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
 2139         for (i = 0; i < KSTACK_PAGES; i++)
 2140                 SMPpt[1 + i] = (pt_entry_t)
 2141                     (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 2142 
 2143         for (x = 0; x < NKPT; x++)
 2144                 PTD[x] = 0;
 2145         pmap_set_opt();
 2146 
 2147         /* number of APs actually started */
 2148         return mp_ncpus - 1;
 2149 }
 2150 
 2151 
 2152 /*
 2153  * load the 1st level AP boot code into base memory.
 2154  */
 2155 
 2156 /* targets for relocation */
 2157 extern void bigJump(void);
 2158 extern void bootCodeSeg(void);
 2159 extern void bootDataSeg(void);
 2160 extern void MPentry(void);
 2161 extern u_int MP_GDT;
 2162 extern u_int mp_gdtbase;
 2163 
 2164 static void
 2165 install_ap_tramp(u_int boot_addr)
 2166 {
 2167         int     x;
 2168         int     size = *(int *) ((u_long) & bootMP_size);
 2169         u_char *src = (u_char *) ((u_long) bootMP);
 2170         u_char *dst = (u_char *) boot_addr + KERNBASE;
 2171         u_int   boot_base = (u_int) bootMP;
 2172         u_int8_t *dst8;
 2173         u_int16_t *dst16;
 2174         u_int32_t *dst32;
 2175 
 2176         POSTCODE(INSTALL_AP_TRAMP_POST);
 2177 
 2178         for (x = 0; x < size; ++x)
 2179                 *dst++ = *src++;
 2180 
 2181         /*
 2182          * modify addresses in code we just moved to basemem. unfortunately we
 2183          * need fairly detailed info about mpboot.s for this to work.  changes
 2184          * to mpboot.s might require changes here.
 2185          */
 2186 
 2187         /* boot code is located in KERNEL space */
 2188         dst = (u_char *) boot_addr + KERNBASE;
 2189 
 2190         /* modify the lgdt arg */
 2191         dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
 2192         *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
 2193 
 2194         /* modify the ljmp target for MPentry() */
 2195         dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
 2196         *dst32 = ((u_int) MPentry - KERNBASE);
 2197 
 2198         /* modify the target for boot code segment */
 2199         dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
 2200         dst8 = (u_int8_t *) (dst16 + 1);
 2201         *dst16 = (u_int) boot_addr & 0xffff;
 2202         *dst8 = ((u_int) boot_addr >> 16) & 0xff;
 2203 
 2204         /* modify the target for boot data segment */
 2205         dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
 2206         dst8 = (u_int8_t *) (dst16 + 1);
 2207         *dst16 = (u_int) boot_addr & 0xffff;
 2208         *dst8 = ((u_int) boot_addr >> 16) & 0xff;
 2209 }
 2210 
 2211 
 2212 /*
 2213  * this function starts the AP (application processor) identified
 2214  * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
 2215  * to accomplish this.  This is necessary because of the nuances
 2216  * of the different hardware we might encounter.  It ain't pretty,
 2217  * but it seems to work.
 2218  */
 2219 static int
 2220 start_ap(int logical_cpu, u_int boot_addr)
 2221 {
 2222         int     physical_cpu;
 2223         int     vector;
 2224         int     cpus;
 2225         u_long  icr_lo, icr_hi;
 2226 
 2227         POSTCODE(START_AP_POST);
 2228 
 2229         /* get the PHYSICAL APIC ID# */
 2230         physical_cpu = CPU_TO_ID(logical_cpu);
 2231 
 2232         /* calculate the vector */
 2233         vector = (boot_addr >> 12) & 0xff;
 2234 
 2235         /* used as a watchpoint to signal AP startup */
 2236         cpus = mp_ncpus;
 2237 
 2238         /*
 2239          * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
 2240          * and running the target CPU. OR this INIT IPI might be latched (P5
 2241          * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
 2242          * ignored.
 2243          */
 2244 
 2245         /* setup the address for the target AP */
 2246         icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
 2247         icr_hi |= (physical_cpu << 24);
 2248         lapic.icr_hi = icr_hi;
 2249 
 2250         /* do an INIT IPI: assert RESET */
 2251         icr_lo = lapic.icr_lo & 0xfff00000;
 2252         lapic.icr_lo = icr_lo | 0x0000c500;
 2253 
 2254         /* wait for pending status end */
 2255         while (lapic.icr_lo & APIC_DELSTAT_MASK)
 2256                  /* spin */ ;
 2257 
 2258         /* do an INIT IPI: deassert RESET */
 2259         lapic.icr_lo = icr_lo | 0x00008500;
 2260 
 2261         /* wait for pending status end */
 2262         u_sleep(10000);         /* wait ~10mS */
 2263         while (lapic.icr_lo & APIC_DELSTAT_MASK)
 2264                  /* spin */ ;
 2265 
 2266         /*
 2267          * next we do a STARTUP IPI: the previous INIT IPI might still be
 2268          * latched, (P5 bug) this 1st STARTUP would then terminate
 2269          * immediately, and the previously started INIT IPI would continue. OR
 2270          * the previous INIT IPI has already run. and this STARTUP IPI will
 2271          * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
 2272          * will run.
 2273          */
 2274 
 2275         /* do a STARTUP IPI */
 2276         lapic.icr_lo = icr_lo | 0x00000600 | vector;
 2277         while (lapic.icr_lo & APIC_DELSTAT_MASK)
 2278                  /* spin */ ;
 2279         u_sleep(200);           /* wait ~200uS */
 2280 
 2281         /*
 2282          * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
 2283          * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
 2284          * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
 2285          * recognized after hardware RESET or INIT IPI.
 2286          */
 2287 
 2288         lapic.icr_lo = icr_lo | 0x00000600 | vector;
 2289         while (lapic.icr_lo & APIC_DELSTAT_MASK)
 2290                  /* spin */ ;
 2291         u_sleep(200);           /* wait ~200uS */
 2292 
 2293         /* wait for it to start */
 2294         set_apic_timer(5000000);/* == 5 seconds */
 2295         while (read_apic_timer())
 2296                 if (mp_ncpus > cpus)
 2297                         return 1;       /* return SUCCESS */
 2298 
 2299         return 0;               /* return FAILURE */
 2300 }
 2301 
 2302 #if defined(APIC_IO)
 2303 
 2304 #ifdef COUNT_XINVLTLB_HITS
 2305 u_int xhits_gbl[MAXCPU];
 2306 u_int xhits_pg[MAXCPU];
 2307 u_int xhits_rng[MAXCPU];
 2308 SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
 2309 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
 2310     sizeof(xhits_gbl), "IU", "");
 2311 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
 2312     sizeof(xhits_pg), "IU", "");
 2313 SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
 2314     sizeof(xhits_rng), "IU", "");
 2315 
 2316 u_int ipi_global;
 2317 u_int ipi_page;
 2318 u_int ipi_range;
 2319 u_int ipi_range_size;
 2320 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
 2321 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
 2322 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
 2323 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
 2324     0, "");
 2325 
 2326 u_int ipi_masked_global;
 2327 u_int ipi_masked_page;
 2328 u_int ipi_masked_range;
 2329 u_int ipi_masked_range_size;
 2330 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
 2331     &ipi_masked_global, 0, "");
 2332 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
 2333     &ipi_masked_page, 0, "");
 2334 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
 2335     &ipi_masked_range, 0, "");
 2336 SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
 2337     &ipi_masked_range_size, 0, "");
 2338 #endif
 2339 
 2340 /*
 2341  * Flush the TLB on all other CPU's
 2342  */
 2343 static void
 2344 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 2345 {
 2346         u_int ncpu;
 2347         register_t eflags;
 2348 
 2349         ncpu = mp_ncpus - 1;    /* does not shootdown self */
 2350         if (ncpu < 1)
 2351                 return;         /* no other cpus */
 2352         eflags = read_eflags();
 2353         if ((eflags & PSL_I) == 0)
 2354                 panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
 2355         mtx_lock_spin(&smp_tlb_mtx);
 2356         smp_tlb_addr1 = addr1;
 2357         smp_tlb_addr2 = addr2;
 2358         atomic_store_rel_int(&smp_tlb_wait, 0);
 2359         ipi_all_but_self(vector);
 2360         while (smp_tlb_wait < ncpu)
 2361                 ia32_pause();
 2362         mtx_unlock_spin(&smp_tlb_mtx);
 2363 }
 2364 
 2365 /*
 2366  * This is about as magic as it gets.  fortune(1) has got similar code
 2367  * for reversing bits in a word.  Who thinks up this stuff??
 2368  *
 2369  * Yes, it does appear to be consistently faster than:
 2370  * while (i = ffs(m)) {
 2371  *      m >>= i;
 2372  *      bits++;
 2373  * }
 2374  * and
 2375  * while (lsb = (m & -m)) {     // This is magic too
 2376  *      m &= ~lsb;              // or: m ^= lsb
 2377  *      bits++;
 2378  * }
 2379  * Both of these latter forms do some very strange things on gcc-3.1 with
 2380  * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
 2381  * There is probably an SSE or MMX popcnt instruction.
 2382  *
 2383  * I wonder if this should be in libkern?
 2384  *
 2385  * XXX Stop the presses!  Another one:
 2386  * static __inline u_int32_t
 2387  * popcnt1(u_int32_t v)
 2388  * {
 2389  *      v -= ((v >> 1) & 0x55555555);
 2390  *      v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
 2391  *      v = (v + (v >> 4)) & 0x0F0F0F0F;
 2392  *      return (v * 0x01010101) >> 24;
 2393  * }
 2394  * The downside is that it has a multiply.  With a pentium3 with
 2395  * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
 2396  * an imull, and in that case it is faster.  In most other cases
 2397  * it appears slightly slower.
 2398  */
 2399 static __inline u_int32_t
 2400 popcnt(u_int32_t m)
 2401 {
 2402 
 2403         m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
 2404         m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
 2405         m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
 2406         m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
 2407         m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
 2408         return m;
 2409 }
 2410 
 2411 static void
 2412 smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 2413 {
 2414         int ncpu, othercpus;
 2415         register_t eflags;
 2416 
 2417         othercpus = mp_ncpus - 1;
 2418         if (mask == (u_int)-1) {
 2419                 ncpu = othercpus;
 2420                 if (ncpu < 1)
 2421                         return;
 2422         } else {
 2423                 /* XXX there should be a pcpu self mask */
 2424                 mask &= ~(1 << PCPU_GET(cpuid));
 2425                 if (mask == 0)
 2426                         return;
 2427                 ncpu = popcnt(mask);
 2428                 if (ncpu > othercpus) {
 2429                         /* XXX this should be a panic offence */
 2430                         printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
 2431                             ncpu, othercpus);
 2432                         ncpu = othercpus;
 2433                 }
 2434                 /* XXX should be a panic, implied by mask == 0 above */
 2435                 if (ncpu < 1)
 2436                         return;
 2437         }
 2438         eflags = read_eflags();
 2439         if ((eflags & PSL_I) == 0)
 2440                 panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
 2441         mtx_lock_spin(&smp_tlb_mtx);
 2442         smp_tlb_addr1 = addr1;
 2443         smp_tlb_addr2 = addr2;
 2444         atomic_store_rel_int(&smp_tlb_wait, 0);
 2445         if (mask == (u_int)-1)
 2446                 ipi_all_but_self(vector);
 2447         else
 2448                 ipi_selected(mask, vector);
 2449         while (smp_tlb_wait < ncpu)
 2450                 ia32_pause();
 2451         mtx_unlock_spin(&smp_tlb_mtx);
 2452 }
 2453 #endif
 2454 
 2455 void
 2456 smp_invltlb(void)
 2457 {
 2458 #if defined(APIC_IO)
 2459         if (smp_started) {
 2460                 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
 2461 #ifdef COUNT_XINVLTLB_HITS
 2462                 ipi_global++;
 2463 #endif
 2464         }
 2465 #endif  /* APIC_IO */
 2466 }
 2467 
 2468 void
 2469 smp_invlpg(vm_offset_t addr)
 2470 {
 2471 #if defined(APIC_IO)
 2472         if (smp_started) {
 2473                 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
 2474 #ifdef COUNT_XINVLTLB_HITS
 2475                 ipi_page++;
 2476 #endif
 2477         }
 2478 #endif  /* APIC_IO */
 2479 }
 2480 
 2481 void
 2482 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
 2483 {
 2484 #if defined(APIC_IO)
 2485         if (smp_started) {
 2486                 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
 2487 #ifdef COUNT_XINVLTLB_HITS
 2488                 ipi_range++;
 2489                 ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
 2490 #endif
 2491         }
 2492 #endif  /* APIC_IO */
 2493 }
 2494 
 2495 void
 2496 smp_masked_invltlb(u_int mask)
 2497 {
 2498 #if defined(APIC_IO)
 2499         if (smp_started) {
 2500                 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
 2501 #ifdef COUNT_XINVLTLB_HITS
 2502                 ipi_masked_global++;
 2503 #endif
 2504         }
 2505 #endif  /* APIC_IO */
 2506 }
 2507 
 2508 void
 2509 smp_masked_invlpg(u_int mask, vm_offset_t addr)
 2510 {
 2511 #if defined(APIC_IO)
 2512         if (smp_started) {
 2513                 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
 2514 #ifdef COUNT_XINVLTLB_HITS
 2515                 ipi_masked_page++;
 2516 #endif
 2517         }
 2518 #endif  /* APIC_IO */
 2519 }
 2520 
 2521 void
 2522 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
 2523 {
 2524 #if defined(APIC_IO)
 2525         if (smp_started) {
 2526                 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
 2527 #ifdef COUNT_XINVLTLB_HITS
 2528                 ipi_masked_range++;
 2529                 ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
 2530 #endif
 2531         }
 2532 #endif  /* APIC_IO */
 2533 }
 2534 
 2535 
 2536 /*
 2537  * This is called once the rest of the system is up and running and we're
 2538  * ready to let the AP's out of the pen.
 2539  */
 2540 void
 2541 ap_init(void)
 2542 {
 2543         u_int   apic_id;
 2544 
 2545         /* spin until all the AP's are ready */
 2546         while (!aps_ready)
 2547                 ia32_pause();
 2548 
 2549         /* BSP may have changed PTD while we were waiting */
 2550         invltlb();
 2551 
 2552 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 2553         lidt(&r_idt);
 2554 #endif
 2555 
 2556         /* set up CPU registers and state */
 2557         cpu_setregs();
 2558 
 2559         /* set up FPU state on the AP */
 2560         npxinit(__INITIAL_NPXCW__);
 2561 
 2562         /* set up SSE registers */
 2563         enable_sse();
 2564 
 2565         /* A quick check from sanity claus */
 2566         apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
 2567         if (PCPU_GET(cpuid) != apic_id) {
 2568                 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
 2569                 printf("SMP: apic_id = %d\n", apic_id);
 2570                 printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]);
 2571                 panic("cpuid mismatch! boom!!");
 2572         }
 2573 
 2574         /* Init local apic for irq's */
 2575         apic_initialize();
 2576 
 2577         /* Set memory range attributes for this CPU to match the BSP */
 2578         mem_range_AP_init();
 2579 
 2580         mtx_lock_spin(&ap_boot_mtx);
 2581 
 2582         smp_cpus++;
 2583 
 2584         CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
 2585         printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
 2586 
 2587         /* Build our map of 'other' CPUs. */
 2588         PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
 2589 
 2590         if (bootverbose)
 2591                 apic_dump("ap_init()");
 2592 
 2593         if (smp_cpus == mp_ncpus) {
 2594                 /* enable IPI's, tlb shootdown, freezes etc */
 2595                 atomic_store_rel_int(&smp_started, 1);
 2596                 smp_active = 1;  /* historic */
 2597         }
 2598 
 2599         mtx_unlock_spin(&ap_boot_mtx);
 2600 
 2601         /* wait until all the AP's are up */
 2602         while (smp_started == 0)
 2603                 ia32_pause();
 2604 
 2605         /* ok, now grab sched_lock and enter the scheduler */
 2606         mtx_lock_spin(&sched_lock);
 2607 
 2608         binuptime(PCPU_PTR(switchtime));
 2609         PCPU_SET(switchticks, ticks);
 2610 
 2611         cpu_throw(NULL, choosethread());        /* doesn't return */
 2612 
 2613         panic("scheduler returned us to %s", __func__);
 2614 }
 2615 
 2616 /*
 2617  * For statclock, we send an IPI to all CPU's to have them call this
 2618  * function.
 2619  *
 2620  * WARNING! unpend() will call statclock() directly and skip this
 2621  * routine.
 2622  */
 2623 void
 2624 forwarded_statclock(struct clockframe frame)
 2625 {
 2626 
 2627         if (profprocs != 0)
 2628                 profclock(&frame);
 2629         if (pscnt == psdiv)
 2630                 statclock(&frame);
 2631 }
 2632 
 2633 void
 2634 forward_statclock(void)
 2635 {
 2636         int map;
 2637 
 2638         CTR0(KTR_SMP, "forward_statclock");
 2639 
 2640         if (!smp_started || cold || panicstr)
 2641                 return;
 2642 
 2643         map = PCPU_GET(other_cpus) & ~stopped_cpus ;
 2644         if (map != 0)
 2645                 ipi_selected(map, IPI_STATCLOCK);
 2646 }
 2647 
 2648 /*
 2649  * For each hardclock(), we send an IPI to all other CPU's to have them
 2650  * execute this function.  It would be nice to reduce contention on
 2651  * sched_lock if we could simply peek at the CPU to determine the user/kernel
 2652  * state and call hardclock_process() on the CPU receiving the clock interrupt
 2653  * and then just use a simple IPI to handle any ast's if needed.
 2654  *
 2655  * WARNING! unpend() will call hardclock_process() directly and skip this
 2656  * routine.
 2657  */
 2658 void
 2659 forwarded_hardclock(struct clockframe frame)
 2660 {
 2661 
 2662         hardclock_process(&frame);
 2663 }
 2664 
 2665 void 
 2666 forward_hardclock(void)
 2667 {
 2668         u_int map;
 2669 
 2670         CTR0(KTR_SMP, "forward_hardclock");
 2671 
 2672         if (!smp_started || cold || panicstr)
 2673                 return;
 2674 
 2675         map = PCPU_GET(other_cpus) & ~stopped_cpus ;
 2676         if (map != 0)
 2677                 ipi_selected(map, IPI_HARDCLOCK);
 2678 }
 2679 
 2680 #ifdef APIC_INTR_REORDER
 2681 /*
 2682  *      Maintain mapping from softintr vector to isr bit in local apic.
 2683  */
 2684 void
 2685 set_lapic_isrloc(int intr, int vector)
 2686 {
 2687         if (intr < 0 || intr > 32)
 2688                 panic("set_apic_isrloc: bad intr argument: %d",intr);
 2689         if (vector < ICU_OFFSET || vector > 255)
 2690                 panic("set_apic_isrloc: bad vector argument: %d",vector);
 2691         apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
 2692         apic_isrbit_location[intr].bit = (1<<(vector & 31));
 2693 }
 2694 #endif
 2695 
 2696 /*
 2697  * send an IPI to a set of cpus.
 2698  */
 2699 void
 2700 ipi_selected(u_int32_t cpus, u_int ipi)
 2701 {
 2702 
 2703         CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 2704         selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
 2705 }
 2706 
 2707 /*
 2708  * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
 2709  */
 2710 void
 2711 ipi_all(u_int ipi)
 2712 {
 2713 
 2714         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 2715         apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED); 
 2716 }
 2717 
 2718 /*
 2719  * send an IPI to all CPUs EXCEPT myself
 2720  */
 2721 void
 2722 ipi_all_but_self(u_int ipi)
 2723 {
 2724 
 2725         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 2726         apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED); 
 2727 }
 2728 
 2729 /*
 2730  * send an IPI to myself
 2731  */
 2732 void
 2733 ipi_self(u_int ipi)
 2734 {
 2735 
 2736         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 2737         apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED); 
 2738 }
 2739 
 2740 static void
 2741 release_aps(void *dummy __unused)
 2742 {
 2743 
 2744         if (mp_ncpus == 1) 
 2745                 return;
 2746         mtx_lock_spin(&sched_lock);
 2747         atomic_store_rel_int(&aps_ready, 1);
 2748         while (smp_started == 0)
 2749                 ia32_pause();
 2750         mtx_unlock_spin(&sched_lock);
 2751 }
 2752 
 2753 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 2754 
 2755 static int      hlt_cpus_mask;
 2756 static int      hlt_logical_cpus = 1;
 2757 static struct   sysctl_ctx_list logical_cpu_clist;
 2758 
 2759 static int
 2760 sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
 2761 {
 2762         u_int mask;
 2763         int error;
 2764 
 2765         mask = hlt_cpus_mask;
 2766         error = sysctl_handle_int(oidp, &mask, 0, req);
 2767         if (error || !req->newptr)
 2768                 return (error);
 2769 
 2770         if (logical_cpus_mask != 0 &&
 2771             (mask & logical_cpus_mask) == logical_cpus_mask)
 2772                 hlt_logical_cpus = 1;
 2773         else
 2774                 hlt_logical_cpus = 0;
 2775 
 2776         if ((mask & all_cpus) == all_cpus)
 2777                 mask &= ~(1<<0);
 2778         hlt_cpus_mask = mask;
 2779         return (error);
 2780 }
 2781 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
 2782     0, 0, sysctl_hlt_cpus, "IU", "");
 2783 
 2784 static int
 2785 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
 2786 {
 2787         int disable, error;
 2788 
 2789         disable = hlt_logical_cpus;
 2790         error = sysctl_handle_int(oidp, &disable, 0, req);
 2791         if (error || !req->newptr)
 2792                 return (error);
 2793 
 2794         if (disable)
 2795                 hlt_cpus_mask |= logical_cpus_mask;
 2796         else
 2797                 hlt_cpus_mask &= ~logical_cpus_mask;
 2798 
 2799         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 2800                 hlt_cpus_mask &= ~(1<<0);
 2801 
 2802         hlt_logical_cpus = disable;
 2803         return (error);
 2804 }
 2805 
 2806 static void
 2807 cpu_hlt_setup(void *dummy __unused)
 2808 {
 2809 
 2810         if (logical_cpus_mask != 0) {
 2811                 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
 2812                     &hlt_logical_cpus);
 2813                 sysctl_ctx_init(&logical_cpu_clist);
 2814                 SYSCTL_ADD_PROC(&logical_cpu_clist,
 2815                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 2816                     "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
 2817                     sysctl_hlt_logical_cpus, "IU", "");
 2818                 SYSCTL_ADD_UINT(&logical_cpu_clist,
 2819                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 2820                     "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
 2821                     &logical_cpus_mask, 0, "");
 2822 
 2823                 if (hlt_logical_cpus)
 2824                         hlt_cpus_mask |= logical_cpus_mask;
 2825         }
 2826 }
 2827 SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
 2828 
 2829 int
 2830 mp_grab_cpu_hlt(void)
 2831 {
 2832         u_int mask = PCPU_GET(cpumask);
 2833         int retval;
 2834 
 2835         retval = mask & hlt_cpus_mask;
 2836         while (mask & hlt_cpus_mask)
 2837                 __asm __volatile("sti; hlt" : : : "memory");
 2838         return (retval);
 2839 }

Cache object: 8da679698ff09279860cf19125217f0f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.