The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/mp_machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1996, by Steve Passe
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. The name of the developer may NOT be used to endorse or promote products
   11  *    derived from this software without specific prior written permission.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  *
   25  * $FreeBSD: releng/5.0/sys/i386/i386/mp_machdep.c 105216 2002-10-16 08:57:14Z phk $
   26  */
   27 
   28 #include "opt_cpu.h"
   29 #include "opt_kstack_pages.h"
   30 
   31 #ifdef SMP
   32 #include <machine/smptests.h>
   33 #else
   34 #error
   35 #endif
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/bus.h>
   40 #include <sys/cons.h>   /* cngetc() */
   41 #include <sys/dkstat.h>
   42 #ifdef GPROF 
   43 #include <sys/gmon.h>
   44 #endif
   45 #include <sys/kernel.h>
   46 #include <sys/ktr.h>
   47 #include <sys/lock.h>
   48 #include <sys/malloc.h>
   49 #include <sys/memrange.h>
   50 #include <sys/mutex.h>
   51 #include <sys/pcpu.h>
   52 #include <sys/proc.h>
   53 #include <sys/smp.h>
   54 #include <sys/sysctl.h>
   55 #include <sys/user.h>
   56 
   57 #include <vm/vm.h>
   58 #include <vm/vm_param.h>
   59 #include <vm/pmap.h>
   60 #include <vm/vm_kern.h>
   61 #include <vm/vm_extern.h>
   62 #include <vm/vm_map.h>
   63 
   64 #include <machine/apic.h>
   65 #include <machine/atomic.h>
   66 #include <machine/cpu.h>
   67 #include <machine/cpufunc.h>
   68 #include <machine/mpapic.h>
   69 #include <machine/psl.h>
   70 #include <machine/segments.h>
   71 #include <machine/smp.h>
   72 #include <machine/smptests.h>   /** TEST_DEFAULT_CONFIG, TEST_TEST1 */
   73 #include <machine/tss.h>
   74 #include <machine/specialreg.h>
   75 #include <machine/privatespace.h>
   76 
   77 #if defined(APIC_IO)
   78 #include <machine/md_var.h>             /* setidt() */
   79 #include <i386/isa/icu.h>               /* IPIs */
   80 #include <i386/isa/intr_machdep.h>      /* IPIs */
   81 #endif  /* APIC_IO */
   82 
      /*
       * MPFPS_MPFB1 is the value mptable_pass1() tests to decide whether a
       * 'default' configuration is in use: normally feature byte 1 of the MP
       * floating pointer structure, or the compile-time TEST_DEFAULT_CONFIG
       * override when that is defined.
       */
   83 #if defined(TEST_DEFAULT_CONFIG)
   84 #define MPFPS_MPFB1     TEST_DEFAULT_CONFIG
   85 #else
   86 #define MPFPS_MPFB1     mpfps->mpfb1
   87 #endif  /* TEST_DEFAULT_CONFIG */
   88
      /* NOTE(review): presumably the warm-boot vector offset/segment words - confirm */
   89 #define WARMBOOT_TARGET         0
   90 #define WARMBOOT_OFF            (KERNBASE + 0x0467)
   91 #define WARMBOOT_SEG            (KERNBASE + 0x0469)
   92
      /* BIOS region scanned by i386_mp_probe() for the MP signature */
   93 #ifdef PC98
   94 #define BIOS_BASE               (0xe8000)
   95 #define BIOS_SIZE               (0x18000)
   96 #else
   97 #define BIOS_BASE               (0xf0000)
   98 #define BIOS_SIZE               (0x10000)
   99 #endif
  100 #define BIOS_COUNT              (BIOS_SIZE/4)   /* size in 32-bit words */
  101
      /* CMOS index/data ports, as used by CHECK_READ()/CHECK_WRITE() */
  102 #define CMOS_REG                (0x70)
  103 #define CMOS_DATA               (0x71)
  104 #define BIOS_RESET              (0x0f)
  105 #define BIOS_WARM               (0x0a)
  106
      /* bits tested in PROCENTRY.cpu_flags and IOAPICENTRY.apic_flags */
  107 #define PROCENTRY_FLAG_EN       0x01
  108 #define PROCENTRY_FLAG_BP       0x02
  109 #define IOAPICENTRY_FLAG_EN     0x01
  110 
  111 
  112 /* MP Floating Pointer Structure */
  113 typedef struct MPFPS {
  114         char    signature[4];   /* located by search_for_sig() via MP_SIG ("_MP_") */
  115         void   *pap;            /* MP configuration table header, see mptable_pass1() */
  116         u_char  length;
  117         u_char  spec_rev;
  118         u_char  checksum;
  119         u_char  mpfb1;          /* non-zero: a 'default' configuration is in use */
  120         u_char  mpfb2;
  121         u_char  mpfb3;
  122         u_char  mpfb4;
  123         u_char  mpfb5;
  124 }      *mpfps_t;
  125 
  126 /* MP Configuration Table Header */
  127 typedef struct MPCTH {
  128         char    signature[4];
  129         u_short base_table_length;      /* header plus base entries, in bytes */
  130         u_char  spec_rev;
  131         u_char  checksum;
  132         u_char  oem_id[8];
  133         u_char  product_id[12];
  134         void   *oem_table_pointer;
  135         u_short oem_table_size;
  136         u_short entry_count;            /* number of base table entries to walk */
  137         void   *apic_address;           /* becomes cpu_apic_address in mptable_pass1() */
  138         u_short extended_table_length;
  139         u_char  extended_table_checksum;
  140         u_char  reserved;
  141 }      *mpcth_t;
  142 
  143 
      /* base table entry type 0 ("Processor" in basetable_entry_types[]) */
  144 typedef struct PROCENTRY {
  145         u_char  type;
  146         u_char  apic_id;
  147         u_char  apic_version;
  148         u_char  cpu_flags;      /* PROCENTRY_FLAG_EN / PROCENTRY_FLAG_BP */
  149         u_long  cpu_signature;
  150         u_long  feature_flags;
  151         u_long  reserved1;
  152         u_long  reserved2;
  153 }      *proc_entry_ptr;
  154 
      /* base table entry type 1 ("Bus" in basetable_entry_types[]) */
  155 typedef struct BUSENTRY {
  156         u_char  type;
  157         u_char  bus_id;
  158         char    bus_type[6];
  159 }      *bus_entry_ptr;
  160 
      /* base table entry type 2 ("I/O APIC" in basetable_entry_types[]) */
  161 typedef struct IOAPICENTRY {
  162         u_char  type;
  163         u_char  apic_id;
  164         u_char  apic_version;
  165         u_char  apic_flags;     /* IOAPICENTRY_FLAG_EN */
  166         void   *apic_address;   /* recorded in io_apic_address[] by mptable_pass1() */
  167 }      *io_apic_entry_ptr;
  168 
      /* base table entry types 3 and 4 ("I/O INT" and "Local INT") */
  169 typedef struct INTENTRY {
  170         u_char  type;
  171         u_char  int_type;
  172         u_short int_flags;
  173         u_char  src_bus_id;
  174         u_char  src_bus_irq;
  175         u_char  dst_apic_id;
  176         u_char  dst_apic_int;
  177 }      *int_entry_ptr;
  178 
  179 /* descriptions of MP basetable entries */
  180 typedef struct BASETABLE_ENTRY {
  181         u_char  type;           /* entry type code (first byte of each entry) */
  182         u_char  length;         /* entry size in bytes; used to step to the next entry */
  183         char    name[16];
  184 }       basetable_entry;
  185 
  186 /*
  187  * This code MUST be enabled here and in mpboot.s.
  188  * It follows the very early stages of AP boot by placing values in CMOS ram.
  189  * It NORMALLY will never be needed, hence the primitive method for enabling:
  190  * move the #define below out of this comment.
  191 #define CHECK_POINTS
  192  */
  193
  194 #if defined(CHECK_POINTS) && !defined(PC98)
      /* read or write one CMOS byte: select index A on CMOS_REG, move data via CMOS_DATA */
  195 #define CHECK_READ(A)    (outb(CMOS_REG, (A)), inb(CMOS_DATA))
  196 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
  197
      /*
       * Store D in CMOS bytes 0x34..0x39.
       * NOTE(review): the ';' after the parameter list is part of the expansion,
       * so this expands to an empty statement followed by six statements and is
       * not safe as the sole body of an unbraced if/else.  The same applies to
       * CHECK_PRINT below.  Confirm against the call sites before changing.
       */
  198 #define CHECK_INIT(D);                          \
  199         CHECK_WRITE(0x34, (D));                 \
  200         CHECK_WRITE(0x35, (D));                 \
  201         CHECK_WRITE(0x36, (D));                 \
  202         CHECK_WRITE(0x37, (D));                 \
  203         CHECK_WRITE(0x38, (D));                 \
  204         CHECK_WRITE(0x39, (D));
  205
      /* print label S followed by the six checkpoint bytes */
  206 #define CHECK_PRINT(S);                         \
  207         printf("%s: %d, %d, %d, %d, %d, %d\n",  \
  208            (S),                                 \
  209            CHECK_READ(0x34),                    \
  210            CHECK_READ(0x35),                    \
  211            CHECK_READ(0x36),                    \
  212            CHECK_READ(0x37),                    \
  213            CHECK_READ(0x38),                    \
  214            CHECK_READ(0x39));
  215
  216 #else                           /* CHECK_POINTS */
  217
      /* checkpoints disabled: both macros expand to nothing */
  218 #define CHECK_INIT(D)
  219 #define CHECK_PRINT(S)
  220
  221 #endif                          /* CHECK_POINTS */
  222 
  223 /*
  224  * Values to send to the POST hardware.
       * Each one is handed to POSTCODE() on entry to the routine it is named
       * after (e.g. MP_PROBE_POST in i386_mp_probe()).
  225  */
  226 #define MP_BOOTADDRESS_POST     0x10
  227 #define MP_PROBE_POST           0x11
  228 #define MPTABLE_PASS1_POST      0x12
  229
  230 #define MP_START_POST           0x13
  231 #define MP_ENABLE_POST          0x14
  232 #define MPTABLE_PASS2_POST      0x15
  233
  234 #define START_ALL_APS_POST      0x16
  235 #define INSTALL_AP_TRAMP_POST   0x17
  236 #define START_AP_POST           0x18
  237
  238 #define MP_ANNOUNCE_POST        0x19
  239 
  240 /* used to hold the APs until we are ready to release them */
  241 static struct mtx ap_boot_mtx;
  242
  243 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
  244 int     current_postcode;
  245
  246 /** XXX FIXME: what system files declare these??? */
  247 extern struct region_descriptor r_gdt, r_idt;
  248
  249 int     bsp_apic_ready = 0;     /* flags usability of BSP apic */
  250 int     mp_naps;                /* # of Application Processors (excludes the BSP) */
  251 int     mp_nbusses;             /* # of busses */
  252 int     mp_napics;              /* # of IO APICs */
  253 int     boot_cpu_id;            /* designated BSP */
  254 vm_offset_t cpu_apic_address;   /* local APIC address, set by mptable_pass1() */
  255 vm_offset_t io_apic_address[NAPICID];   /* NAPICID is more than enough */
  256 extern  int nkpt;
  257
  258 u_int32_t cpu_apic_versions[MAXCPU];
  259 u_int32_t *io_apic_versions;    /* allocated in mptable_pass2(), filled in mp_enable() */
  260 
  261 #ifdef APIC_INTR_REORDER
  262 struct {
  263         volatile int *location;
  264         int bit;
  265 } apic_isrbit_location[32];
  266 #endif
  267
  268 struct apic_intmapinfo  int_to_apicintpin[APIC_INTMAPSIZE];
  269
  270 /*
  271  * APIC ID logical/physical mapping structures.
  272  * We oversize these to simplify boot-time config.
  273  */
  274 int     cpu_num_to_apic_id[NAPICID];
  275 int     io_num_to_apic_id[NAPICID];
  276 int     apic_id_to_logical[NAPICID];
  277
  278
  279 /* AP uses this during bootstrap.  Do not staticize.  */
  280 char *bootSTK;
      /* logical CPU number of the AP being started; read by init_secondary() */
  281 static int bootAP;
  282
  283 /* Hotwire a 0->4MB V==P mapping */
  284 extern pt_entry_t *KPTphys;
  285
  286 /* SMP page table page */
  287 extern pt_entry_t *SMPpt;
  288
      /* NOTE(review): presumably per-CPU register state saved on a CPU stop - confirm */
  289 struct pcb stoppcbs[MAXCPU];
  290 
  291 #ifdef APIC_IO
  292 /* Variables needed for SMP TLB shootdown. */
  293 vm_offset_t smp_tlb_addr1;
  294 vm_offset_t smp_tlb_addr2;
  295 volatile int smp_tlb_wait;
      /* spin mutex named "tlb"; initialized by init_locks() */
  296 static struct mtx smp_tlb_mtx;
  297 #endif
  298 
  299 /*
  300  * Local data and functions.
  301  */
  302
  303 /* Set to 1 once we're ready to let the APs out of the pen. */
  304 static volatile int aps_ready = 0;
  305
  306 static int      mp_capable;             /* set by i386_mp_probe(): 1 if an MP table was found */
  307 static u_int    boot_address;           /* AP trampoline address, from mp_bootaddress() */
  308 static u_int    base_memory;            /* size of base memory in bytes, from mp_bootaddress() */
  309
  310 static int      picmode;                /* 0: virtual wire mode, 1: PIC mode */
  311 static mpfps_t  mpfps;                  /* MP floating pointer structure, or 0 if none found */
  312 static int      search_for_sig(u_int32_t target, int count);
  313 static void     mp_enable(u_int boot_addr);
  314
  315 static void     mptable_pass1(void);
  316 static int      mptable_pass2(void);
  317 static void     default_mp_table(int type);
  318 static void     fix_mp_table(void);
  319 static void     setup_apic_irq_mapping(void);
  320 static void     init_locks(void);
  321 static int      start_all_aps(u_int boot_addr);
  322 static void     install_ap_tramp(u_int boot_addr);
  323 static int      start_ap(int logicalCpu, u_int boot_addr);
  324 void            ap_init(void);
  325 static int      apic_int_is_bus_type(int intr, int bus_type);
  326 static void     release_aps(void *dummy);
  327 
  328 /*
  329  * SMP lock storage, followed by init_locks() which initializes the
  330  * mutexes among them.
       */
  331
  332 /* lock region used by kernel profiling */
  333 int     mcount_lock;
  334
  335 #ifdef USE_COMLOCK
  336 /* locks com (tty) data/hardware accesses: a FASTINTR() */
  337 struct mtx              com_mtx;
  338 #endif /* USE_COMLOCK */
  339
      /*
       * Initialize the SMP spin mutexes that are configured in: com_mtx under
       * USE_COMLOCK and smp_tlb_mtx under APIC_IO.  Called from mp_enable()
       * before the APs are started.
       */
  340 static void
  341 init_locks(void)
  342 {
  343
  344 #ifdef USE_COMLOCK
  345         mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
  346 #endif /* USE_COMLOCK */
  347 #ifdef APIC_IO
  348         mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
  349 #endif
  350 }
  351 
  352 /*
  353  * Work out where in base memory the AP trampoline code can be placed.
       *
       * 'basemem' is the amount of base memory in kilobytes.  Records the byte
       * count in base_memory and the chosen page-aligned address in
       * boot_address, and returns that address.
  354  */
  355 u_int
  356 mp_bootaddress(u_int basemem)
  357 {
              u_int   room;

  358         POSTCODE(MP_BOOTADDRESS_POST);
  359
  360         base_memory = basemem * 1024;   /* kilobytes -> bytes */
  361
  362         boot_address = base_memory & ~0xfff;    /* 4k boundary at or below the top */

              /* bytes between that boundary and the top of base memory */
              room = base_memory - boot_address;
  363         if (room < bootMP_size)
  364                 boot_address -= 4096;   /* does not fit there, drop one page */
  365
  366         return (boot_address);
  367 }
  368 
  369 
  370 /*
  371  * Look for an Intel MP spec table (ie, SMP capable hardware).
       *
       * Search order: the first 1K of the EBDA if one exists, otherwise the last
       * 1K of base memory; then the BIOS region.  On success mpfps points at the
       * floating pointer structure, mptable_pass1() is run and mp_capable is set
       * to 1; otherwise mpfps and mp_capable are cleared.
  372  */
  373 void
  374 i386_mp_probe(void)
  375 {
              u_long  ebda_seg;
              u_int32_t where;
              int     hit;
  379
  380         POSTCODE(MP_PROBE_POST);
  381
              /* the word at 0x40e is non-zero when an EBDA exists */
              ebda_seg = (u_long) * (u_short *) (KERNBASE + 0x40e);
              if (ebda_seg != 0) {
                      /* first 1K of EBDA */
                      where = (u_int32_t) (ebda_seg << 4);
              } else {
                      /* last 1K of base memory, effective 'top of base' passed in */
                      where = (u_int32_t) (base_memory - 0x400);
              }
              hit = search_for_sig(where, 1024 / 4);

              /* not there: fall back to the BIOS */
              if (hit < 0) {
                      where = (u_int32_t) BIOS_BASE;
                      hit = search_for_sig(where, BIOS_COUNT);
              }

              /* nothing found anywhere */
              if (hit < 0) {
                      mpfps = (mpfps_t)0;
                      mp_capable = 0;
                      return;
              }

              /* remember the structure and size up the needed resources */
              mpfps = (mpfps_t)hit;
              mptable_pass1();

              /* flag fact that we are running multiple processors */
              mp_capable = 1;
  412 }
  413 
  414 int
  415 cpu_mp_probe(void)
  416 {
  417         /*
  418          * Record BSP in CPU map
  419          * This is done here so that MBUF init code works correctly.
  420          */
  421         all_cpus = 1;
  422 
  423         return (mp_capable);
  424 }
  425 
  426 /*
  427  * Bring up the SMP hardware: panics if no MP capable motherboard was
       * detected, otherwise runs mp_enable() with the trampoline address chosen
       * by mp_bootaddress() and finishes with cpu_setregs().
  428  */
  429 void
  430 cpu_mp_start(void)
  431 {
  432         POSTCODE(MP_START_POST);
  433
              /* nothing to start without an MP capable motherboard */
              if (!mp_capable)
  438                 panic("MP hardware not found!");

              mp_enable(boot_address);
  439
  440         cpu_setregs();
  441 }
  442 
  443 
  444 /*
  445  * Report the SMP configuration on the console: one line for the BSP, one
       * per AP, and (with APIC_IO) one per IO APIC, each giving the APIC id,
       * version and address.
  446  */
  447 void
  448 cpu_mp_announce(void)
  449 {
              int     unit;
  451
  452         POSTCODE(MP_ANNOUNCE_POST);
  453
              /* the boot processor is always logical CPU 0 */
  454         printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
  455         printf(", version: 0x%08x", cpu_apic_versions[0]);
  456         printf(", at 0x%08x\n", cpu_apic_address);

              /* the APs are logical CPUs 1..mp_naps */
              unit = 1;
              while (unit <= mp_naps) {
                      printf(" cpu%d (AP):  apic id: %2d", unit, CPU_TO_ID(unit));
                      printf(", version: 0x%08x", cpu_apic_versions[unit]);
  460                 printf(", at 0x%08x\n", cpu_apic_address);
                      ++unit;
  461         }
  462
  463 #if defined(APIC_IO)
              unit = 0;
              while (unit < mp_napics) {
                      printf(" io%d (APIC): apic id: %2d", unit, IO_TO_ID(unit));
                      printf(", version: 0x%08x", io_apic_versions[unit]);
                      printf(", at 0x%08x\n", io_apic_address[unit]);
                      ++unit;
  468         }
  469 #else
  470         printf(" Warning: APIC I/O disabled\n");
  471 #endif  /* APIC_IO */
  472 }
  473 
  474 /*
  475  * AP cpu's call this to sync up protected mode.
       *
       * The logical CPU number is taken from bootAP.  Builds and loads this
       * CPU's slice of gdt[], loads the IDT and the default LDT, sets up and
       * loads the per-CPU TSS, puts CR0 into a known state and finally calls
       * pmap_set_opt().
  476  */
  477 void
  478 init_secondary(void)
  479 {
  480         int     gsel_tss;
  481         int     x, myid = bootAP;
  482         u_int   cr0;
  483
              /* point the private-space and TSS descriptors at this CPU's SMP_prvspace slot */
  484         gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
  485         gdt_segs[GPROC0_SEL].ssd_base =
  486                 (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
  487         SMP_prvspace[myid].pcpu.pc_prvspace =
  488                 &SMP_prvspace[myid].pcpu;
  489
              /* convert the descriptors into this CPU's NGDT-entry slice of gdt[] */
  490         for (x = 0; x < NGDT; x++) {
  491                 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
  492         }
  493
  494         r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
  495         r_gdt.rd_base = (int) &gdt[myid * NGDT];
  496         lgdt(&r_gdt);                   /* does magic intra-segment return */
  497
  498         lidt(&r_idt);
  499
  500         lldt(_default_ldt);
  501         PCPU_SET(currentldt, _default_ldt);
  502
              /* fill in the per-CPU TSS and its descriptor, then load the task register */
  503         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
  504         gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
  505         PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
  506         PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
  507         PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
  508         PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
  509         PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
  510         ltr(gsel_tss);
  511
  512         /*
  513          * Set to a known state:
  514          * Set by mpboot.s: CR0_PG, CR0_PE
  515          * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
               * Cleared here: CR0_CD, CR0_NW, CR0_EM
  516          */
  517         cr0 = rcr0();
  518         cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
  519         load_cr0(cr0);
  520
  521         pmap_set_opt();
  522 }
  523 
  524 
  525 #if defined(APIC_IO)
  526 /*
  527  * Final configuration of the BSP's local APIC:
  528  *  - disable 'pic mode'.
  529  *  - disable 'virtual wire mode'.
  530  *  - enable NMI.
       *
       * The IMCR is only written when 'picmode' is set; the two LVT updates
       * are unconditional.  Dumps the APIC state when booting verbosely.
  531  */
  532 void
  533 bsp_apic_configure(void)
  534 {
  535         u_char          byte;
  536         u_int32_t       temp;
  537
  538         /* leave 'pic mode' if necessary */
  539         if (picmode) {
  540                 outb(0x22, 0x70);       /* select IMCR */
  541                 byte = inb(0x23);       /* current contents */
  542                 byte |= 0x01;           /* mask external INTR */
  543                 outb(0x23, byte);       /* disconnect 8259s/NMI */
  544         }
  545
  546         /* mask lint0 (the 8259 'virtual wire' connection) */
  547         temp = lapic.lvt_lint0;
  548         temp |= APIC_LVT_M;             /* set the mask */
  549         lapic.lvt_lint0 = temp;
  550
  551         /* setup lint1 to handle NMI */
  552         temp = lapic.lvt_lint1;
  553         temp &= ~APIC_LVT_M;            /* clear the mask */
  554         lapic.lvt_lint1 = temp;
  555
  556         if (bootverbose)
  557                 apic_dump("bsp_apic_configure()");
  558 }
  559 #endif  /* APIC_IO */
  560 
  561 
  562 /*******************************************************************
  563  * local functions and data
  564  */
  565 
  566 /*
  567  * start the SMP system
       *
       * 'boot_addr' is the AP trampoline address and is passed on to
       * start_all_aps().  Steps, in order: temporarily map the low 4MB V == P
       * and run mptable_pass2(); apply a default configuration if pass 2 asked
       * for one; fix up the table and the APIC IRQ mapping; with APIC_IO,
       * program the IO APICs and install the IPI vectors; initialize the SMP
       * locks; start the APs.
  568  */
  569 static void
  570 mp_enable(u_int boot_addr)
  571 {
  572         int     x;
  573 #if defined(APIC_IO)
  574         int     apic;
  575         u_int   ux;
  576 #endif  /* APIC_IO */
  577
  578         POSTCODE(MP_ENABLE_POST);
  579
  580         /* turn on 4MB of V == P addressing so we can get to MP table */
  581         *(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
  582         invltlb();
  583
  584         /* examine the MP table for needed info, uses physical addresses */
  585         x = mptable_pass2();
  586
              /* remove the temporary V == P mapping again */
  587         *(int *)PTD = 0;
  588         invltlb();
  589
  590         /* can't process default configs till the CPU APIC is pmapped */
  591         if (x)
  592                 default_mp_table(x);
  593
  594         /* post scan cleanup */
  595         fix_mp_table();
  596         setup_apic_irq_mapping();
  597
  598 #if defined(APIC_IO)
  599
  600         /* fill the LOGICAL io_apic_versions table */
  601         for (apic = 0; apic < mp_napics; ++apic) {
  602                 ux = io_apic_read(apic, IOAPIC_VER);
  603                 io_apic_versions[apic] = ux;
  604                 io_apic_set_id(apic, IO_TO_ID(apic));
  605         }
  606
  607         /* program each IO APIC in the system */
  608         for (apic = 0; apic < mp_napics; ++apic)
  609                 if (io_apic_setup(apic) < 0)
  610                         panic("IO APIC setup failure");
  611
  612         /* install a 'Spurious INTerrupt' vector */
  613         setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
  614                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  615
  616         /* install inter-CPU IPIs for TLB invalidation (all, one page, range) */
  617         setidt(XINVLTLB_OFFSET, Xinvltlb,
  618                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  619         setidt(XINVLPG_OFFSET, Xinvlpg,
  620                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  621         setidt(XINVLRNG_OFFSET, Xinvlrng,
  622                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  623
  624         /* install an inter-CPU IPI for forwarding hardclock() */
  625         setidt(XHARDCLOCK_OFFSET, Xhardclock,
  626                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  627
  628         /* install an inter-CPU IPI for forwarding statclock() */
  629         setidt(XSTATCLOCK_OFFSET, Xstatclock,
  630                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  631
  632         /* install an inter-CPU IPI for all-CPU rendezvous */
  633         setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
  634                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  635
  636         /* install an inter-CPU IPI for forcing an additional software trap */
  637         setidt(XCPUAST_OFFSET, Xcpuast,
  638                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  639
  640         /* install an inter-CPU IPI for CPU stop/restart */
  641         setidt(XCPUSTOP_OFFSET, Xcpustop,
  642                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  643
  644 #if defined(TEST_TEST1)
  645         /* install a "fake hardware INTerrupt" vector */
  646         setidt(XTEST1_OFFSET, Xtest1,
  647                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  648 #endif  /** TEST_TEST1 */
  649
  650 #endif  /* APIC_IO */
  651
  652         /* initialize all SMP locks */
  653         init_locks();
  654
  655         /* start each Application Processor; the int result is not examined */
  656         start_all_aps(boot_addr);
  657 }
  658 
  659 
  660 /*
  661  * Scan memory for the MP spec signature.
       *
       * 'target' is a physical address, read through the KERNBASE mapping;
       * 'count' is the length of the region in 32-bit words.  Only every
       * fourth word (each 16-byte boundary) is tested.  Returns the physical
       * byte address of the first match, or -1 if there is none.
  662  */
  663
  664 /* string defined by the Intel MP Spec as identifying the MP table */
  665 #define MP_SIG          0x5f504d5f      /* _MP_ */
  666 #define NEXT(X)         ((X) += 4)
  667 static int
  668 search_for_sig(u_int32_t target, int count)
  669 {
              u_int32_t *words = (u_int32_t *) (KERNBASE + target);
              int     idx = 0;
  672
              while (idx < count) {
                      if (words[idx] == MP_SIG) {
                              /* turn the word index back into a byte address */
                              return (target + (idx * sizeof(u_int32_t)));
                      }
                      NEXT(idx);
              }
  677
              return (-1);
  679 }
  680 
  681 
      /*
       * Base table entry descriptions, indexed by entry type code:
       * { type, length in bytes, name }.  mptable_pass1() uses the length to
       * step from one entry to the next.
       */
  682 static basetable_entry basetable_entry_types[] =
  683 {
  684         {0, 20, "Processor"},
  685         {1, 8, "Bus"},
  686         {2, 8, "I/O APIC"},
  687         {3, 8, "I/O INT"},
  688         {4, 8, "Local INT"}
  689 };
  690 
      /* one element of bus_data[], which is allocated by mptable_pass2() */
  691 typedef struct BUSDATA {
  692         u_char  bus_id;
  693         enum busTypes bus_type;
  694 }       bus_datum;
  695 
      /*
       * One element of io_apic_ints[].  The first six members have the same
       * names as those of struct INTENTRY after its 'type' byte; int_vector
       * has no counterpart there.
       */
  696 typedef struct INTDATA {
  697         u_char  int_type;
  698         u_short int_flags;
  699         u_char  src_bus_id;
  700         u_char  src_bus_irq;
  701         u_char  dst_apic_id;
  702         u_char  dst_apic_int;
  703         u_char  int_vector;
  704 }       io_int, local_int;
  705 
      /* pairs a bus type code with its name string */
  706 typedef struct BUSTYPENAME {
  707         u_char  type;
  708         char    name[7];        /* longest names used are 6 characters plus NUL */
  709 }       bus_type_name;
  710 
      /*
       * Known bus type names; "---" marks names that are not supported.
       * NOTE(review): {MCA, "MCA"} appears twice (4th and 7th entries) -
       * presumably harmless if lookups match on the name, but confirm against
       * lookup_bus_type().
       */
  711 static bus_type_name bus_type_table[] =
  712 {
  713         {CBUS, "CBUS"},
  714         {CBUSII, "CBUSII"},
  715         {EISA, "EISA"},
  716         {MCA, "MCA"},
  717         {UNKNOWN_BUSTYPE, "---"},
  718         {ISA, "ISA"},
  719         {MCA, "MCA"},
  720         {UNKNOWN_BUSTYPE, "---"},
  721         {UNKNOWN_BUSTYPE, "---"},
  722         {UNKNOWN_BUSTYPE, "---"},
  723         {UNKNOWN_BUSTYPE, "---"},
  724         {UNKNOWN_BUSTYPE, "---"},
  725         {PCI, "PCI"},
  726         {UNKNOWN_BUSTYPE, "---"},
  727         {UNKNOWN_BUSTYPE, "---"},
  728         {UNKNOWN_BUSTYPE, "---"},
  729         {UNKNOWN_BUSTYPE, "---"},
  730         {XPRESS, "XPRESS"},
  731         {UNKNOWN_BUSTYPE, "---"}
  732 };
  733 /*
       * Bus layout of the default configurations, from MP spec v1.4, table 5-1.
       * Row N-1 describes default configuration type N; mptable_pass1() reads
       * column 0 (the bus count) using MPFPS_MPFB1 - 1 as the row.  255 marks
       * an unused second bus.
       */
  734 static int default_data[7][5] =
  735 {
  736 /*   nbus, id0, type0, id1, type1 */
  737         {1, 0, ISA, 255, 255},
  738         {1, 0, EISA, 255, 255},
  739         {1, 0, EISA, 255, 255},
  740         {1, 0, MCA, 255, 255},
  741         {2, 0, ISA, 1, PCI},
  742         {2, 0, EISA, 1, PCI},
  743         {2, 0, MCA, 1, PCI}
  744 };
  745 
  746 
  747 /* the bus data; mp_nbusses elements, allocated by mptable_pass2() */
  748 static bus_datum *bus_data;
  749
  750 /* the IO INT data, one entry per possible APIC INTerrupt */
  751 static io_int  *io_apic_ints;
  752
      /* number of I/O interrupt entries, counted by mptable_pass1() */
  753 static int nintrs;
  754
      /* per-entry handlers for the MP base table */
  755 static int processor_entry(proc_entry_ptr entry, int cpu);
  756 static int bus_entry(bus_entry_ptr entry, int bus);
  757 static int io_apic_entry(io_apic_entry_ptr entry, int apic);
  758 static int int_entry(int_entry_ptr entry, int intr);
  759 static int lookup_bus_type(char *name);
  760 
  761 
  762 /*
  763  * 1st pass on motherboard's Intel MP specification table.
  764  *
  765  * initializes:
  766  *      mp_ncpus = 1
  767  *
  768  * determines:
  769  *      cpu_apic_address (common to all CPUs)
  770  *      io_apic_address[N]
  771  *      mp_naps
  772  *      mp_nbusses
  773  *      mp_napics
  774  *      nintrs
       *
       * This pass only counts entries and records APIC addresses, so that
       * mptable_pass2() can size its allocations.  When MPFPS_MPFB1 is
       * non-zero a default configuration is assumed and no table is walked.
       * Panics if the configuration table header is missing, if an entry has
       * an unknown type, or if the table lists more enabled IO APICs than
       * io_apic_address[] can hold.
  775  */
  776 static void
  777 mptable_pass1(void)
  778 {
  779         int     x;
  780         mpcth_t cth;
  781         int     totalSize;
  782         void*   position;
  783         int     count;
  784         int     type;
  785
  786         POSTCODE(MPTABLE_PASS1_POST);
  787
  788         /* clear various tables */
  789         for (x = 0; x < NAPICID; ++x) {
  790                 io_apic_address[x] = ~0;        /* IO APIC address table */
  791         }
  792
  793         /* init everything to empty */
  794         mp_naps = 0;
  795         mp_nbusses = 0;
  796         mp_napics = 0;
  797         nintrs = 0;
  798
  799         /* check for use of 'default' configuration */
  800         if (MPFPS_MPFB1 != 0) {
  801                 /* use default addresses */
  802                 cpu_apic_address = DEFAULT_APIC_BASE;
  803                 io_apic_address[0] = DEFAULT_IO_APIC_BASE;
  804
  805                 /* fill in with defaults */
  806                 mp_naps = 2;            /* includes BSP */
  807                 mp_maxid = 1;
  808                 mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
  809 #if defined(APIC_IO)
  810                 mp_napics = 1;
  811                 nintrs = 16;
  812 #endif  /* APIC_IO */
  813         }
  814         else {
  815                 if ((cth = mpfps->pap) == 0)
  816                         panic("MP Configuration Table Header MISSING!");
  817
  818                 cpu_apic_address = (vm_offset_t) cth->apic_address;
  819
  820                 /* walk the table, recording info of interest */
  821                 totalSize = cth->base_table_length - sizeof(struct MPCTH);
  822                 position = (u_char *) cth + sizeof(struct MPCTH);
  823                 count = cth->entry_count;
  824
  825                 while (count--) {
  826                         switch (type = *(u_char *) position) {
  827                         case 0: /* processor_entry */
                                      /*
                                       * NOTE(review): the default path above
                                       * sets mp_maxid to 1 for two CPUs, while
                                       * this path adds one per enabled CPU;
                                       * the starting value of mp_maxid is not
                                       * visible here - confirm they agree.
                                       */
  828                                 if (((proc_entry_ptr)position)->cpu_flags
  829                                     & PROCENTRY_FLAG_EN) {
  830                                         ++mp_naps;
  831                                         mp_maxid++;
  832                                 }
  833                                 break;
  834                         case 1: /* bus_entry */
  835                                 ++mp_nbusses;
  836                                 break;
  837                         case 2: /* io_apic_entry */
  838                                 if (((io_apic_entry_ptr)position)->apic_flags
  839                                         & IOAPICENTRY_FLAG_EN) {
                                              /* io_apic_address[] has NAPICID slots */
                                              if (mp_napics >= NAPICID)
                                                      panic("too many IO APICs in MP table");
  840                                         io_apic_address[mp_napics++] =
  841                                             (vm_offset_t)((io_apic_entry_ptr)
  842                                                 position)->apic_address;
                                      }
  843                                 break;
  844                         case 3: /* int_entry (I/O interrupt) */
  845                                 ++nintrs;
  846                                 break;
  847                         case 4: /* int_entry (local interrupt), not counted */
  848                                 break;
  849                         default:
  850                                 panic("mpfps Base Table HOSED!");
  851                                 /* NOTREACHED */
  852                         }
  853
                              /*
                               * Step to the next entry.  Written as a plain
                               * assignment: a cast is not an lvalue in C.
                               */
  854                         totalSize -= basetable_entry_types[type].length;
  855                         position = (u_char *)position +
                                  basetable_entry_types[type].length;
  856                 }
  857         }
  858
  859         /* qualify the numbers */
  860         if (mp_naps > MAXCPU) {
  861                 printf("Warning: only using %d of %d available CPUs!\n",
  862                         MAXCPU, mp_naps);
  863                 mp_naps = MAXCPU;
  864         }
  865
  866         /*
  867          * Count the BSP.
  868          * This is also used as a counter while starting the APs.
  869          */
  870         mp_ncpus = 1;
  871
  872         --mp_naps;      /* subtract the BSP */
  873 }
  874 
  875 
  876 /*
  877  * 2nd pass on motherboard's Intel MP specification table.
  878  *
  879  * sets:
  880  *      boot_cpu_id
  881  *      ID_TO_IO(N), phy APIC ID to log CPU/IO table
  882  *      CPU_TO_ID(N), logical CPU to APIC ID table
  883  *      IO_TO_ID(N), logical IO to APIC ID table
  884  *      bus_data[N]
  885  *      io_apic_ints[N]
  886  */
  887 static int
  888 mptable_pass2(void)
  889 {
  890         int     x;
  891         mpcth_t cth;
  892         int     totalSize;
  893         void*   position;
  894         int     count;
  895         int     type;
  896         int     apic, bus, cpu, intr;
  897         int     i, j;
  898         int     pgeflag;
  899 
  900         POSTCODE(MPTABLE_PASS2_POST);
  901 
  902         pgeflag = 0;            /* XXX - Not used under SMP yet.  */
  903 
  904         MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
  905             M_DEVBUF, M_WAITOK);
  906         MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
  907             M_DEVBUF, M_WAITOK);
  908         MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
  909             M_DEVBUF, M_WAITOK);
  910         MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
  911             M_DEVBUF, M_WAITOK);
  912 
  913         bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
  914 
  915         for (i = 0; i < mp_napics; i++) {
  916                 for (j = 0; j < mp_napics; j++) {
  917                         /* same page frame as a previous IO apic? */
  918                         if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
  919                             (io_apic_address[i] & PG_FRAME)) {
  920                                 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
  921                                         + (NPTEPG-2-j) * PAGE_SIZE
  922                                         + (io_apic_address[i] & PAGE_MASK));
  923                                 break;
  924                         }
  925                         /* use this slot if available */
  926                         if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
  927                                 SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
  928                                     pgeflag | (io_apic_address[i] & PG_FRAME));
  929                                 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
  930                                         + (NPTEPG-2-j) * PAGE_SIZE
  931                                         + (io_apic_address[i] & PAGE_MASK));
  932                                 break;
  933                         }
  934                 }
  935         }
  936 
  937         /* clear various tables */
  938         for (x = 0; x < NAPICID; ++x) {
  939                 ID_TO_IO(x) = -1;       /* phy APIC ID to log CPU/IO table */
  940                 CPU_TO_ID(x) = -1;      /* logical CPU to APIC ID table */
  941                 IO_TO_ID(x) = -1;       /* logical IO to APIC ID table */
  942         }
  943 
  944         /* clear bus data table */
  945         for (x = 0; x < mp_nbusses; ++x)
  946                 bus_data[x].bus_id = 0xff;
  947 
  948         /* clear IO APIC INT table */
  949         for (x = 0; x < (nintrs + 1); ++x) {
  950                 io_apic_ints[x].int_type = 0xff;
  951                 io_apic_ints[x].int_vector = 0xff;
  952         }
  953 
  954         /* setup the cpu/apic mapping arrays */
  955         boot_cpu_id = -1;
  956 
  957         /* record whether PIC or virtual-wire mode */
  958         picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
  959 
  960         /* check for use of 'default' configuration */
  961         if (MPFPS_MPFB1 != 0)
  962                 return MPFPS_MPFB1;     /* return default configuration type */
  963 
  964         if ((cth = mpfps->pap) == 0)
  965                 panic("MP Configuration Table Header MISSING!");
  966 
  967         /* walk the table, recording info of interest */
  968         totalSize = cth->base_table_length - sizeof(struct MPCTH);
  969         position = (u_char *) cth + sizeof(struct MPCTH);
  970         count = cth->entry_count;
  971         apic = bus = intr = 0;
  972         cpu = 1;                                /* pre-count the BSP */
  973 
  974         while (count--) {
  975                 switch (type = *(u_char *) position) {
  976                 case 0:
  977                         if (processor_entry(position, cpu))
  978                                 ++cpu;
  979                         break;
  980                 case 1:
  981                         if (bus_entry(position, bus))
  982                                 ++bus;
  983                         break;
  984                 case 2:
  985                         if (io_apic_entry(position, apic))
  986                                 ++apic;
  987                         break;
  988                 case 3:
  989                         if (int_entry(position, intr))
  990                                 ++intr;
  991                         break;
  992                 case 4:
  993                         /* int_entry(position); */
  994                         break;
  995                 default:
  996                         panic("mpfps Base Table HOSED!");
  997                         /* NOTREACHED */
  998                 }
  999 
 1000                 totalSize -= basetable_entry_types[type].length;
 1001                 (u_char *) position += basetable_entry_types[type].length;
 1002         }
 1003 
 1004         if (boot_cpu_id == -1)
 1005                 panic("NO BSP found!");
 1006 
 1007         /* report fact that its NOT a default configuration */
 1008         return 0;
 1009 }
 1010 
 1011 
 1012 void
 1013 assign_apic_irq(int apic, int intpin, int irq)
 1014 {
 1015         int x;
 1016         
 1017         if (int_to_apicintpin[irq].ioapic != -1)
 1018                 panic("assign_apic_irq: inconsistent table");
 1019         
 1020         int_to_apicintpin[irq].ioapic = apic;
 1021         int_to_apicintpin[irq].int_pin = intpin;
 1022         int_to_apicintpin[irq].apic_address = ioapic[apic];
 1023         int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
 1024         
 1025         for (x = 0; x < nintrs; x++) {
 1026                 if ((io_apic_ints[x].int_type == 0 || 
 1027                      io_apic_ints[x].int_type == 3) &&
 1028                     io_apic_ints[x].int_vector == 0xff &&
 1029                     io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
 1030                     io_apic_ints[x].dst_apic_int == intpin)
 1031                         io_apic_ints[x].int_vector = irq;
 1032         }
 1033 }
 1034 
 1035 void
 1036 revoke_apic_irq(int irq)
 1037 {
 1038         int x;
 1039         int oldapic;
 1040         int oldintpin;
 1041         
 1042         if (int_to_apicintpin[irq].ioapic == -1)
 1043                 panic("revoke_apic_irq: inconsistent table");
 1044         
 1045         oldapic = int_to_apicintpin[irq].ioapic;
 1046         oldintpin = int_to_apicintpin[irq].int_pin;
 1047 
 1048         int_to_apicintpin[irq].ioapic = -1;
 1049         int_to_apicintpin[irq].int_pin = 0;
 1050         int_to_apicintpin[irq].apic_address = NULL;
 1051         int_to_apicintpin[irq].redirindex = 0;
 1052         
 1053         for (x = 0; x < nintrs; x++) {
 1054                 if ((io_apic_ints[x].int_type == 0 || 
 1055                      io_apic_ints[x].int_type == 3) &&
 1056                     io_apic_ints[x].int_vector != 0xff &&
 1057                     io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
 1058                     io_apic_ints[x].dst_apic_int == oldintpin)
 1059                         io_apic_ints[x].int_vector = 0xff;
 1060         }
 1061 }
 1062 
 1063 
 1064 static void
 1065 allocate_apic_irq(int intr)
 1066 {
 1067         int apic;
 1068         int intpin;
 1069         int irq;
 1070         
 1071         if (io_apic_ints[intr].int_vector != 0xff)
 1072                 return;         /* Interrupt handler already assigned */
 1073         
 1074         if (io_apic_ints[intr].int_type != 0 &&
 1075             (io_apic_ints[intr].int_type != 3 ||
 1076              (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
 1077               io_apic_ints[intr].dst_apic_int == 0)))
 1078                 return;         /* Not INT or ExtInt on != (0, 0) */
 1079         
 1080         irq = 0;
 1081         while (irq < APIC_INTMAPSIZE &&
 1082                int_to_apicintpin[irq].ioapic != -1)
 1083                 irq++;
 1084         
 1085         if (irq >= APIC_INTMAPSIZE)
 1086                 return;         /* No free interrupt handlers */
 1087         
 1088         apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
 1089         intpin = io_apic_ints[intr].dst_apic_int;
 1090         
 1091         assign_apic_irq(apic, intpin, irq);
 1092         io_apic_setup_intpin(apic, intpin);
 1093 }
 1094 
 1095 
 1096 static void
 1097 swap_apic_id(int apic, int oldid, int newid)
 1098 {
 1099         int x;
 1100         int oapic;
 1101         
 1102 
 1103         if (oldid == newid)
 1104                 return;                 /* Nothing to do */
 1105         
 1106         printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
 1107                apic, oldid, newid);
 1108         
 1109         /* Swap physical APIC IDs in interrupt entries */
 1110         for (x = 0; x < nintrs; x++) {
 1111                 if (io_apic_ints[x].dst_apic_id == oldid)
 1112                         io_apic_ints[x].dst_apic_id = newid;
 1113                 else if (io_apic_ints[x].dst_apic_id == newid)
 1114                         io_apic_ints[x].dst_apic_id = oldid;
 1115         }
 1116         
 1117         /* Swap physical APIC IDs in IO_TO_ID mappings */
 1118         for (oapic = 0; oapic < mp_napics; oapic++)
 1119                 if (IO_TO_ID(oapic) == newid)
 1120                         break;
 1121         
 1122         if (oapic < mp_napics) {
 1123                 printf("Changing APIC ID for IO APIC #%d from "
 1124                        "%d to %d in MP table\n",
 1125                        oapic, newid, oldid);
 1126                 IO_TO_ID(oapic) = oldid;
 1127         }
 1128         IO_TO_ID(apic) = newid;
 1129 }
 1130 
 1131 
 1132 static void
 1133 fix_id_to_io_mapping(void)
 1134 {
 1135         int x;
 1136 
 1137         for (x = 0; x < NAPICID; x++)
 1138                 ID_TO_IO(x) = -1;
 1139         
 1140         for (x = 0; x <= mp_naps; x++)
 1141                 if (CPU_TO_ID(x) < NAPICID)
 1142                         ID_TO_IO(CPU_TO_ID(x)) = x;
 1143         
 1144         for (x = 0; x < mp_napics; x++)
 1145                 if (IO_TO_ID(x) < NAPICID)
 1146                         ID_TO_IO(IO_TO_ID(x)) = x;
 1147 }
 1148 
 1149 
 1150 static int
 1151 first_free_apic_id(void)
 1152 {
 1153         int freeid, x;
 1154         
 1155         for (freeid = 0; freeid < NAPICID; freeid++) {
 1156                 for (x = 0; x <= mp_naps; x++)
 1157                         if (CPU_TO_ID(x) == freeid)
 1158                                 break;
 1159                 if (x <= mp_naps)
 1160                         continue;
 1161                 for (x = 0; x < mp_napics; x++)
 1162                         if (IO_TO_ID(x) == freeid)
 1163                                 break;
 1164                 if (x < mp_napics)
 1165                         continue;
 1166                 return freeid;
 1167         }
 1168         return freeid;
 1169 }
 1170 
 1171 
 1172 static int
 1173 io_apic_id_acceptable(int apic, int id)
 1174 {
 1175         int cpu;                /* Logical CPU number */
 1176         int oapic;              /* Logical IO APIC number for other IO APIC */
 1177 
 1178         if (id >= NAPICID)
 1179                 return 0;       /* Out of range */
 1180         
 1181         for (cpu = 0; cpu <= mp_naps; cpu++)
 1182                 if (CPU_TO_ID(cpu) == id)
 1183                         return 0;       /* Conflict with CPU */
 1184         
 1185         for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
 1186                 if (IO_TO_ID(oapic) == id)
 1187                         return 0;       /* Conflict with other APIC */
 1188         
 1189         return 1;               /* ID is acceptable for IO APIC */
 1190 }
 1191 
 1192 
 1193 /*
 1194  * parse an Intel MP specification table
 1195  */
 1196 static void
 1197 fix_mp_table(void)
 1198 {
 1199         int     x;
 1200         int     id;
 1201         int     bus_0 = 0;      /* Stop GCC warning */
 1202         int     bus_pci = 0;    /* Stop GCC warning */
 1203         int     num_pci_bus;
 1204         int     apic;           /* IO APIC unit number */
 1205         int     freeid;         /* Free physical APIC ID */
 1206         int     physid;         /* Current physical IO APIC ID */
 1207 
 1208         /*
 1209          * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
 1210          * did it wrong.  The MP spec says that when more than 1 PCI bus
 1211          * exists the BIOS must begin with bus entries for the PCI bus and use
 1212          * actual PCI bus numbering.  This implies that when only 1 PCI bus
 1213          * exists the BIOS can choose to ignore this ordering, and indeed many
 1214          * MP motherboards do ignore it.  This causes a problem when the PCI
 1215          * sub-system makes requests of the MP sub-system based on PCI bus
 1216          * numbers.     So here we look for the situation and renumber the
 1217          * busses and associated INTs in an effort to "make it right".
 1218          */
 1219 
 1220         /* find bus 0, PCI bus, count the number of PCI busses */
 1221         for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
 1222                 if (bus_data[x].bus_id == 0) {
 1223                         bus_0 = x;
 1224                 }
 1225                 if (bus_data[x].bus_type == PCI) {
 1226                         ++num_pci_bus;
 1227                         bus_pci = x;
 1228                 }
 1229         }
 1230         /*
 1231          * bus_0 == slot of bus with ID of 0
 1232          * bus_pci == slot of last PCI bus encountered
 1233          */
 1234 
 1235         /* check the 1 PCI bus case for sanity */
 1236         /* if it is number 0 all is well */
 1237         if (num_pci_bus == 1 &&
 1238             bus_data[bus_pci].bus_id != 0) {
 1239                 
 1240                 /* mis-numbered, swap with whichever bus uses slot 0 */
 1241 
 1242                 /* swap the bus entry types */
 1243                 bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
 1244                 bus_data[bus_0].bus_type = PCI;
 1245 
 1246                 /* swap each relavant INTerrupt entry */
 1247                 id = bus_data[bus_pci].bus_id;
 1248                 for (x = 0; x < nintrs; ++x) {
 1249                         if (io_apic_ints[x].src_bus_id == id) {
 1250                                 io_apic_ints[x].src_bus_id = 0;
 1251                         }
 1252                         else if (io_apic_ints[x].src_bus_id == 0) {
 1253                                 io_apic_ints[x].src_bus_id = id;
 1254                         }
 1255                 }
 1256         }
 1257 
 1258         /* Assign IO APIC IDs.
 1259          * 
 1260          * First try the existing ID. If a conflict is detected, try
 1261          * the ID in the MP table.  If a conflict is still detected, find
 1262          * a free id.
 1263          *
 1264          * We cannot use the ID_TO_IO table before all conflicts has been
 1265          * resolved and the table has been corrected.
 1266          */
 1267         for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
 1268                 
 1269                 /* First try to use the value set by the BIOS */
 1270                 physid = io_apic_get_id(apic);
 1271                 if (io_apic_id_acceptable(apic, physid)) {
 1272                         if (IO_TO_ID(apic) != physid)
 1273                                 swap_apic_id(apic, IO_TO_ID(apic), physid);
 1274                         continue;
 1275                 }
 1276 
 1277                 /* Then check if the value in the MP table is acceptable */
 1278                 if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
 1279                         continue;
 1280 
 1281                 /* Last resort, find a free APIC ID and use it */
 1282                 freeid = first_free_apic_id();
 1283                 if (freeid >= NAPICID)
 1284                         panic("No free physical APIC IDs found");
 1285                 
 1286                 if (io_apic_id_acceptable(apic, freeid)) {
 1287                         swap_apic_id(apic, IO_TO_ID(apic), freeid);
 1288                         continue;
 1289                 }
 1290                 panic("Free physical APIC ID not usable");
 1291         }
 1292         fix_id_to_io_mapping();
 1293 
 1294         /* detect and fix broken Compaq MP table */
 1295         if (apic_int_type(0, 0) == -1) {
 1296                 printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
 1297                 io_apic_ints[nintrs].int_type = 3;      /* ExtInt */
 1298                 io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */
 1299                 /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
 1300                 io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
 1301                 io_apic_ints[nintrs].dst_apic_int = 0;  /* Pin 0 */
 1302                 nintrs++;
 1303         }
 1304 }
 1305 
 1306 
 1307 /* Assign low level interrupt handlers */
 1308 static void
 1309 setup_apic_irq_mapping(void)
 1310 {
 1311         int     x;
 1312         int     int_vector;
 1313 
 1314         /* Clear array */
 1315         for (x = 0; x < APIC_INTMAPSIZE; x++) {
 1316                 int_to_apicintpin[x].ioapic = -1;
 1317                 int_to_apicintpin[x].int_pin = 0;
 1318                 int_to_apicintpin[x].apic_address = NULL;
 1319                 int_to_apicintpin[x].redirindex = 0;
 1320         }
 1321 
 1322         /* First assign ISA/EISA interrupts */
 1323         for (x = 0; x < nintrs; x++) {
 1324                 int_vector = io_apic_ints[x].src_bus_irq;
 1325                 if (int_vector < APIC_INTMAPSIZE &&
 1326                     io_apic_ints[x].int_vector == 0xff && 
 1327                     int_to_apicintpin[int_vector].ioapic == -1 &&
 1328                     (apic_int_is_bus_type(x, ISA) ||
 1329                      apic_int_is_bus_type(x, EISA)) &&
 1330                     io_apic_ints[x].int_type == 0) {
 1331                         assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id), 
 1332                                         io_apic_ints[x].dst_apic_int,
 1333                                         int_vector);
 1334                 }
 1335         }
 1336 
 1337         /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
 1338         for (x = 0; x < nintrs; x++) {
 1339                 if (io_apic_ints[x].dst_apic_int == 0 &&
 1340                     io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
 1341                     io_apic_ints[x].int_vector == 0xff && 
 1342                     int_to_apicintpin[0].ioapic == -1 &&
 1343                     io_apic_ints[x].int_type == 3) {
 1344                         assign_apic_irq(0, 0, 0);
 1345                         break;
 1346                 }
 1347         }
 1348         /* PCI interrupt assignment is deferred */
 1349 }
 1350 
 1351 
 1352 static int
 1353 processor_entry(proc_entry_ptr entry, int cpu)
 1354 {
 1355         /* check for usability */
 1356         if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
 1357                 return 0;
 1358 
 1359         if(entry->apic_id >= NAPICID)
 1360                 panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
 1361         /* check for BSP flag */
 1362         if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
 1363                 boot_cpu_id = entry->apic_id;
 1364                 CPU_TO_ID(0) = entry->apic_id;
 1365                 ID_TO_CPU(entry->apic_id) = 0;
 1366                 return 0;       /* its already been counted */
 1367         }
 1368 
 1369         /* add another AP to list, if less than max number of CPUs */
 1370         else if (cpu < MAXCPU) {
 1371                 CPU_TO_ID(cpu) = entry->apic_id;
 1372                 ID_TO_CPU(entry->apic_id) = cpu;
 1373                 return 1;
 1374         }
 1375 
 1376         return 0;
 1377 }
 1378 
 1379 
 1380 static int
 1381 bus_entry(bus_entry_ptr entry, int bus)
 1382 {
 1383         int     x;
 1384         char    c, name[8];
 1385 
 1386         /* encode the name into an index */
 1387         for (x = 0; x < 6; ++x) {
 1388                 if ((c = entry->bus_type[x]) == ' ')
 1389                         break;
 1390                 name[x] = c;
 1391         }
 1392         name[x] = '\0';
 1393 
 1394         if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
 1395                 panic("unknown bus type: '%s'", name);
 1396 
 1397         bus_data[bus].bus_id = entry->bus_id;
 1398         bus_data[bus].bus_type = x;
 1399 
 1400         return 1;
 1401 }
 1402 
 1403 
 1404 static int
 1405 io_apic_entry(io_apic_entry_ptr entry, int apic)
 1406 {
 1407         if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
 1408                 return 0;
 1409 
 1410         IO_TO_ID(apic) = entry->apic_id;
 1411         if (entry->apic_id < NAPICID)
 1412                 ID_TO_IO(entry->apic_id) = apic;
 1413 
 1414         return 1;
 1415 }
 1416 
 1417 
 1418 static int
 1419 lookup_bus_type(char *name)
 1420 {
 1421         int     x;
 1422 
 1423         for (x = 0; x < MAX_BUSTYPE; ++x)
 1424                 if (strcmp(bus_type_table[x].name, name) == 0)
 1425                         return bus_type_table[x].type;
 1426 
 1427         return UNKNOWN_BUSTYPE;
 1428 }
 1429 
 1430 
 1431 static int
 1432 int_entry(int_entry_ptr entry, int intr)
 1433 {
 1434         int apic;
 1435 
 1436         io_apic_ints[intr].int_type = entry->int_type;
 1437         io_apic_ints[intr].int_flags = entry->int_flags;
 1438         io_apic_ints[intr].src_bus_id = entry->src_bus_id;
 1439         io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
 1440         if (entry->dst_apic_id == 255) {
 1441                 /* This signal goes to all IO APICS.  Select an IO APIC
 1442                    with sufficient number of interrupt pins */
 1443                 for (apic = 0; apic < mp_napics; apic++)
 1444                         if (((io_apic_read(apic, IOAPIC_VER) & 
 1445                               IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >= 
 1446                             entry->dst_apic_int)
 1447                                 break;
 1448                 if (apic < mp_napics)
 1449                         io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
 1450                 else
 1451                         io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
 1452         } else
 1453                 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
 1454         io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
 1455 
 1456         return 1;
 1457 }
 1458 
 1459 
 1460 static int
 1461 apic_int_is_bus_type(int intr, int bus_type)
 1462 {
 1463         int     bus;
 1464 
 1465         for (bus = 0; bus < mp_nbusses; ++bus)
 1466                 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
 1467                     && ((int) bus_data[bus].bus_type == bus_type))
 1468                         return 1;
 1469 
 1470         return 0;
 1471 }
 1472 
 1473 
 1474 /*
 1475  * Given a traditional ISA INT mask, return an APIC mask.
 1476  */
 1477 u_int
 1478 isa_apic_mask(u_int isa_mask)
 1479 {
 1480         int isa_irq;
 1481         int apic_pin;
 1482 
 1483 #if defined(SKIP_IRQ15_REDIRECT)
 1484         if (isa_mask == (1 << 15)) {
 1485                 printf("skipping ISA IRQ15 redirect\n");
 1486                 return isa_mask;
 1487         }
 1488 #endif  /* SKIP_IRQ15_REDIRECT */
 1489 
 1490         isa_irq = ffs(isa_mask);                /* find its bit position */
 1491         if (isa_irq == 0)                       /* doesn't exist */
 1492                 return 0;
 1493         --isa_irq;                              /* make it zero based */
 1494 
 1495         apic_pin = isa_apic_irq(isa_irq);       /* look for APIC connection */
 1496         if (apic_pin == -1)
 1497                 return 0;
 1498 
 1499         return (1 << apic_pin);                 /* convert pin# to a mask */
 1500 }
 1501 
 1502 
 1503 /*
 1504  * Determine which APIC pin an ISA/EISA INT is attached to.
 1505  */
 1506 #define INTTYPE(I)      (io_apic_ints[(I)].int_type)
 1507 #define INTPIN(I)       (io_apic_ints[(I)].dst_apic_int)
 1508 #define INTIRQ(I)       (io_apic_ints[(I)].int_vector)
 1509 #define INTAPIC(I)      (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
 1510 
 1511 #define SRCBUSIRQ(I)    (io_apic_ints[(I)].src_bus_irq)
 1512 int
 1513 isa_apic_irq(int isa_irq)
 1514 {
 1515         int     intr;
 1516 
 1517         for (intr = 0; intr < nintrs; ++intr) {         /* check each record */
 1518                 if (INTTYPE(intr) == 0) {               /* standard INT */
 1519                         if (SRCBUSIRQ(intr) == isa_irq) {
 1520                                 if (apic_int_is_bus_type(intr, ISA) ||
 1521                                     apic_int_is_bus_type(intr, EISA)) {
 1522                                         if (INTIRQ(intr) == 0xff)
 1523                                                 return -1; /* unassigned */
 1524                                         return INTIRQ(intr);    /* found */
 1525                                 }
 1526                         }
 1527                 }
 1528         }
 1529         return -1;                                      /* NOT found */
 1530 }
 1531 
 1532 
 1533 /*
 1534  * Determine which APIC pin a PCI INT is attached to.
 1535  */
 1536 #define SRCBUSID(I)     (io_apic_ints[(I)].src_bus_id)
 1537 #define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
 1538 #define SRCBUSLINE(I)   (io_apic_ints[(I)].src_bus_irq & 0x03)
 1539 int
 1540 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
 1541 {
 1542         int     intr;
 1543 
 1544         --pciInt;                                       /* zero based */
 1545 
 1546         for (intr = 0; intr < nintrs; ++intr)           /* check each record */
 1547                 if ((INTTYPE(intr) == 0)                /* standard INT */
 1548                     && (SRCBUSID(intr) == pciBus)
 1549                     && (SRCBUSDEVICE(intr) == pciDevice)
 1550                     && (SRCBUSLINE(intr) == pciInt))    /* a candidate IRQ */
 1551                         if (apic_int_is_bus_type(intr, PCI)) {
 1552                                 if (INTIRQ(intr) == 0xff)
 1553                                         allocate_apic_irq(intr);
 1554                                 if (INTIRQ(intr) == 0xff)
 1555                                         return -1;      /* unassigned */
 1556                                 return INTIRQ(intr);    /* exact match */
 1557                         }
 1558 
 1559         return -1;                                      /* NOT found */
 1560 }
 1561 
 1562 int
 1563 next_apic_irq(int irq) 
 1564 {
 1565         int intr, ointr;
 1566         int bus, bustype;
 1567 
 1568         bus = 0;
 1569         bustype = 0;
 1570         for (intr = 0; intr < nintrs; intr++) {
 1571                 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
 1572                         continue;
 1573                 bus = SRCBUSID(intr);
 1574                 bustype = apic_bus_type(bus);
 1575                 if (bustype != ISA &&
 1576                     bustype != EISA &&
 1577                     bustype != PCI)
 1578                         continue;
 1579                 break;
 1580         }
 1581         if (intr >= nintrs) {
 1582                 return -1;
 1583         }
 1584         for (ointr = intr + 1; ointr < nintrs; ointr++) {
 1585                 if (INTTYPE(ointr) != 0)
 1586                         continue;
 1587                 if (bus != SRCBUSID(ointr))
 1588                         continue;
 1589                 if (bustype == PCI) {
 1590                         if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
 1591                                 continue;
 1592                         if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
 1593                                 continue;
 1594                 }
 1595                 if (bustype == ISA || bustype == EISA) {
 1596                         if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
 1597                                 continue;
 1598                 }
 1599                 if (INTPIN(intr) == INTPIN(ointr))
 1600                         continue;
 1601                 break;
 1602         }
 1603         if (ointr >= nintrs) {
 1604                 return -1;
 1605         }
 1606         return INTIRQ(ointr);
 1607 }
 1608 #undef SRCBUSLINE
 1609 #undef SRCBUSDEVICE
 1610 #undef SRCBUSID
 1611 #undef SRCBUSIRQ
 1612 
 1613 #undef INTPIN
 1614 #undef INTIRQ
 1615 #undef INTAPIC
 1616 #undef INTTYPE
 1617 
 1618 
 1619 /*
 1620  * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
 1621  *
 1622  * XXX FIXME:
 1623  *  Exactly what this means is unclear at this point.  It is a solution
 1624  *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
 1625  *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
 1626  *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
 1627  *  option.
 1628  */
 1629 int
 1630 undirect_isa_irq(int rirq)
 1631 {
 1632 #if defined(READY)
 1633         if (bootverbose)
 1634             printf("Freeing redirected ISA irq %d.\n", rirq);
 1635         /** FIXME: tickle the MB redirector chip */
 1636         return -1;
 1637 #else
 1638         if (bootverbose)
 1639             printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
 1640         return 0;
 1641 #endif  /* READY */
 1642 }
 1643 
 1644 
 1645 /*
 1646  * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
 1647  */
 1648 int
 1649 undirect_pci_irq(int rirq)
 1650 {
 1651 #if defined(READY)
 1652         if (bootverbose)
 1653                 printf("Freeing redirected PCI irq %d.\n", rirq);
 1654 
 1655         /** FIXME: tickle the MB redirector chip */
 1656         return -1;
 1657 #else
 1658         if (bootverbose)
 1659                 printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
 1660                        rirq);
 1661         return 0;
 1662 #endif  /* READY */
 1663 }
 1664 
 1665 
 1666 /*
 1667  * given a bus ID, return:
 1668  *  the bus type if found
 1669  *  -1 if NOT found
 1670  */
 1671 int
 1672 apic_bus_type(int id)
 1673 {
 1674         int     x;
 1675 
 1676         for (x = 0; x < mp_nbusses; ++x)
 1677                 if (bus_data[x].bus_id == id)
 1678                         return bus_data[x].bus_type;
 1679 
 1680         return -1;
 1681 }
 1682 
 1683 
 1684 /*
 1685  * given a LOGICAL APIC# and pin#, return:
 1686  *  the associated src bus ID if found
 1687  *  -1 if NOT found
 1688  */
 1689 int
 1690 apic_src_bus_id(int apic, int pin)
 1691 {
 1692         int     x;
 1693 
 1694         /* search each of the possible INTerrupt sources */
 1695         for (x = 0; x < nintrs; ++x)
 1696                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1697                     (pin == io_apic_ints[x].dst_apic_int))
 1698                         return (io_apic_ints[x].src_bus_id);
 1699 
 1700         return -1;              /* NOT found */
 1701 }
 1702 
 1703 
 1704 /*
 1705  * given a LOGICAL APIC# and pin#, return:
 1706  *  the associated src bus IRQ if found
 1707  *  -1 if NOT found
 1708  */
 1709 int
 1710 apic_src_bus_irq(int apic, int pin)
 1711 {
 1712         int     x;
 1713 
 1714         for (x = 0; x < nintrs; x++)
 1715                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1716                     (pin == io_apic_ints[x].dst_apic_int))
 1717                         return (io_apic_ints[x].src_bus_irq);
 1718 
 1719         return -1;              /* NOT found */
 1720 }
 1721 
 1722 
 1723 /*
 1724  * given a LOGICAL APIC# and pin#, return:
 1725  *  the associated INTerrupt type if found
 1726  *  -1 if NOT found
 1727  */
 1728 int
 1729 apic_int_type(int apic, int pin)
 1730 {
 1731         int     x;
 1732 
 1733         /* search each of the possible INTerrupt sources */
 1734         for (x = 0; x < nintrs; ++x)
 1735                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1736                     (pin == io_apic_ints[x].dst_apic_int))
 1737                         return (io_apic_ints[x].int_type);
 1738 
 1739         return -1;              /* NOT found */
 1740 }
 1741 
 1742 int 
 1743 apic_irq(int apic, int pin)
 1744 {
 1745         int x;
 1746         int res;
 1747 
 1748         for (x = 0; x < nintrs; ++x)
 1749                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1750                     (pin == io_apic_ints[x].dst_apic_int)) {
 1751                         res = io_apic_ints[x].int_vector;
 1752                         if (res == 0xff)
 1753                                 return -1;
 1754                         if (apic != int_to_apicintpin[res].ioapic)
 1755                                 panic("apic_irq: inconsistent table");
 1756                         if (pin != int_to_apicintpin[res].int_pin)
 1757                                 panic("apic_irq inconsistent table (2)");
 1758                         return res;
 1759                 }
 1760         return -1;
 1761 }
 1762 
 1763 
 1764 /*
 1765  * given a LOGICAL APIC# and pin#, return:
 1766  *  the associated trigger mode if found
 1767  *  -1 if NOT found
 1768  */
 1769 int
 1770 apic_trigger(int apic, int pin)
 1771 {
 1772         int     x;
 1773 
 1774         /* search each of the possible INTerrupt sources */
 1775         for (x = 0; x < nintrs; ++x)
 1776                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1777                     (pin == io_apic_ints[x].dst_apic_int))
 1778                         return ((io_apic_ints[x].int_flags >> 2) & 0x03);
 1779 
 1780         return -1;              /* NOT found */
 1781 }
 1782 
 1783 
 1784 /*
 1785  * given a LOGICAL APIC# and pin#, return:
 1786  *  the associated 'active' level if found
 1787  *  -1 if NOT found
 1788  */
 1789 int
 1790 apic_polarity(int apic, int pin)
 1791 {
 1792         int     x;
 1793 
 1794         /* search each of the possible INTerrupt sources */
 1795         for (x = 0; x < nintrs; ++x)
 1796                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1797                     (pin == io_apic_ints[x].dst_apic_int))
 1798                         return (io_apic_ints[x].int_flags & 0x03);
 1799 
 1800         return -1;              /* NOT found */
 1801 }
 1802 
 1803 
/*
 * set data according to MP defaults
 * FIXME: probably not complete yet...
 *
 * 'type' is the MP default configuration number (1..7); any other value
 * panics.  The routine:
 *  - records the boot CPU's APIC ID and assumes exactly one AP, giving it
 *    APIC ID 1 unless the BSP already has ID 0 -> AP gets 1, else AP gets 0;
 *  - with APIC_IO, registers the single IO APIC (forcing its ID to 2 when
 *    the hardware reports an unusable one);
 *  - fills the first two bus_data[] entries from default_data[type - 1];
 *  - with APIC_IO, fills io_apic_ints[0..15] with a 1-to-1 pin mapping and
 *    then applies the per-type exceptions.
 */
static void
default_mp_table(int type)
{
        int     ap_cpu_id;
#if defined(APIC_IO)
        int     io_apic_id;
        int     pin;
#endif  /* APIC_IO */

        /* disabled debugging aid: describe the selected default config */
#if 0
        printf("  MP default config type: %d\n", type);
        switch (type) {
        case 1:
                printf("   bus: ISA, APIC: 82489DX\n");
                break;
        case 2:
                printf("   bus: EISA, APIC: 82489DX\n");
                break;
        case 3:
                printf("   bus: EISA, APIC: 82489DX\n");
                break;
        case 4:
                printf("   bus: MCA, APIC: 82489DX\n");
                break;
        case 5:
                printf("   bus: ISA+PCI, APIC: Integrated\n");
                break;
        case 6:
                printf("   bus: EISA+PCI, APIC: Integrated\n");
                break;
        case 7:
                printf("   bus: MCA+PCI, APIC: Integrated\n");
                break;
        default:
                printf("   future type\n");
                break;
                /* NOTREACHED */
        }
#endif  /* 0 */

        /* the APIC ID lives in the top byte of the local APIC ID register */
        boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
        /* the other CPU gets whichever of ID 0/1 the BSP is not using */
        ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;

        /* BSP */
        CPU_TO_ID(0) = boot_cpu_id;
        ID_TO_CPU(boot_cpu_id) = 0;

        /* one and only AP */
        CPU_TO_ID(1) = ap_cpu_id;
        ID_TO_CPU(ap_cpu_id) = 1;

#if defined(APIC_IO)
        /* one and only IO APIC */
        io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;

        /*
         * sanity check, refer to MP spec section 3.6.6, last paragraph
         * necessary as some hardware isn't properly setting up the IO APIC
         *
         * The strict variant insists on ID 2; the default variant only
         * rejects IDs 0, 1 and 15.  Either way a rejected ID is replaced
         * by 2.
         */
#if defined(REALLY_ANAL_IOAPICID_VALUE)
        if (io_apic_id != 2) {
#else
        if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
#endif  /* REALLY_ANAL_IOAPICID_VALUE */
                io_apic_set_id(0, 2);
                io_apic_id = 2;
        }
        /* record the logical <-> physical IO APIC mapping */
        IO_TO_ID(0) = io_apic_id;
        ID_TO_IO(io_apic_id) = 0;
#endif  /* APIC_IO */

        /* fill out bus entries */
        switch (type) {
        case 1:
        case 2:
        case 3:
        case 4:
        case 5:
        case 6:
        case 7:
                /* columns 1..4 of the row hold (id, type) for two busses */
                bus_data[0].bus_id = default_data[type - 1][1];
                bus_data[0].bus_type = default_data[type - 1][2];
                bus_data[1].bus_id = default_data[type - 1][3];
                bus_data[1].bus_type = default_data[type - 1][4];
                break;

        /*
         * case 4: case 7:                 MCA NOT supported
         * NOTE(review): types 4 and 7 are accepted by the case list above,
         * so this remark looks stale -- confirm whether MCA is meant to be
         * rejected here.
         */
        default:                /* illegal/reserved */
                panic("BAD default MP config: %d", type);
                /* NOTREACHED */
        }

#if defined(APIC_IO)
        /* general cases from MP v1.4, table 5-2 */
        for (pin = 0; pin < 16; ++pin) {
                io_apic_ints[pin].int_type = 0;
                io_apic_ints[pin].int_flags = 0x05;     /* edge/active-hi */
                io_apic_ints[pin].src_bus_id = 0;
                io_apic_ints[pin].src_bus_irq = pin;    /* IRQ2 caught below */
                io_apic_ints[pin].dst_apic_id = io_apic_id;
                io_apic_ints[pin].dst_apic_int = pin;   /* 1-to-1 */
        }

        /* special cases from MP v1.4, table 5-2 */
        if (type == 2) {
                io_apic_ints[2].int_type = 0xff;        /* N/C */
                io_apic_ints[13].int_type = 0xff;       /* N/C */
#if !defined(APIC_MIXED_MODE)
                /** FIXME: ??? */
                panic("sorry, can't support type 2 default yet");
#endif  /* APIC_MIXED_MODE */
        }
        else
                io_apic_ints[2].src_bus_irq = 0;        /* ISA IRQ0 is on APIC INT 2 */

        /* pin 0: unconnected for type 7, otherwise carries the 8259 */
        if (type == 7)
                io_apic_ints[0].int_type = 0xff;        /* N/C */
        else
                io_apic_ints[0].int_type = 3;   /* vectored 8259 */
#endif  /* APIC_IO */
}
 1929 
 1930 
/*
 * start each AP in our list
 *
 * 'boot_addr' is the base-memory address at which the AP trampoline is
 * installed.  For every AP (logical CPUs 1..mp_naps) a per-CPU data page
 * and an idle stack are allocated and wired into SMPpt[], the BIOS
 * warm-start vector is pointed at the trampoline, and start_ap() is
 * called.  Afterwards the warm-start vector and CMOS reset code are
 * restored, the BSP's idle stack is set up and the temporary P==V mapping
 * is removed.
 *
 * Returns mp_ncpus - 1, i.e. the number of APs that actually came up.
 */
static int
start_all_aps(u_int boot_addr)
{
        int     x, i, pg;
        u_char  mpbiosreason;
        u_long  mpbioswarmvec;
        struct pcpu *pc;
        char *stack;
        uintptr_t kptbase;

        POSTCODE(START_ALL_APS_POST);

        /* spin mutex the APs take in ap_init() while registering */
        mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

        /* initialize BSP's local APIC */
        apic_initialize();
        bsp_apic_ready = 1;

        /* install the AP 1st level boot code */
        install_ap_tramp(boot_addr);


        /* save the current value of the warm-start vector */
        mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
#ifndef PC98
        /* also save the CMOS shutdown/reset code so it can be restored */
        outb(CMOS_REG, BIOS_RESET);
        mpbiosreason = inb(CMOS_DATA);
#endif

        /* set up temporary P==V mapping for AP boot */
        /* XXX this is a hack, we should boot the AP on its own stack/PTD */
        kptbase = (uintptr_t)(void *)KPTphys;
        for (x = 0; x < NKPT; x++)
                PTD[x] = (pd_entry_t)(PG_V | PG_RW |
                    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
        invltlb();

        /* start each AP */
        for (x = 1; x <= mp_naps; ++x) {

                /* This is a bit verbose, it will go away soon.  */

                /* first page of AP's private space */
                pg = x * i386_btop(sizeof(struct privatespace));

                /* allocate a new private data page */
                pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);

                /* wire it into the private page table page */
                SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));

                /* allocate and set up an idle stack data page */
                stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
                for (i = 0; i < KSTACK_PAGES; i++)
                        SMPpt[pg + 1 + i] = (pt_entry_t)
                            (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

                /* prime data page for it to use */
                pcpu_init(pc, x, sizeof(struct pcpu));

                /* setup a vector to our boot code */
                *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
                *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
#ifndef PC98
                outb(CMOS_REG, BIOS_RESET);
                outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
#endif

                /* the AP's initial stack pointer: top of its idle stack */
                bootSTK = &SMP_prvspace[x].idlekstack[KSTACK_PAGES * PAGE_SIZE];
                /* tell the boot code which logical CPU is coming up */
                bootAP = x;

                /* attempt to start the Application Processor */
                CHECK_INIT(99); /* setup checkpoints */
                if (!start_ap(x, boot_addr)) {
                        printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
                        CHECK_PRINT("trace");   /* show checkpoints */
                        /* better panic as the AP may be running loose */
                        printf("panic y/n? [y] ");
                        if (cngetc() != 'n')
                                panic("bye-bye");
                        /*
                         * NOTE(review): answering 'n' falls through, so the
                         * failed AP is still recorded in all_cpus below --
                         * confirm this is intended.
                         */
                }
                CHECK_PRINT("trace");           /* show checkpoints */

                /*
                 * record its version info
                 * NOTE(review): entry 0 is copied here but only refreshed
                 * from lapic.version after the loop -- confirm entry 0 is
                 * already valid at this point.
                 */
                cpu_apic_versions[x] = cpu_apic_versions[0];

                all_cpus |= (1 << x);           /* record AP in CPU map */
        }

        /* build our map of 'other' CPUs */
        PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

        /* fill in our (BSP) APIC version */
        cpu_apic_versions[0] = lapic.version;

        /* restore the warmstart vector */
        *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
#ifndef PC98
        outb(CMOS_REG, BIOS_RESET);
        outb(CMOS_DATA, mpbiosreason);
#endif

        /*
         * Set up the idle context for the BSP.  Similar to above except
         * that some was done by locore, some by pmap.c and some is implicit
         * because the BSP is cpu#0 and the page is initially zero, and also
         * because we can refer to variables by name on the BSP..
         */

        /* Allocate and setup BSP idle stack */
        stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
        for (i = 0; i < KSTACK_PAGES; i++)
                SMPpt[1 + i] = (pt_entry_t)
                    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

        /* tear down the temporary P==V mapping installed above */
        for (x = 0; x < NKPT; x++)
                PTD[x] = 0;
        pmap_set_opt();

        /* number of APs actually started */
        return mp_ncpus - 1;
}
 2056 
 2057 
/*
 * load the 1st level AP boot code into base memory.
 *
 * The bootMP image (bootMP_size bytes) is copied to physical address
 * 'boot_addr', accessed through its KERNBASE alias, and four locations
 * inside the copy are then patched so the code works at its new address.
 */

/* targets for relocation */
extern void bigJump(void);
extern void bootCodeSeg(void);
extern void bootDataSeg(void);
extern void MPentry(void);
extern u_int MP_GDT;
extern u_int mp_gdtbase;

static void
install_ap_tramp(u_int boot_addr)
{
        int     x;
        int     size = *(int *) ((u_long) & bootMP_size);
        u_char *src = (u_char *) ((u_long) bootMP);
        u_char *dst = (u_char *) boot_addr + KERNBASE;
        u_int   boot_base = (u_int) bootMP;     /* link-time base of the image */
        u_int8_t *dst8;
        u_int16_t *dst16;
        u_int32_t *dst32;

        POSTCODE(INSTALL_AP_TRAMP_POST);

        /* byte-wise copy of the trampoline image */
        for (x = 0; x < size; ++x)
                *dst++ = *src++;

        /*
         * modify addresses in code we just moved to basemem. unfortunately we
         * need fairly detailed info about mpboot.s for this to work.  changes
         * to mpboot.s might require changes here.
         *
         * Each patch location is found as (symbol - boot_base), i.e. the
         * symbol's offset within the image, added to the copy's address.
         */

        /* boot code is located in KERNEL space */
        dst = (u_char *) boot_addr + KERNBASE;

        /* modify the lgdt arg */
        dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
        *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);

        /* modify the ljmp target for MPentry() */
        /* the +1 skips one byte at bigJump to reach the 32-bit operand */
        dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
        *dst32 = ((u_int) MPentry - KERNBASE);

        /* modify the target for boot code segment */
        /* low 16 bits of boot_addr, then bits 16-23 in the following byte */
        dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
        dst8 = (u_int8_t *) (dst16 + 1);
        *dst16 = (u_int) boot_addr & 0xffff;
        *dst8 = ((u_int) boot_addr >> 16) & 0xff;

        /* modify the target for boot data segment */
        /* same 24-bit base encoding as for the code segment */
        dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
        dst8 = (u_int8_t *) (dst16 + 1);
        *dst16 = (u_int) boot_addr & 0xffff;
        *dst8 = ((u_int) boot_addr >> 16) & 0xff;
}
 2116 
 2117 
/*
 * this function starts the AP (application processor) identified
 * by the logical CPU number 'logical_cpu', which is translated to a
 * physical APIC ID via CPU_TO_ID().  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It ain't pretty,
 * but it seems to work.
 *
 * 'boot_addr' is the address of the trampoline; bits 12-19 of it form the
 * vector sent with the STARTUP IPIs.
 *
 * Returns 1 if mp_ncpus grew before the APIC timer armed at the end of the
 * routine ran out, 0 otherwise.
 */
static int
start_ap(int logical_cpu, u_int boot_addr)
{
        int     physical_cpu;
        int     vector;
        int     cpus;
        u_long  icr_lo, icr_hi;

        POSTCODE(START_AP_POST);

        /* get the PHYSICAL APIC ID# */
        physical_cpu = CPU_TO_ID(logical_cpu);

        /* calculate the vector */
        vector = (boot_addr >> 12) & 0xff;

        /* used as a watchpoint to signal AP startup */
        cpus = mp_ncpus;

        /*
         * first we do an INIT/RESET IPI this INIT IPI might be run, resetting
         * and running the target CPU. OR this INIT IPI might be latched (P5
         * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
         * ignored.
         */

        /* setup the address for the target AP */
        icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
        icr_hi |= (physical_cpu << 24);
        lapic.icr_hi = icr_hi;

        /* do an INIT IPI: assert RESET */
        /* keep only the top 12 bits of the register; reused for every IPI below */
        icr_lo = lapic.icr_lo & 0xfff00000;
        lapic.icr_lo = icr_lo | 0x0000c500;

        /* wait for pending status end */
        while (lapic.icr_lo & APIC_DELSTAT_MASK)
                 /* spin */ ;

        /* do an INIT IPI: deassert RESET */
        lapic.icr_lo = icr_lo | 0x00008500;

        /* wait for pending status end */
        u_sleep(10000);         /* wait ~10mS */
        while (lapic.icr_lo & APIC_DELSTAT_MASK)
                 /* spin */ ;

        /*
         * next we do a STARTUP IPI: the previous INIT IPI might still be
         * latched, (P5 bug) this 1st STARTUP would then terminate
         * immediately, and the previously started INIT IPI would continue. OR
         * the previous INIT IPI has already run. and this STARTUP IPI will
         * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
         * will run.
         */

        /* do a STARTUP IPI */
        lapic.icr_lo = icr_lo | 0x00000600 | vector;
        while (lapic.icr_lo & APIC_DELSTAT_MASK)
                 /* spin */ ;
        u_sleep(200);           /* wait ~200uS */

        /*
         * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
         * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
         * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
         * recognized after hardware RESET or INIT IPI.
         */

        lapic.icr_lo = icr_lo | 0x00000600 | vector;
        while (lapic.icr_lo & APIC_DELSTAT_MASK)
                 /* spin */ ;
        u_sleep(200);           /* wait ~200uS */

        /* wait for it to start */
        set_apic_timer(5000000);/* == 5 seconds */
        /* poll until the timer reads zero; success once mp_ncpus has grown */
        while (read_apic_timer())
                if (mp_ncpus > cpus)
                        return 1;       /* return SUCCESS */

        return 0;               /* return FAILURE */
}
 2207 
 2208 #if defined(APIC_IO)
 2209 
#ifdef COUNT_XINVLTLB_HITS
/*
 * Optional TLB-shootdown statistics, exported under the debug.xhits
 * sysctl node.
 */

/*
 * Per-CPU arrays (MAXCPU entries each), exported as opaque blobs.
 * Presumably bumped by the IPI handlers on the receiving CPUs; the
 * writers are not in this part of the file.
 */
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
    sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
    sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
    sizeof(xhits_rng), "IU", "");

/*
 * Sender-side counters for the unmasked requests: incremented by
 * smp_invltlb(), smp_invlpg() and smp_invlpg_range() respectively;
 * ipi_range_size accumulates the number of pages covered by range requests.
 */
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
    0, "");

/*
 * Sender-side counters for the masked (targeted) requests: incremented by
 * smp_masked_invltlb(), smp_masked_invlpg() and smp_masked_invlpg_range().
 */
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
    &ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
    &ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
    &ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
    &ipi_masked_range_size, 0, "");
#endif
 2245 
 2246 /*
 2247  * Flush the TLB on all other CPU's
 2248  */
 2249 static void
 2250 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 2251 {
 2252         u_int ncpu;
 2253         register_t eflags;
 2254 
 2255         ncpu = mp_ncpus - 1;    /* does not shootdown self */
 2256         if (ncpu < 1)
 2257                 return;         /* no other cpus */
 2258         eflags = read_eflags();
 2259         if ((eflags & PSL_I) == 0)
 2260                 panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
 2261         mtx_lock_spin(&smp_tlb_mtx);
 2262         smp_tlb_addr1 = addr1;
 2263         smp_tlb_addr2 = addr2;
 2264         atomic_store_rel_int(&smp_tlb_wait, 0);
 2265         ipi_all_but_self(vector);
 2266         while (smp_tlb_wait < ncpu)
 2267                 ia32_pause();
 2268         mtx_unlock_spin(&smp_tlb_mtx);
 2269 }
 2270 
 2271 /*
 2272  * This is about as magic as it gets.  fortune(1) has got similar code
 2273  * for reversing bits in a word.  Who thinks up this stuff??
 2274  *
 2275  * Yes, it does appear to be consistently faster than:
 2276  * while (i = ffs(m)) {
 2277  *      m >>= i;
 2278  *      bits++;
 2279  * }
 2280  * and
 2281  * while (lsb = (m & -m)) {     // This is magic too
 2282  *      m &= ~lsb;              // or: m ^= lsb
 2283  *      bits++;
 2284  * }
 2285  * Both of these latter forms do some very strange things on gcc-3.1 with
 2286  * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
 2287  * There is probably an SSE or MMX popcnt instruction.
 2288  *
 2289  * I wonder if this should be in libkern?
 2290  *
 2291  * XXX Stop the presses!  Another one:
 2292  * static __inline u_int32_t
 2293  * popcnt1(u_int32_t v)
 2294  * {
 2295  *      v -= ((v >> 1) & 0x55555555);
 2296  *      v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
 2297  *      v = (v + (v >> 4)) & 0x0F0F0F0F;
 2298  *      return (v * 0x01010101) >> 24;
 2299  * }
 2300  * The downside is that it has a multiply.  With a pentium3 with
 2301  * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
 2302  * an imull, and in that case it is faster.  In most other cases
 2303  * it appears slightly slower.
 2304  */
 2305 static __inline u_int32_t
 2306 popcnt(u_int32_t m)
 2307 {
 2308 
 2309         m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
 2310         m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
 2311         m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
 2312         m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
 2313         m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
 2314         return m;
 2315 }
 2316 
 2317 static void
 2318 smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 2319 {
 2320         int ncpu, othercpus;
 2321         register_t eflags;
 2322 
 2323         othercpus = mp_ncpus - 1;
 2324         if (mask == (u_int)-1) {
 2325                 ncpu = othercpus;
 2326                 if (ncpu < 1)
 2327                         return;
 2328         } else {
 2329                 /* XXX there should be a pcpu self mask */
 2330                 mask &= ~(1 << PCPU_GET(cpuid));
 2331                 if (mask == 0)
 2332                         return;
 2333                 ncpu = popcnt(mask);
 2334                 if (ncpu > othercpus) {
 2335                         /* XXX this should be a panic offence */
 2336                         printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
 2337                             ncpu, othercpus);
 2338                         ncpu = othercpus;
 2339                 }
 2340                 /* XXX should be a panic, implied by mask == 0 above */
 2341                 if (ncpu < 1)
 2342                         return;
 2343         }
 2344         eflags = read_eflags();
 2345         if ((eflags & PSL_I) == 0)
 2346                 panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
 2347         mtx_lock_spin(&smp_tlb_mtx);
 2348         smp_tlb_addr1 = addr1;
 2349         smp_tlb_addr2 = addr2;
 2350         atomic_store_rel_int(&smp_tlb_wait, 0);
 2351         if (mask == (u_int)-1)
 2352                 ipi_all_but_self(vector);
 2353         else
 2354                 ipi_selected(mask, vector);
 2355         while (smp_tlb_wait < ncpu)
 2356                 ia32_pause();
 2357         mtx_unlock_spin(&smp_tlb_mtx);
 2358 }
 2359 #endif
 2360 
 2361 void
 2362 smp_invltlb(void)
 2363 {
 2364 #if defined(APIC_IO)
 2365         if (smp_started) {
 2366                 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
 2367 #ifdef COUNT_XINVLTLB_HITS
 2368                 ipi_global++;
 2369 #endif
 2370         }
 2371 #endif  /* APIC_IO */
 2372 }
 2373 
 2374 void
 2375 smp_invlpg(vm_offset_t addr)
 2376 {
 2377 #if defined(APIC_IO)
 2378         if (smp_started) {
 2379                 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
 2380 #ifdef COUNT_XINVLTLB_HITS
 2381                 ipi_page++;
 2382 #endif
 2383         }
 2384 #endif  /* APIC_IO */
 2385 }
 2386 
 2387 void
 2388 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
 2389 {
 2390 #if defined(APIC_IO)
 2391         if (smp_started) {
 2392                 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
 2393 #ifdef COUNT_XINVLTLB_HITS
 2394                 ipi_range++;
 2395                 ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
 2396 #endif
 2397         }
 2398 #endif  /* APIC_IO */
 2399 }
 2400 
 2401 void
 2402 smp_masked_invltlb(u_int mask)
 2403 {
 2404 #if defined(APIC_IO)
 2405         if (smp_started) {
 2406                 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
 2407 #ifdef COUNT_XINVLTLB_HITS
 2408                 ipi_masked_global++;
 2409 #endif
 2410         }
 2411 #endif  /* APIC_IO */
 2412 }
 2413 
 2414 void
 2415 smp_masked_invlpg(u_int mask, vm_offset_t addr)
 2416 {
 2417 #if defined(APIC_IO)
 2418         if (smp_started) {
 2419                 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
 2420 #ifdef COUNT_XINVLTLB_HITS
 2421                 ipi_masked_page++;
 2422 #endif
 2423         }
 2424 #endif  /* APIC_IO */
 2425 }
 2426 
 2427 void
 2428 smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
 2429 {
 2430 #if defined(APIC_IO)
 2431         if (smp_started) {
 2432                 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
 2433 #ifdef COUNT_XINVLTLB_HITS
 2434                 ipi_masked_range++;
 2435                 ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
 2436 #endif
 2437         }
 2438 #endif  /* APIC_IO */
 2439 }
 2440 
 2441 
/*
 * This is called once the rest of the system is up and running and we're
 * ready to let the AP's out of the pen.
 *
 * Runs on each AP.  It waits for aps_ready (set by release_aps()),
 * initializes this CPU's register, FPU, SSE and local APIC state, checks
 * that the per-CPU cpuid agrees with the APIC ID mapping, counts itself
 * in smp_cpus under ap_boot_mtx, and the last AP to arrive sets
 * smp_started.  Finally it takes sched_lock and calls cpu_throw(), which
 * is not expected to return.
 */
extern void     enable_sse(void);

void
ap_init(void)
{
        u_int   apic_id;

        /* spin until all the AP's are ready */
        while (!aps_ready)
                ia32_pause();

        /* BSP may have changed PTD while we were waiting */
        invltlb();

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
        /* load the IDT described by r_idt on this CPU as well */
        lidt(&r_idt);
#endif

        /* set up CPU registers and state */
        cpu_setregs();

        /* set up FPU state on the AP */
        npxinit(__INITIAL_NPXCW__);

        /* set up SSE registers */
        enable_sse();

        /* A quick check from sanity claus */
        /* translate bits 24-27 of the local APIC ID into a logical CPU number */
        apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
        if (PCPU_GET(cpuid) != apic_id) {
                printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
                printf("SMP: apic_id = %d\n", apic_id);
                printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
                panic("cpuid mismatch! boom!!");
        }

        /* Init local apic for irq's */
        apic_initialize();

        /* Set memory range attributes for this CPU to match the BSP */
        mem_range_AP_init();

        /* the registration below is serialized between APs */
        mtx_lock_spin(&ap_boot_mtx);

        smp_cpus++;

        CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
        printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));

        /* Build our map of 'other' CPUs. */
        PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

        if (bootverbose)
                apic_dump("ap_init()");

        /* the last CPU to check in opens the gate for everyone */
        if (smp_cpus == mp_ncpus) {
                /* enable IPI's, tlb shootdown, freezes etc */
                atomic_store_rel_int(&smp_started, 1);
                smp_active = 1;  /* historic */
        }

        mtx_unlock_spin(&ap_boot_mtx);

        /* wait until all the AP's are up */
        while (smp_started == 0)
                ia32_pause();

        /* ok, now grab sched_lock and enter the scheduler */
        mtx_lock_spin(&sched_lock);

        /* stamp this CPU's switch time and tick count before the first switch */
        binuptime(PCPU_PTR(switchtime));
        PCPU_SET(switchticks, ticks);

        cpu_throw();    /* doesn't return */

        /* only reached if cpu_throw() unexpectedly comes back */
        panic("scheduler returned us to %s", __func__);
}
 2523 
 2524 /*
 2525  * For statclock, we send an IPI to all CPU's to have them call this
 2526  * function.
 2527  *
 2528  * WARNING! unpend() will call statclock_process() directly and skip this
 2529  * routine.
 2530  */
 2531 void
 2532 forwarded_statclock(struct trapframe frame)
 2533 {
 2534 
 2535         mtx_lock_spin(&sched_lock);
 2536         statclock_process(curthread->td_kse, TRAPF_PC(&frame),
 2537             TRAPF_USERMODE(&frame));
 2538         mtx_unlock_spin(&sched_lock);
 2539 }
 2540 
 2541 void
 2542 forward_statclock(void)
 2543 {
 2544         int map;
 2545 
 2546         CTR0(KTR_SMP, "forward_statclock");
 2547 
 2548         if (!smp_started || cold || panicstr)
 2549                 return;
 2550 
 2551         map = PCPU_GET(other_cpus) & ~stopped_cpus ;
 2552         if (map != 0)
 2553                 ipi_selected(map, IPI_STATCLOCK);
 2554 }
 2555 
 2556 /*
 2557  * For each hardclock(), we send an IPI to all other CPU's to have them
 2558  * execute this function.  It would be nice to reduce contention on
 2559  * sched_lock if we could simply peek at the CPU to determine the user/kernel
 2560  * state and call hardclock_process() on the CPU receiving the clock interrupt
 2561  * and then just use a simple IPI to handle any ast's if needed.
 2562  *
 2563  * WARNING! unpend() will call hardclock_process() directly and skip this
 2564  * routine.
 2565  */
 2566 void
 2567 forwarded_hardclock(struct trapframe frame)
 2568 {
 2569 
 2570         mtx_lock_spin(&sched_lock);
 2571         hardclock_process(curthread, TRAPF_USERMODE(&frame));
 2572         mtx_unlock_spin(&sched_lock);
 2573 }
 2574 
 2575 void 
 2576 forward_hardclock(void)
 2577 {
 2578         u_int map;
 2579 
 2580         CTR0(KTR_SMP, "forward_hardclock");
 2581 
 2582         if (!smp_started || cold || panicstr)
 2583                 return;
 2584 
 2585         map = PCPU_GET(other_cpus) & ~stopped_cpus ;
 2586         if (map != 0)
 2587                 ipi_selected(map, IPI_HARDCLOCK);
 2588 }
 2589 
 2590 #ifdef APIC_INTR_REORDER
 2591 /*
 2592  *      Maintain mapping from softintr vector to isr bit in local apic.
 2593  */
 2594 void
 2595 set_lapic_isrloc(int intr, int vector)
 2596 {
 2597         if (intr < 0 || intr > 32)
 2598                 panic("set_apic_isrloc: bad intr argument: %d",intr);
 2599         if (vector < ICU_OFFSET || vector > 255)
 2600                 panic("set_apic_isrloc: bad vector argument: %d",vector);
 2601         apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
 2602         apic_isrbit_location[intr].bit = (1<<(vector & 31));
 2603 }
 2604 #endif
 2605 
 2606 /*
 2607  * send an IPI to a set of cpus.
 2608  */
 2609 void
 2610 ipi_selected(u_int32_t cpus, u_int ipi)
 2611 {
 2612 
 2613         CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 2614         selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
 2615 }
 2616 
 2617 /*
 2618  * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
 2619  */
 2620 void
 2621 ipi_all(u_int ipi)
 2622 {
 2623 
 2624         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 2625         apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED); 
 2626 }
 2627 
 2628 /*
 2629  * send an IPI to all CPUs EXCEPT myself
 2630  */
 2631 void
 2632 ipi_all_but_self(u_int ipi)
 2633 {
 2634 
 2635         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 2636         apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED); 
 2637 }
 2638 
 2639 /*
 2640  * send an IPI to myself
 2641  */
 2642 void
 2643 ipi_self(u_int ipi)
 2644 {
 2645 
 2646         CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 2647         apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED); 
 2648 }
 2649 
 2650 static void
 2651 release_aps(void *dummy __unused)
 2652 {
 2653 
 2654         mtx_lock_spin(&sched_lock);
 2655         atomic_store_rel_int(&aps_ready, 1);
 2656         while (smp_started == 0)
 2657                 ia32_pause();
 2658         mtx_unlock_spin(&sched_lock);
 2659 }
 2660 
 2661 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);

Cache object: cd644563bcc5fe4818be3fbf1aa55e2e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.