The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/mp_machdep.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1996, by Steve Passe
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. The name of the developer may NOT be used to endorse or promote products
   11  *    derived from this software without specific prior written permission.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  *
   25  * $FreeBSD$
   26  */
   27 
   28 #include "opt_cpu.h"
   29 #include "opt_user_ldt.h"
   30 
   31 #ifdef SMP
   32 #include <machine/smptests.h>
   33 #else
   34 #error
   35 #endif
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/bus.h>
   40 #include <sys/kernel.h>
   41 #include <sys/proc.h>
   42 #include <sys/sysctl.h>
   43 #include <sys/malloc.h>
   44 #include <sys/memrange.h>
   45 #ifdef BETTER_CLOCK
   46 #include <sys/dkstat.h>
   47 #endif
   48 #include <sys/cons.h>   /* cngetc() */
   49 
   50 #include <vm/vm.h>
   51 #include <vm/vm_param.h>
   52 #include <vm/pmap.h>
   53 #include <vm/vm_kern.h>
   54 #include <vm/vm_extern.h>
   55 #ifdef BETTER_CLOCK
   56 #include <sys/lock.h>
   57 #include <vm/vm_map.h>
   58 #include <sys/user.h>
   59 #ifdef GPROF 
   60 #include <sys/gmon.h>
   61 #endif
   62 #endif
   63 
   64 #include <machine/smp.h>
   65 #include <machine/apic.h>
   66 #include <machine/atomic.h>
   67 #include <machine/cpufunc.h>
   68 #include <machine/mpapic.h>
   69 #include <machine/psl.h>
   70 #include <machine/segments.h>
   71 #include <machine/smptests.h>   /** TEST_DEFAULT_CONFIG, TEST_TEST1 */
   72 #include <machine/tss.h>
   73 #include <machine/specialreg.h>
   74 #include <machine/globaldata.h>
   75 
   76 #include <pci/pcivar.h>
   77 
   78 #if defined(APIC_IO)
   79 #include <machine/md_var.h>             /* setidt() */
   80 #include <i386/isa/icu.h>               /* IPIs */
   81 #include <i386/isa/intr_machdep.h>      /* IPIs */
   82 #endif  /* APIC_IO */
   83 
   84 #if defined(TEST_DEFAULT_CONFIG)
   85 #define MPFPS_MPFB1     TEST_DEFAULT_CONFIG
   86 #else
   87 #define MPFPS_MPFB1     mpfps->mpfb1
   88 #endif  /* TEST_DEFAULT_CONFIG */
   89 
   90 #define WARMBOOT_TARGET         0
   91 #define WARMBOOT_OFF            (KERNBASE + 0x0467)
   92 #define WARMBOOT_SEG            (KERNBASE + 0x0469)
   93 
   94 #ifdef PC98
   95 #define BIOS_BASE               (0xe8000)
   96 #define BIOS_SIZE               (0x18000)
   97 #else
   98 #define BIOS_BASE               (0xf0000)
   99 #define BIOS_SIZE               (0x10000)
  100 #endif
  101 #define BIOS_COUNT              (BIOS_SIZE/4)
  102 
  103 #define CMOS_REG                (0x70)
  104 #define CMOS_DATA               (0x71)
  105 #define BIOS_RESET              (0x0f)
  106 #define BIOS_WARM               (0x0a)
  107 
  108 #define PROCENTRY_FLAG_EN       0x01
  109 #define PROCENTRY_FLAG_BP       0x02
  110 #define IOAPICENTRY_FLAG_EN     0x01
  111 
  112 
  113 /* MP Floating Pointer Structure; mp_probe() points 'mpfps' at the one found by search_for_sig() */
  114 typedef struct MPFPS {
  115         char    signature[4];   /* matched as a 32-bit word against MP_SIG ("_MP_") */
  116         void   *pap;            /* read by mptable_pass1() as the MP config table header (mpcth_t); 0 => panic */
  117         u_char  length;
  118         u_char  spec_rev;
  119         u_char  checksum;
  120         u_char  mpfb1;          /* nonzero selects a default configuration (see MPFPS_MPFB1 in mptable_pass1()) */
  121         u_char  mpfb2;
  122         u_char  mpfb3;
  123         u_char  mpfb4;
  124         u_char  mpfb5;
  125 }      *mpfps_t;
  126 
  127 /* MP Configuration Table Header */
  128 typedef struct MPCTH {
  129         char    signature[4];
  130         u_short base_table_length;
  131         u_char  spec_rev;
  132         u_char  checksum;
  133         u_char  oem_id[8];
  134         u_char  product_id[12];
  135         void   *oem_table_pointer;
  136         u_short oem_table_size;
  137         u_short entry_count;
  138         void   *apic_address;
  139         u_short extended_table_length;
  140         u_char  extended_table_checksum;
  141         u_char  reserved;
  142 }      *mpcth_t;
  143 
  144 
  145 typedef struct PROCENTRY {
  146         u_char  type;
  147         u_char  apic_id;
  148         u_char  apic_version;
  149         u_char  cpu_flags;
  150         u_long  cpu_signature;
  151         u_long  feature_flags;
  152         u_long  reserved1;
  153         u_long  reserved2;
  154 }      *proc_entry_ptr;
  155 
  156 typedef struct BUSENTRY {
  157         u_char  type;
  158         u_char  bus_id;
  159         char    bus_type[6];
  160 }      *bus_entry_ptr;
  161 
  162 typedef struct IOAPICENTRY {
  163         u_char  type;
  164         u_char  apic_id;
  165         u_char  apic_version;
  166         u_char  apic_flags;
  167         void   *apic_address;
  168 }      *io_apic_entry_ptr;
  169 
  170 typedef struct INTENTRY {
  171         u_char  type;
  172         u_char  int_type;
  173         u_short int_flags;
  174         u_char  src_bus_id;
  175         u_char  src_bus_irq;
  176         u_char  dst_apic_id;
  177         u_char  dst_apic_int;
  178 }      *int_entry_ptr;
  179 
  180 /* descriptions of MP basetable entries */
  181 typedef struct BASETABLE_ENTRY {
  182         u_char  type;
  183         u_char  length;
  184         char    name[16];
  185 }       basetable_entry;
  186 
  187 /*
  188  * this code MUST be enabled here and in mpboot.s.
  189  * it follows the very early stages of AP boot by placing values in CMOS ram.
  190  * it NORMALLY will never be needed and thus the primitive method for enabling.
  191  *
  192 #define CHECK_POINTS
  193  */
  194 
  195 #if defined(CHECK_POINTS) && !defined(PC98)
  196 #define CHECK_READ(A)    (outb(CMOS_REG, (A)), inb(CMOS_DATA))
  197 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
  198 
  199 #define CHECK_INIT(D);                          \
  200         CHECK_WRITE(0x34, (D));                 \
  201         CHECK_WRITE(0x35, (D));                 \
  202         CHECK_WRITE(0x36, (D));                 \
  203         CHECK_WRITE(0x37, (D));                 \
  204         CHECK_WRITE(0x38, (D));                 \
  205         CHECK_WRITE(0x39, (D));
  206 
  207 #define CHECK_PRINT(S);                         \
  208         printf("%s: %d, %d, %d, %d, %d, %d\n",  \
  209            (S),                                 \
  210            CHECK_READ(0x34),                    \
  211            CHECK_READ(0x35),                    \
  212            CHECK_READ(0x36),                    \
  213            CHECK_READ(0x37),                    \
  214            CHECK_READ(0x38),                    \
  215            CHECK_READ(0x39));
  216 
  217 #else                           /* CHECK_POINTS */
  218 
  219 #define CHECK_INIT(D)
  220 #define CHECK_PRINT(S)
  221 
  222 #endif                          /* CHECK_POINTS */
  223 
  224 /*
  225  * Values to send to the POST hardware.
  226  */
  227 #define MP_BOOTADDRESS_POST     0x10
  228 #define MP_PROBE_POST           0x11
  229 #define MPTABLE_PASS1_POST      0x12
  230 
  231 #define MP_START_POST           0x13
  232 #define MP_ENABLE_POST          0x14
  233 #define MPTABLE_PASS2_POST      0x15
  234 
  235 #define START_ALL_APS_POST      0x16
  236 #define INSTALL_AP_TRAMP_POST   0x17
  237 #define START_AP_POST           0x18
  238 
  239 #define MP_ANNOUNCE_POST        0x19
  240 
  241 static int need_hyperthreading_fixup;
  242 static u_int logical_cpus;
  243 u_int logical_cpus_mask;                /* bit mask of logical cpu's */
  244 
  245 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
  246 int     current_postcode;
  247 
  248 /** XXX FIXME: what system files declare these??? */
  249 extern struct region_descriptor r_gdt, r_idt;
  250 
  251 int     bsp_apic_ready = 0;     /* flags useability of BSP apic */
  252 int     mp_ncpus;               /* # of CPUs, including BSP */
  253 int     mp_naps;                /* # of Applications processors */
  254 int     mp_nbusses;             /* # of busses */
  255 int     mp_napics;              /* # of IO APICs */
  256 int     boot_cpu_id;            /* designated BSP */
  257 vm_offset_t cpu_apic_address;
  258 vm_offset_t io_apic_address[NAPICID];   /* NAPICID is more than enough */
  259 extern  int nkpt;
  260 
  261 u_int32_t cpu_apic_versions[MAXCPU];
  262 u_int32_t *io_apic_versions;
  263 
  264 #ifdef APIC_INTR_DIAGNOSTIC
  265 int apic_itrace_enter[32];
  266 int apic_itrace_tryisrlock[32];
  267 int apic_itrace_gotisrlock[32];
  268 int apic_itrace_active[32];
  269 int apic_itrace_masked[32];
  270 int apic_itrace_noisrlock[32];
  271 int apic_itrace_masked2[32];
  272 int apic_itrace_unmask[32];
  273 int apic_itrace_noforward[32];
  274 int apic_itrace_leave[32];
  275 int apic_itrace_enter2[32];
  276 int apic_itrace_doreti[32];
  277 int apic_itrace_splz[32];
  278 int apic_itrace_eoi[32];
  279 #ifdef APIC_INTR_DIAGNOSTIC_IRQ
  280 unsigned short apic_itrace_debugbuffer[32768];
  281 int apic_itrace_debugbuffer_idx;
  282 struct simplelock apic_itrace_debuglock;
  283 #endif
  284 #endif
  285 
  286 #ifdef APIC_INTR_REORDER
  287 struct {
  288         volatile int *location;
  289         int bit;
  290 } apic_isrbit_location[32];
  291 #endif
  292 
  293 struct apic_intmapinfo  int_to_apicintpin[APIC_INTMAPSIZE];
  294 
  295 /*
  296  * APIC ID logical/physical mapping structures.
  297  * We oversize these to simplify boot-time config.
  298  */
  299 int     cpu_num_to_apic_id[NAPICID];
  300 int     io_num_to_apic_id[NAPICID];
  301 int     apic_id_to_logical[NAPICID];
  302 
  303 
  304 /* Bitmap of all available CPUs */
  305 u_int   all_cpus;
  306 
  307 /* AP uses this during bootstrap.  Do not staticize.  */
  308 char *bootSTK;
  309 static int bootAP;
  310 
  311 /* Hotwire a 0->4MB V==P mapping */
  312 extern pt_entry_t *KPTphys;
  313 
  314 /* SMP page table page */
  315 extern pt_entry_t *SMPpt;
  316 
  317 struct pcb stoppcbs[MAXCPU];
  318 
  319 int smp_started;                /* has the system started? */
  320 
  321 /*
  322  * Local data and functions.
  323  */
  324 
  325 static int      mp_capable;
  326 static u_int    boot_address;
  327 static u_int    base_memory;
  328 
  329 static int      picmode;                /* 0: virtual wire mode, 1: PIC mode */
  330 static mpfps_t  mpfps;
  331 static int      search_for_sig(u_int32_t target, int count);
  332 static void     mp_enable(u_int boot_addr);
  333 
  334 static void     mptable_hyperthread_fixup(u_int id_mask);
  335 static void     mptable_pass1(void);
  336 static int      mptable_pass2(void);
  337 static void     default_mp_table(int type);
  338 static void     fix_mp_table(void);
  339 static void     setup_apic_irq_mapping(void);
  340 static void     init_locks(void);
  341 static int      start_all_aps(u_int boot_addr);
  342 static void     install_ap_tramp(u_int boot_addr);
  343 static int      start_ap(int logicalCpu, u_int boot_addr);
  344 static int      apic_int_is_bus_type(int intr, int bus_type);
  345 
  346 static int      hlt_cpus_mask;
  347 static int      hlt_logical_cpus = 1;
  348 static u_int    hyperthreading_cpus;
  349 static u_int    hyperthreading_cpus_mask;
  350 static int      hyperthreading_allowed;
  351 static struct   sysctl_ctx_list logical_cpu_clist;
  352 
  353 /*
  354  * Calculate usable address in base memory for AP trampoline code.
  355  */
  356 u_int
  357 mp_bootaddress(u_int basemem)
  358 {
  359         POSTCODE(MP_BOOTADDRESS_POST);
  360 
  361         base_memory = basemem * 1024;   /* convert to bytes */
  362 
  363         boot_address = base_memory & ~0xfff;    /* round down to 4k boundary */
  364         if ((base_memory - boot_address) < bootMP_size)
  365                 boot_address -= 4096;   /* not enough, lower by 4k */
  366 
  367         return boot_address;
  368 }
  369 
  370 
  371 /*
  372  * Look for an Intel MP spec table (ie, SMP capable hardware).
  373  */
  374 int
  375 mp_probe(void)
  376 {
  377         int     x;
  378         u_long  segment;
  379         u_int32_t target;
  380 
  381         POSTCODE(MP_PROBE_POST);
  382 
  383         /* see if EBDA exists */
  384         if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
  385                 /* search first 1K of EBDA */
  386                 target = (u_int32_t) (segment << 4);
  387                 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
  388                         goto found;
  389         } else {
  390                 /* last 1K of base memory, effective 'top of base' passed in */
  391                 target = (u_int32_t) (base_memory - 0x400);
  392                 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
  393                         goto found;
  394         }
  395 
  396         /* search the BIOS */
  397         target = (u_int32_t) BIOS_BASE;
  398         if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
  399                 goto found;
  400 
  401         /* nothing found */
  402         mpfps = (mpfps_t)0;
  403         mp_capable = 0;
  404         return 0;
  405 
  406 found:
  407         /* calculate needed resources */
  408         mpfps = (mpfps_t)x;
  409         mptable_pass1();
  410 
  411         /* flag fact that we are running multiple processors */
  412         mp_capable = 1;
  413         return 1;
  414 }
  415 
  416 
  417 /*
  418  * Startup the SMP processors.
  419  */
  420 void
  421 mp_start(void)
  422 {
  423         POSTCODE(MP_START_POST);
  424 
  425         /* look for MP capable motherboard */
  426         if (mp_capable)
  427                 mp_enable(boot_address);
  428         else
  429                 panic("MP hardware not found!");
  430 }
  431 
  432 
  433 /*
  434  * Print various information about the SMP system hardware and setup.
  435  */
  436 void
  437 mp_announce(void)
  438 {
  439         int     x;
  440 
  441         POSTCODE(MP_ANNOUNCE_POST);
  442 
  443         printf("FreeBSD/SMP: Multiprocessor motherboard: %d CPUs\n", mp_ncpus);
  444         printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
  445         printf(", version: 0x%08x", cpu_apic_versions[0]);
  446         printf(", at 0x%08x\n", cpu_apic_address);
  447         for (x = 1; x <= mp_naps; ++x) {
  448                 printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
  449                 printf(", version: 0x%08x", cpu_apic_versions[x]);
  450                 printf(", at 0x%08x\n", cpu_apic_address);
  451         }
  452 
  453 #if defined(APIC_IO)
  454         for (x = 0; x < mp_napics; ++x) {
  455                 printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
  456                 printf(", version: 0x%08x", io_apic_versions[x]);
  457                 printf(", at 0x%08x\n", io_apic_address[x]);
  458         }
  459 #else
  460         printf(" Warning: APIC I/O disabled\n");
  461 #endif  /* APIC_IO */
  462 }
  463 
  464 /*
  465  * AP cpu's call this to sync up protected mode.
  466  */
  467 void
  468 init_secondary(void)
  469 {
  470         int     gsel_tss;
  471         int     x, myid = bootAP;
  472         u_int   cr0;
  473 
  474         gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
  475         gdt_segs[GPROC0_SEL].ssd_base =
  476                 (int) &SMP_prvspace[myid].globaldata.gd_common_tss;
  477         SMP_prvspace[myid].globaldata.gd_prvspace = &SMP_prvspace[myid];
  478 
  479         for (x = 0; x < NGDT; x++) {
  480                 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
  481         }
  482 
  483         r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
  484         r_gdt.rd_base = (int) &gdt[myid * NGDT];
  485         lgdt(&r_gdt);                   /* does magic intra-segment return */
  486 
  487         lidt(&r_idt);
  488 
  489         lldt(_default_ldt);
  490 #ifdef USER_LDT
  491         currentldt = _default_ldt;
  492 #endif
  493 
  494         gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
  495         gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
  496         common_tss.tss_esp0 = 0;        /* not used until after switch */
  497         common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
  498         common_tss.tss_ioopt = (sizeof common_tss) << 16;
  499         tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd;
  500         common_tssd = *tss_gdt;
  501         ltr(gsel_tss);
  502 
  503         /*
  504          * Set to a known state:
  505          * Set by mpboot.s: CR0_PG, CR0_PE
  506          * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
  507          */
  508         cr0 = rcr0();
  509         cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
  510         load_cr0(cr0);
  511 
  512         pmap_set_opt();
  513 }
  514 
  515 
  516 #if defined(APIC_IO)
  517 /*
  518  * Final configuration of the BSP's local APIC:
  519  *  - disable 'pic mode'.
  520  *  - disable 'virtual wire mode'.
  521  *  - enable NMI.
  522  */
  523 void
  524 bsp_apic_configure(void)
  525 {
  526         u_char          byte;
  527         u_int32_t       temp;
  528 
  529         /* leave 'pic mode' if necessary */
  530         if (picmode) {
  531                 outb(0x22, 0x70);       /* select IMCR */
  532                 byte = inb(0x23);       /* current contents */
  533                 byte |= 0x01;           /* mask external INTR */
  534                 outb(0x23, byte);       /* disconnect 8259s/NMI */
  535         }
  536 
  537         /* mask lint0 (the 8259 'virtual wire' connection) */
  538         temp = lapic.lvt_lint0;
  539         temp |= APIC_LVT_M;             /* set the mask */
  540         lapic.lvt_lint0 = temp;
  541 
  542         /* setup lint1 to handle NMI */
  543         temp = lapic.lvt_lint1;
  544         temp &= ~APIC_LVT_M;            /* clear the mask */
  545         lapic.lvt_lint1 = temp;
  546 
  547         if (bootverbose)
  548                 apic_dump("bsp_apic_configure()");
  549 }
  550 #endif  /* APIC_IO */
  551 
  552 
  553 /*******************************************************************
  554  * local functions and data
  555  */
  556 
  557 typedef struct INTDATA {
  558         u_char  int_type;
  559         u_short int_flags;
  560         u_char  src_bus_id;
  561         u_char  src_bus_irq;
  562         u_char  dst_apic_id;
  563         u_char  dst_apic_int;
  564         u_char  int_vector;
  565 }       io_int, local_int;
  566 
  567 /* the IO INT data, one entry per possible APIC INTerrupt */
  568 static io_int  *io_apic_ints;
  569 
  570 static int nintrs;
  571 
  572 /*
  573  * start the SMP system
  574  */
  575 static void
  576 mp_enable(u_int boot_addr)
  577 {
  578         int     x;
  579 #if defined(APIC_IO)
  580         int     apic;
  581         u_int   ux;
  582 #endif  /* APIC_IO */
  583 
  584         POSTCODE(MP_ENABLE_POST);
  585 
  586         /* turn on 4MB of V == P addressing so we can get to MP table */
  587         *(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
  588         invltlb();
  589 
  590         /* examine the MP table for needed info, uses physical addresses */
  591         x = mptable_pass2();
  592 
  593         *(int *)PTD = 0;
  594         invltlb();
  595 
  596         /* can't process default configs till the CPU APIC is pmapped */
  597         if (x)
  598                 default_mp_table(x);
  599 
  600         /* initialize all SMP locks */
  601         init_locks();
  602 
  603         /* post scan cleanup */
  604         fix_mp_table();
  605         setup_apic_irq_mapping();
  606 
  607 #if defined(APIC_IO)
  608 
  609         /* fill the LOGICAL io_apic_versions table */
  610         for (apic = 0; apic < mp_napics; ++apic) {
  611                 ux = io_apic_read(apic, IOAPIC_VER);
  612                 io_apic_versions[apic] = ux;
  613                 if (ux == 0xffffffff) {
  614                         int i;
  615 
  616                         for (i = 0; i < nintrs; i++)
  617                                 if (io_apic_ints[i].dst_apic_id ==
  618                                     IO_TO_ID(apic))
  619                                         panic("Missing IO APIC");
  620                         printf("Skipping broken IO APIC #%d\n", apic);
  621                 } else
  622                         io_apic_set_id(apic, IO_TO_ID(apic));
  623         }
  624 
  625         /* program each IO APIC in the system */
  626         for (apic = 0; apic < mp_napics; ++apic)
  627                 if (io_apic_versions[apic] != 0xffffffff &&
  628                     io_apic_setup(apic) < 0)
  629                         panic("IO APIC setup failure");
  630 
  631         /* install a 'Spurious INTerrupt' vector */
  632         setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
  633                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  634 
  635         /* install an inter-CPU IPI for TLB invalidation */
  636         setidt(XINVLTLB_OFFSET, Xinvltlb,
  637                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  638 
  639 #ifdef BETTER_CLOCK
  640         /* install an inter-CPU IPI for reading processor state */
  641         setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate,
  642                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  643 #endif
  644         
  645         /* install an inter-CPU IPI for all-CPU rendezvous */
  646         setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
  647                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  648 
  649         /* install an inter-CPU IPI for forcing an additional software trap */
  650         setidt(XCPUAST_OFFSET, Xcpuast,
  651                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  652         
  653         /* install an inter-CPU IPI for interrupt forwarding */
  654         setidt(XFORWARD_IRQ_OFFSET, Xforward_irq,
  655                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  656 
  657         /* install an inter-CPU IPI for CPU stop/restart */
  658         setidt(XCPUSTOP_OFFSET, Xcpustop,
  659                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  660 
  661 #if defined(TEST_TEST1)
  662         /* install a "fake hardware INTerrupt" vector */
  663         setidt(XTEST1_OFFSET, Xtest1,
  664                SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  665 #endif  /** TEST_TEST1 */
  666 
  667 #endif  /* APIC_IO */
  668 
  669         /* start each Application Processor */
  670         start_all_aps(boot_addr);
  671 }
  672 
  673 
  674 /*
  675  * look for the MP spec signature
  676  */
  677 
  678 /* string defined by the Intel MP Spec as identifying the MP table */
  679 #define MP_SIG          0x5f504d5f      /* _MP_ */
  680 #define NEXT(X)         ((X) += 4)
  681 static int
  682 search_for_sig(u_int32_t target, int count)
  683 {
  684         int     x;
  685         u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
  686 
  687         for (x = 0; x < count; NEXT(x))
  688                 if (addr[x] == MP_SIG)
  689                         /* make array index a byte index */
  690                         return (target + (x * sizeof(u_int32_t)));
  691 
  692         return -1;
  693 }
  694 
  695 
  696 static basetable_entry basetable_entry_types[] =      /* indexed by base table entry type */
  697 {
  698         {0, 20, "Processor"},   /* fields: type, length, name */
  699         {1, 8, "Bus"},
  700         {2, 8, "I/O APIC"},
  701         {3, 8, "I/O INT"},
  702         {4, 8, "Local INT"}     /* mptable_pass1() steps to the next entry by .length */
  703 };
  704 
  705 typedef struct BUSDATA {
  706         u_char  bus_id;
  707         enum busTypes bus_type;
  708 }       bus_datum;
  709 
  710 typedef struct BUSTYPENAME {
  711         u_char  type;
  712         char    name[7];
  713 }       bus_type_name;
  714 
  715 static bus_type_name bus_type_table[] =
  716 {
  717         {CBUS, "CBUS"},
  718         {CBUSII, "CBUSII"},
  719         {EISA, "EISA"},
  720         {MCA, "MCA"},
  721         {UNKNOWN_BUSTYPE, "---"},
  722         {ISA, "ISA"},
  723         {MCA, "MCA"},
  724         {UNKNOWN_BUSTYPE, "---"},
  725         {UNKNOWN_BUSTYPE, "---"},
  726         {UNKNOWN_BUSTYPE, "---"},
  727         {UNKNOWN_BUSTYPE, "---"},
  728         {UNKNOWN_BUSTYPE, "---"},
  729         {PCI, "PCI"},
  730         {UNKNOWN_BUSTYPE, "---"},
  731         {UNKNOWN_BUSTYPE, "---"},
  732         {UNKNOWN_BUSTYPE, "---"},
  733         {UNKNOWN_BUSTYPE, "---"},
  734         {XPRESS, "XPRESS"},
  735         {UNKNOWN_BUSTYPE, "---"}
  736 };
  737 /* from MP spec v1.4, table 5-1 */
  738 static int default_data[7][5] =
  739 {
  740 /*   nbus, id0, type0, id1, type1 */
  741         {1, 0, ISA, 255, 255},
  742         {1, 0, EISA, 255, 255},
  743         {1, 0, EISA, 255, 255},
  744         {1, 0, MCA, 255, 255},
  745         {2, 0, ISA, 1, PCI},
  746         {2, 0, EISA, 1, PCI},
  747         {2, 0, MCA, 1, PCI}
  748 };
  749 
  750 
  751 /* the bus data */
  752 static bus_datum *bus_data;
  753 
  754 static int processor_entry      __P((proc_entry_ptr entry, int cpu));
  755 static int bus_entry            __P((bus_entry_ptr entry, int bus));
  756 static int io_apic_entry        __P((io_apic_entry_ptr entry, int apic));
  757 static int int_entry            __P((int_entry_ptr entry, int intr));
  758 static int lookup_bus_type      __P((char *name));
  759 
  760 
  761 /*
  762  * 1st pass on motherboard's Intel MP specification table.
  763  *
  764  * initializes:
  765  *      mp_ncpus = 1
  766  *
  767  * determines:
  768  *      cpu_apic_address (common to all CPUs)
  769  *      io_apic_address[N]
  770  *      mp_naps
  771  *      mp_nbusses
  772  *      mp_napics
  773  *      nintrs
  774  */
  775 static void
  776 mptable_pass1(void)
  777 {
  778         int     x;
  779         mpcth_t cth;
  780         int     totalSize;
  781         void*   position;
  782         int     count;
  783         int     type;
  784         u_int   id_mask;
  785 
  786         POSTCODE(MPTABLE_PASS1_POST);
  787 
  788         /* clear various tables */
  789         for (x = 0; x < NAPICID; ++x) {
  790                 io_apic_address[x] = ~0;        /* IO APIC address table */
  791         }
  792 
  793         /* init everything to empty */
  794         mp_naps = 0;
  795         mp_nbusses = 0;
  796         mp_napics = 0;
  797         nintrs = 0;
  798         id_mask = 0;
  799 
  800         /* check for use of 'default' configuration */
  801         if (MPFPS_MPFB1 != 0) {
  802                 /* use default addresses */
  803                 cpu_apic_address = DEFAULT_APIC_BASE;
  804                 io_apic_address[0] = DEFAULT_IO_APIC_BASE;
  805 
  806                 /* fill in with defaults */
  807                 mp_naps = 2;            /* includes BSP */
  808                 mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
  809 #if defined(APIC_IO)
  810                 mp_napics = 1;
  811                 nintrs = 16;
  812 #endif  /* APIC_IO */
  813         }
  814         else {
  815                 if ((cth = mpfps->pap) == 0)
  816                         panic("MP Configuration Table Header MISSING!");
  817 
  818                 cpu_apic_address = (vm_offset_t) cth->apic_address;
  819 
  820                 /* walk the table, recording info of interest */
  821                 totalSize = cth->base_table_length - sizeof(struct MPCTH);
  822                 position = (u_char *) cth + sizeof(struct MPCTH);
  823                 count = cth->entry_count;
  824 
  825                 while (count--) {
  826                         switch (type = *(u_char *) position) {
  827                         case 0: /* processor_entry */
  828                                 if (((proc_entry_ptr)position)->cpu_flags
  829                                     & PROCENTRY_FLAG_EN) {
  830                                         ++mp_naps;
  831                                         id_mask |= 1 <<
  832                                             ((proc_entry_ptr)position)->apic_id;
  833                                 }
  834                                 break;
  835                         case 1: /* bus_entry */
  836                                 ++mp_nbusses;
  837                                 break;
  838                         case 2: /* io_apic_entry */
  839                                 if (((io_apic_entry_ptr)position)->apic_flags
  840                                         & IOAPICENTRY_FLAG_EN)
  841                                         io_apic_address[mp_napics++] =
  842                                             (vm_offset_t)((io_apic_entry_ptr)
  843                                                 position)->apic_address;
  844                                 break;
  845                         case 3: /* int_entry */
  846                                 ++nintrs;
  847                                 break;
  848                         case 4: /* int_entry */
  849                                 break;
  850                         default:
  851                                 panic("mpfps Base Table HOSED!");
  852                                 /* NOTREACHED */
  853                         }
  854 
  855                         totalSize -= basetable_entry_types[type].length;
  856                         (u_char*)position += basetable_entry_types[type].length;
  857                 }
  858         }
  859 
  860         /* qualify the numbers */
  861         if (mp_naps > MAXCPU) {
  862                 printf("Warning: only using %d of %d available CPUs!\n",
  863                         MAXCPU, mp_naps);
  864                 mp_naps = MAXCPU;
  865         }
  866 
  867         /* See if we need to fixup HT logical CPUs. */
  868         mptable_hyperthread_fixup(id_mask);
  869         
  870         /*
  871          * Count the BSP.
  872          * This is also used as a counter while starting the APs.
  873          */
  874         mp_ncpus = 1;
  875 
  876         --mp_naps;      /* subtract the BSP */
  877 }
  878 
  879 
  880 /*
  881  * 2nd pass on motherboard's Intel MP specification table.
  882  *
  883  * sets:
  884  *      boot_cpu_id
  885  *      ID_TO_IO(N), phy APIC ID to log CPU/IO table
  886  *      CPU_TO_ID(N), logical CPU to APIC ID table
  887  *      IO_TO_ID(N), logical IO to APIC ID table
  888  *      bus_data[N]
  889  *      io_apic_ints[N]
  890  */
  891 static int
  892 mptable_pass2(void)
  893 {
  894         struct PROCENTRY proc;
  895         int     x;
  896         mpcth_t cth;
  897         int     totalSize;
  898         void*   position;
  899         int     count;
  900         int     type;
  901         int     apic, bus, cpu, intr;
  902         int     i, j;
  903         int     pgeflag;
  904 
  905         POSTCODE(MPTABLE_PASS2_POST);
  906 
  907         /* Initialize fake proc entry for use with HT fixup. */
  908         bzero(&proc, sizeof(proc));
  909         proc.type = 0;
  910         proc.cpu_flags = PROCENTRY_FLAG_EN;
  911 
  912         pgeflag = 0;            /* XXX - Not used under SMP yet.  */
  913 
  914         MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
  915             M_DEVBUF, M_WAITOK);
  916         MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
  917             M_DEVBUF, M_WAITOK);
  918         MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
  919             M_DEVBUF, M_WAITOK);
  920         MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
  921             M_DEVBUF, M_WAITOK);
  922 
  923         bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
  924 
  925         for (i = 0; i < mp_napics; i++) {
  926                 for (j = 0; j < mp_napics; j++) {
  927                         /* same page frame as a previous IO apic? */
  928                         if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
  929                             (io_apic_address[i] & PG_FRAME)) {
  930                                 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
  931                                         + (NPTEPG-2-j) * PAGE_SIZE
  932                                         + (io_apic_address[i] & PAGE_MASK));
  933                                 break;
  934                         }
  935                         /* use this slot if available */
  936                         if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
  937                                 SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
  938                                     pgeflag | (io_apic_address[i] & PG_FRAME));
  939                                 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
  940                                         + (NPTEPG-2-j) * PAGE_SIZE
  941                                         + (io_apic_address[i] & PAGE_MASK));
  942                                 break;
  943                         }
  944                 }
  945         }
  946 
  947         /* clear various tables */
  948         for (x = 0; x < NAPICID; ++x) {
  949                 ID_TO_IO(x) = -1;       /* phy APIC ID to log CPU/IO table */
  950                 CPU_TO_ID(x) = -1;      /* logical CPU to APIC ID table */
  951                 IO_TO_ID(x) = -1;       /* logical IO to APIC ID table */
  952         }
  953 
  954         /* clear bus data table */
  955         for (x = 0; x < mp_nbusses; ++x)
  956                 bus_data[x].bus_id = 0xff;
  957 
  958         /* clear IO APIC INT table */
  959         for (x = 0; x < (nintrs + 1); ++x) {
  960                 io_apic_ints[x].int_type = 0xff;
  961                 io_apic_ints[x].int_vector = 0xff;
  962         }
  963 
  964         /* setup the cpu/apic mapping arrays */
  965         boot_cpu_id = -1;
  966 
  967         /* record whether PIC or virtual-wire mode */
  968         picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
  969 
  970         /* check for use of 'default' configuration */
  971         if (MPFPS_MPFB1 != 0)
  972                 return MPFPS_MPFB1;     /* return default configuration type */
  973 
  974         if ((cth = mpfps->pap) == 0)
  975                 panic("MP Configuration Table Header MISSING!");
  976 
  977         /* walk the table, recording info of interest */
  978         totalSize = cth->base_table_length - sizeof(struct MPCTH);
  979         position = (u_char *) cth + sizeof(struct MPCTH);
  980         count = cth->entry_count;
  981         apic = bus = intr = 0;
  982         cpu = 1;                                /* pre-count the BSP */
  983 
  984         while (count--) {
  985                 switch (type = *(u_char *) position) {
  986                 case 0:
  987                         if (processor_entry(position, cpu))
  988                                 ++cpu;
  989 
  990                         if (need_hyperthreading_fixup) {
  991                                 /*
  992                                  * Create fake mptable processor entries
  993                                  * and feed them to processor_entry() to
  994                                  * enumerate the logical CPUs.
  995                                  */
  996                                 proc.apic_id = ((proc_entry_ptr)position)->apic_id;
  997                                 for (i = 1; i < logical_cpus; i++) {
  998                                         proc.apic_id++;
  999                                         (void)processor_entry(&proc, cpu);
 1000                                         logical_cpus_mask |= (1 << cpu);
 1001                                         if (hyperthreading_cpus > 1 &&
 1002                                             proc.apic_id % hyperthreading_cpus != 0)
 1003                                                 hyperthreading_cpus_mask |= (1 << cpu);
 1004                                         cpu++;
 1005                                 }
 1006                         } else if (logical_cpus != 0) {
 1007                                 u_int id = ((proc_entry_ptr)position)->apic_id;
 1008 
 1009                                 /*
 1010                                  * If this is an already-enumerated logical
 1011                                  * CPU, add it to the bitmap.
 1012                                  */
 1013                                 if (id % logical_cpus != 0)
 1014                                         logical_cpus_mask |= (1 << ID_TO_CPU(id));
 1015                                 if (hyperthreading_cpus > 1 &&
 1016                                     id % hyperthreading_cpus != 0)
 1017                                         hyperthreading_cpus_mask |= (1 << ID_TO_CPU(id));
 1018                         }
 1019                         break;
 1020                 case 1:
 1021                         if (bus_entry(position, bus))
 1022                                 ++bus;
 1023                         break;
 1024                 case 2:
 1025                         if (io_apic_entry(position, apic))
 1026                                 ++apic;
 1027                         break;
 1028                 case 3:
 1029                         if (int_entry(position, intr))
 1030                                 ++intr;
 1031                         break;
 1032                 case 4:
 1033                         /* int_entry(position); */
 1034                         break;
 1035                 default:
 1036                         panic("mpfps Base Table HOSED!");
 1037                         /* NOTREACHED */
 1038                 }
 1039 
 1040                 totalSize -= basetable_entry_types[type].length;
 1041                 (u_char *) position += basetable_entry_types[type].length;
 1042         }
 1043 
 1044         if (boot_cpu_id == -1)
 1045                 panic("NO BSP found!");
 1046 
 1047         /* report fact that its NOT a default configuration */
 1048         return 0;
 1049 }
 1050 
 1051 /*
 1052  * Check if we should perform a hyperthreading "fix-up" to
 1053  * enumerate any logical CPU's that aren't already listed
 1054  * in the table.
 1055  *
 1056  * XXX: We assume that all of the physical CPUs in the
 1057  * system have the same number of logical CPUs.
 1058  *
 1059  * XXX: We assume that APIC ID's are allocated such that
 1060  * the APIC ID's for a physical processor are aligned
 1061  * with the number of logical CPU's in the processor.
 1062  */
 1063 static void
 1064 mptable_hyperthread_fixup(u_int id_mask)
 1065 {
 1066         u_int i, id;
 1067         u_int threads_per_cache, p[4];
 1068 
 1069         /* Nothing to do if there is no HTT support. */
 1070         if ((cpu_feature & CPUID_HTT) == 0)
 1071                 return;
 1072         logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
 1073         if (logical_cpus <= 1)
 1074                 return;
 1075 
 1076         /*
 1077          * Work out if hyperthreading is *really* enabled.  This
 1078          * is made really ugly by the fact that processors lie: Dual
 1079          * core processors claim to be hyperthreaded even when they're
 1080          * not, presumably because they want to be treated the same
 1081          * way as HTT with respect to per-cpu software licensing.
 1082          * At the time of writing (May 12, 2005) the only hyperthreaded
 1083          * cpus are from Intel, and Intel's dual-core processors can be
 1084          * identified via the "deterministic cache parameters" cpuid
 1085          * calls.
 1086          */
 1087         /*
 1088          * First determine if this is an Intel processor which claims
 1089          * to have hyperthreading support.
 1090          */
 1091         if ((cpu_feature & CPUID_HTT) &&
 1092             (strcmp(cpu_vendor, "GenuineIntel") == 0)) {
 1093                 /*
 1094                  * If the "deterministic cache parameters" cpuid calls
 1095                  * are available, use them.
 1096                  */
 1097                 if (cpu_high >= 4) {
 1098                         /* Ask the processor about the L1 cache. */
 1099                         for (i = 0; i < 1; i++) {
 1100                                 cpuid_count(4, i, p);
 1101                                 threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
 1102                                 if (hyperthreading_cpus < threads_per_cache)
 1103                                         hyperthreading_cpus = threads_per_cache;
 1104                                 if ((p[0] & 0x1f) == 0)
 1105                                         break;
 1106                         }
 1107                 }
 1108 
 1109                 /*
 1110                  * If the deterministic cache parameters are not
 1111                  * available, or if no caches were reported to exist,
 1112                  * just accept what the HTT flag indicated.
 1113                  */
 1114                 if (hyperthreading_cpus == 0)
 1115                         hyperthreading_cpus = logical_cpus;
 1116         }
 1117 
 1118         /*
 1119          * For each APIC ID of a CPU that is set in the mask,
 1120          * scan the other candidate APIC ID's for this
 1121          * physical processor.  If any of those ID's are
 1122          * already in the table, then kill the fixup.
 1123          */
 1124         for (id = 0; id <= MAXCPU; id++) {
 1125                 if ((id_mask & 1 << id) == 0)
 1126                         continue;
 1127                 /* First, make sure we are on a logical_cpus boundary. */
 1128                 if (id % logical_cpus != 0)
 1129                         return;
 1130                 for (i = id + 1; i < id + logical_cpus; i++)
 1131                         if ((id_mask & 1 << i) != 0)
 1132                                 return;
 1133         }
 1134 
 1135         /*
 1136          * Ok, the ID's checked out, so enable the fixup.  We have to fixup
 1137          * mp_naps right now.
 1138          */
 1139         need_hyperthreading_fixup = 1;
 1140         mp_naps *= logical_cpus;
 1141 }
 1142 
 1143 void
 1144 assign_apic_irq(int apic, int intpin, int irq)
 1145 {
 1146         int x;
 1147         
 1148         if (int_to_apicintpin[irq].ioapic != -1)
 1149                 panic("assign_apic_irq: inconsistent table");
 1150         
 1151         int_to_apicintpin[irq].ioapic = apic;
 1152         int_to_apicintpin[irq].int_pin = intpin;
 1153         int_to_apicintpin[irq].apic_address = ioapic[apic];
 1154         int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
 1155         
 1156         for (x = 0; x < nintrs; x++) {
 1157                 if ((io_apic_ints[x].int_type == 0 || 
 1158                      io_apic_ints[x].int_type == 3) &&
 1159                     io_apic_ints[x].int_vector == 0xff &&
 1160                     io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
 1161                     io_apic_ints[x].dst_apic_int == intpin)
 1162                         io_apic_ints[x].int_vector = irq;
 1163         }
 1164 }
 1165 
 1166 void
 1167 revoke_apic_irq(int irq)
 1168 {
 1169         int x;
 1170         int oldapic;
 1171         int oldintpin;
 1172         
 1173         if (int_to_apicintpin[irq].ioapic == -1)
 1174                 panic("revoke_apic_irq: inconsistent table");
 1175         
 1176         oldapic = int_to_apicintpin[irq].ioapic;
 1177         oldintpin = int_to_apicintpin[irq].int_pin;
 1178 
 1179         int_to_apicintpin[irq].ioapic = -1;
 1180         int_to_apicintpin[irq].int_pin = 0;
 1181         int_to_apicintpin[irq].apic_address = NULL;
 1182         int_to_apicintpin[irq].redirindex = 0;
 1183         
 1184         for (x = 0; x < nintrs; x++) {
 1185                 if ((io_apic_ints[x].int_type == 0 || 
 1186                      io_apic_ints[x].int_type == 3) &&
 1187                     io_apic_ints[x].int_vector != 0xff &&
 1188                     io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
 1189                     io_apic_ints[x].dst_apic_int == oldintpin)
 1190                         io_apic_ints[x].int_vector = 0xff;
 1191         }
 1192 }
 1193 
 1194 
 1195 static void
 1196 allocate_apic_irq(int intr)
 1197 {
 1198         int apic;
 1199         int intpin;
 1200         int irq;
 1201         
 1202         if (io_apic_ints[intr].int_vector != 0xff)
 1203                 return;         /* Interrupt handler already assigned */
 1204         
 1205         if (io_apic_ints[intr].int_type != 0 &&
 1206             (io_apic_ints[intr].int_type != 3 ||
 1207              (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
 1208               io_apic_ints[intr].dst_apic_int == 0)))
 1209                 return;         /* Not INT or ExtInt on != (0, 0) */
 1210         
 1211         irq = 0;
 1212         while (irq < APIC_INTMAPSIZE &&
 1213                int_to_apicintpin[irq].ioapic != -1)
 1214                 irq++;
 1215         
 1216         if (irq >= APIC_INTMAPSIZE)
 1217                 return;         /* No free interrupt handlers */
 1218         
 1219         apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
 1220         intpin = io_apic_ints[intr].dst_apic_int;
 1221         
 1222         assign_apic_irq(apic, intpin, irq);
 1223         io_apic_setup_intpin(apic, intpin);
 1224 }
 1225 
 1226 
 1227 static void
 1228 swap_apic_id(int apic, int oldid, int newid)
 1229 {
 1230         int x;
 1231         int oapic;
 1232         
 1233 
 1234         if (oldid == newid)
 1235                 return;                 /* Nothing to do */
 1236         
 1237         printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
 1238                apic, oldid, newid);
 1239         
 1240         /* Swap physical APIC IDs in interrupt entries */
 1241         for (x = 0; x < nintrs; x++) {
 1242                 if (io_apic_ints[x].dst_apic_id == oldid)
 1243                         io_apic_ints[x].dst_apic_id = newid;
 1244                 else if (io_apic_ints[x].dst_apic_id == newid)
 1245                         io_apic_ints[x].dst_apic_id = oldid;
 1246         }
 1247         
 1248         /* Swap physical APIC IDs in IO_TO_ID mappings */
 1249         for (oapic = 0; oapic < mp_napics; oapic++)
 1250                 if (IO_TO_ID(oapic) == newid)
 1251                         break;
 1252         
 1253         if (oapic < mp_napics) {
 1254                 printf("Changing APIC ID for IO APIC #%d from "
 1255                        "%d to %d in MP table\n",
 1256                        oapic, newid, oldid);
 1257                 IO_TO_ID(oapic) = oldid;
 1258         }
 1259         IO_TO_ID(apic) = newid;
 1260 }
 1261 
 1262 
 1263 static void
 1264 fix_id_to_io_mapping(void)
 1265 {
 1266         int x;
 1267 
 1268         for (x = 0; x < NAPICID; x++)
 1269                 ID_TO_IO(x) = -1;
 1270         
 1271         for (x = 0; x <= mp_naps; x++)
 1272                 if (CPU_TO_ID(x) < NAPICID)
 1273                         ID_TO_IO(CPU_TO_ID(x)) = x;
 1274         
 1275         for (x = 0; x < mp_napics; x++)
 1276                 if (IO_TO_ID(x) < NAPICID)
 1277                         ID_TO_IO(IO_TO_ID(x)) = x;
 1278 }
 1279 
 1280 
 1281 static int
 1282 first_free_apic_id(void)
 1283 {
 1284         int freeid, x;
 1285         
 1286         for (freeid = 0; freeid < NAPICID; freeid++) {
 1287                 for (x = 0; x <= mp_naps; x++)
 1288                         if (CPU_TO_ID(x) == freeid)
 1289                                 break;
 1290                 if (x <= mp_naps)
 1291                         continue;
 1292                 for (x = 0; x < mp_napics; x++)
 1293                         if (IO_TO_ID(x) == freeid)
 1294                                 break;
 1295                 if (x < mp_napics)
 1296                         continue;
 1297                 return freeid;
 1298         }
 1299         return freeid;
 1300 }
 1301 
 1302 
 1303 static int
 1304 io_apic_id_acceptable(int apic, int id)
 1305 {
 1306         int cpu;                /* Logical CPU number */
 1307         int oapic;              /* Logical IO APIC number for other IO APIC */
 1308 
 1309         if (id >= NAPICID)
 1310                 return 0;       /* Out of range */
 1311         
 1312         for (cpu = 0; cpu <= mp_naps; cpu++)
 1313                 if (CPU_TO_ID(cpu) == id)
 1314                         return 0;       /* Conflict with CPU */
 1315         
 1316         for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
 1317                 if (IO_TO_ID(oapic) == id)
 1318                         return 0;       /* Conflict with other APIC */
 1319         
 1320         return 1;               /* ID is acceptable for IO APIC */
 1321 }
 1322 
 1323 
 1324 /*
 1325  * parse an Intel MP specification table
 1326  */
 1327 static void
 1328 fix_mp_table(void)
 1329 {
 1330         int     x;
 1331         int     id;
 1332         int     bus_0 = 0;      /* Stop GCC warning */
 1333         int     bus_pci = 0;    /* Stop GCC warning */
 1334         int     num_pci_bus;
 1335         int     apic;           /* IO APIC unit number */
 1336         int     freeid;         /* Free physical APIC ID */
 1337         int     physid;         /* Current physical IO APIC ID */
 1338 
 1339         /*
 1340          * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
 1341          * did it wrong.  The MP spec says that when more than 1 PCI bus
 1342          * exists the BIOS must begin with bus entries for the PCI bus and use
 1343          * actual PCI bus numbering.  This implies that when only 1 PCI bus
 1344          * exists the BIOS can choose to ignore this ordering, and indeed many
 1345          * MP motherboards do ignore it.  This causes a problem when the PCI
 1346          * sub-system makes requests of the MP sub-system based on PCI bus
 1347          * numbers.     So here we look for the situation and renumber the
 1348          * busses and associated INTs in an effort to "make it right".
 1349          */
 1350 
 1351         /* find bus 0, PCI bus, count the number of PCI busses */
 1352         for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
 1353                 if (bus_data[x].bus_id == 0) {
 1354                         bus_0 = x;
 1355                 }
 1356                 if (bus_data[x].bus_type == PCI) {
 1357                         ++num_pci_bus;
 1358                         bus_pci = x;
 1359                 }
 1360         }
 1361         /*
 1362          * bus_0 == slot of bus with ID of 0
 1363          * bus_pci == slot of last PCI bus encountered
 1364          */
 1365 
 1366         /* check the 1 PCI bus case for sanity */
 1367         /* if it is number 0 all is well */
 1368         if (num_pci_bus == 1 &&
 1369             bus_data[bus_pci].bus_id != 0) {
 1370                 
 1371                 /* mis-numbered, swap with whichever bus uses slot 0 */
 1372 
 1373                 /* swap the bus entry types */
 1374                 bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
 1375                 bus_data[bus_0].bus_type = PCI;
 1376 
 1377                 /* swap each relavant INTerrupt entry */
 1378                 id = bus_data[bus_pci].bus_id;
 1379                 for (x = 0; x < nintrs; ++x) {
 1380                         if (io_apic_ints[x].src_bus_id == id) {
 1381                                 io_apic_ints[x].src_bus_id = 0;
 1382                         }
 1383                         else if (io_apic_ints[x].src_bus_id == 0) {
 1384                                 io_apic_ints[x].src_bus_id = id;
 1385                         }
 1386                 }
 1387         }
 1388 
 1389         /* Assign IO APIC IDs.
 1390          * 
 1391          * First try the existing ID. If a conflict is detected, try
 1392          * the ID in the MP table.  If a conflict is still detected, find
 1393          * a free id.
 1394          *
 1395          * We cannot use the ID_TO_IO table before all conflicts has been
 1396          * resolved and the table has been corrected.
 1397          */
 1398         for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
 1399                 
 1400                 /* First try to use the value set by the BIOS */
 1401                 physid = io_apic_get_id(apic);
 1402                 if (io_apic_id_acceptable(apic, physid)) {
 1403                         if (IO_TO_ID(apic) != physid)
 1404                                 swap_apic_id(apic, IO_TO_ID(apic), physid);
 1405                         continue;
 1406                 }
 1407 
 1408                 /* Then check if the value in the MP table is acceptable */
 1409                 if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
 1410                         continue;
 1411 
 1412                 /* Last resort, find a free APIC ID and use it */
 1413                 freeid = first_free_apic_id();
 1414                 if (freeid >= NAPICID)
 1415                         panic("No free physical APIC IDs found");
 1416                 
 1417                 if (io_apic_id_acceptable(apic, freeid)) {
 1418                         swap_apic_id(apic, IO_TO_ID(apic), freeid);
 1419                         continue;
 1420                 }
 1421                 panic("Free physical APIC ID not usable");
 1422         }
 1423         fix_id_to_io_mapping();
 1424 
 1425         /* detect and fix broken Compaq MP table */
 1426         if (apic_int_type(0, 0) == -1) {
 1427                 printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
 1428                 io_apic_ints[nintrs].int_type = 3;      /* ExtInt */
 1429                 io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */
 1430                 /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
 1431                 io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
 1432                 io_apic_ints[nintrs].dst_apic_int = 0;  /* Pin 0 */
 1433                 nintrs++;
 1434         } else if (apic_int_type(0, 0) == 0) {
 1435                 printf("APIC_IO: MP table broken: ExtINT entry corrupt!\n");
 1436                 for (x = 0; x < nintrs; ++x)
 1437                         if ((0 == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1438                             (0 == io_apic_ints[x].dst_apic_int)) {
 1439                                 io_apic_ints[x].int_type = 3;
 1440                                 io_apic_ints[x].int_vector = 0xff;
 1441                                 break;
 1442                         }
 1443         }
 1444 }
 1445 
 1446 
 1447 /* Assign low level interrupt handlers */
 1448 static void
 1449 setup_apic_irq_mapping(void)
 1450 {
 1451         int     x;
 1452         int     int_vector;
 1453 
 1454         /* Clear array */
 1455         for (x = 0; x < APIC_INTMAPSIZE; x++) {
 1456                 int_to_apicintpin[x].ioapic = -1;
 1457                 int_to_apicintpin[x].int_pin = 0;
 1458                 int_to_apicintpin[x].apic_address = NULL;
 1459                 int_to_apicintpin[x].redirindex = 0;
 1460         }
 1461 
 1462         /* First assign ISA/EISA interrupts */
 1463         for (x = 0; x < nintrs; x++) {
 1464                 int_vector = io_apic_ints[x].src_bus_irq;
 1465                 if (int_vector < APIC_INTMAPSIZE &&
 1466                     io_apic_ints[x].int_vector == 0xff && 
 1467                     int_to_apicintpin[int_vector].ioapic == -1 &&
 1468                     (apic_int_is_bus_type(x, ISA) ||
 1469                      apic_int_is_bus_type(x, EISA)) &&
 1470                     io_apic_ints[x].int_type == 0) {
 1471                         assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id), 
 1472                                         io_apic_ints[x].dst_apic_int,
 1473                                         int_vector);
 1474                 }
 1475         }
 1476 
 1477         /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
 1478         for (x = 0; x < nintrs; x++) {
 1479                 if (io_apic_ints[x].dst_apic_int == 0 &&
 1480                     io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
 1481                     io_apic_ints[x].int_vector == 0xff && 
 1482                     int_to_apicintpin[0].ioapic == -1 &&
 1483                     io_apic_ints[x].int_type == 3) {
 1484                         assign_apic_irq(0, 0, 0);
 1485                         break;
 1486                 }
 1487         }
 1488         /* PCI interrupt assignment is deferred */
 1489 }
 1490 
 1491 
 1492 static int
 1493 processor_entry(proc_entry_ptr entry, int cpu)
 1494 {
 1495         /* check for usability */
 1496         if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
 1497                 return 0;
 1498 
 1499         if(entry->apic_id >= NAPICID)
 1500                 panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
 1501         /* check for BSP flag */
 1502         if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
 1503                 boot_cpu_id = entry->apic_id;
 1504                 CPU_TO_ID(0) = entry->apic_id;
 1505                 ID_TO_CPU(entry->apic_id) = 0;
 1506                 return 0;       /* its already been counted */
 1507         }
 1508 
 1509         /* add another AP to list, if less than max number of CPUs */
 1510         else if (cpu < MAXCPU) {
 1511                 CPU_TO_ID(cpu) = entry->apic_id;
 1512                 ID_TO_CPU(entry->apic_id) = cpu;
 1513                 return 1;
 1514         }
 1515 
 1516         return 0;
 1517 }
 1518 
 1519 
 1520 static int
 1521 bus_entry(bus_entry_ptr entry, int bus)
 1522 {
 1523         int     x;
 1524         char    c, name[8];
 1525 
 1526         /* encode the name into an index */
 1527         for (x = 0; x < 6; ++x) {
 1528                 if ((c = entry->bus_type[x]) == ' ')
 1529                         break;
 1530                 name[x] = c;
 1531         }
 1532         name[x] = '\0';
 1533 
 1534         if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
 1535                 panic("unknown bus type: '%s'", name);
 1536 
 1537         bus_data[bus].bus_id = entry->bus_id;
 1538         bus_data[bus].bus_type = x;
 1539 
 1540         return 1;
 1541 }
 1542 
 1543 
 1544 static int
 1545 io_apic_entry(io_apic_entry_ptr entry, int apic)
 1546 {
 1547         if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
 1548                 return 0;
 1549 
 1550         IO_TO_ID(apic) = entry->apic_id;
 1551         if (entry->apic_id < NAPICID)
 1552                 ID_TO_IO(entry->apic_id) = apic;
 1553 
 1554         return 1;
 1555 }
 1556 
 1557 
 1558 static int
 1559 lookup_bus_type(char *name)
 1560 {
 1561         int     x;
 1562 
 1563         for (x = 0; x < MAX_BUSTYPE; ++x)
 1564                 if (strcmp(bus_type_table[x].name, name) == 0)
 1565                         return bus_type_table[x].type;
 1566 
 1567         return UNKNOWN_BUSTYPE;
 1568 }
 1569 
 1570 
 1571 static int
 1572 int_entry(int_entry_ptr entry, int intr)
 1573 {
 1574         int apic;
 1575 
 1576         io_apic_ints[intr].int_type = entry->int_type;
 1577         io_apic_ints[intr].int_flags = entry->int_flags;
 1578         io_apic_ints[intr].src_bus_id = entry->src_bus_id;
 1579         io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
 1580         if (entry->dst_apic_id == 255) {
 1581                 /* This signal goes to all IO APICS.  Select an IO APIC
 1582                    with sufficient number of interrupt pins */
 1583                 for (apic = 0; apic < mp_napics; apic++) {
 1584                         if (io_apic_read(apic, IOAPIC_VER) == 0xffffffff)
 1585                                 continue;
 1586                         if (((io_apic_read(apic, IOAPIC_VER) & 
 1587                               IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >= 
 1588                             entry->dst_apic_int)
 1589                                 break;
 1590                 }
 1591                 if (apic < mp_napics)
 1592                         io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
 1593                 else
 1594                         io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
 1595         } else
 1596                 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
 1597         io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
 1598 
 1599         return 1;
 1600 }
 1601 
 1602 
 1603 static int
 1604 apic_int_is_bus_type(int intr, int bus_type)
 1605 {
 1606         int     bus;
 1607 
 1608         for (bus = 0; bus < mp_nbusses; ++bus)
 1609                 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
 1610                     && ((int) bus_data[bus].bus_type == bus_type))
 1611                         return 1;
 1612 
 1613         return 0;
 1614 }
 1615 
 1616 
 1617 /*
 1618  * Given a traditional ISA INT mask, return an APIC mask.
 1619  */
 1620 u_int
 1621 isa_apic_mask(u_int isa_mask)
 1622 {
 1623         int isa_irq;
 1624         int apic_pin;
 1625 
 1626 #if defined(SKIP_IRQ15_REDIRECT)
 1627         if (isa_mask == (1 << 15)) {
 1628                 printf("skipping ISA IRQ15 redirect\n");
 1629                 return isa_mask;
 1630         }
 1631 #endif  /* SKIP_IRQ15_REDIRECT */
 1632 
 1633         isa_irq = ffs(isa_mask);                /* find its bit position */
 1634         if (isa_irq == 0)                       /* doesn't exist */
 1635                 return 0;
 1636         --isa_irq;                              /* make it zero based */
 1637 
 1638         apic_pin = isa_apic_irq(isa_irq);       /* look for APIC connection */
 1639         if (apic_pin == -1)
 1640                 return 0;
 1641 
 1642         return (1 << apic_pin);                 /* convert pin# to a mask */
 1643 }
 1644 
 1645 
 1646 /*
 1647  * Determine which APIC pin an ISA/EISA INT is attached to.
 1648  */
 1649 #define INTTYPE(I)      (io_apic_ints[(I)].int_type)
 1650 #define INTPIN(I)       (io_apic_ints[(I)].dst_apic_int)
 1651 #define INTIRQ(I)       (io_apic_ints[(I)].int_vector)
 1652 #define INTAPIC(I)      (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
 1653 
 1654 #define SRCBUSIRQ(I)    (io_apic_ints[(I)].src_bus_irq)
 1655 int
 1656 isa_apic_irq(int isa_irq)
 1657 {
 1658         int     intr;
 1659 
 1660         for (intr = 0; intr < nintrs; ++intr) {         /* check each record */
 1661                 if (INTTYPE(intr) == 0) {               /* standard INT */
 1662                         if (SRCBUSIRQ(intr) == isa_irq) {
 1663                                 if (apic_int_is_bus_type(intr, ISA) ||
 1664                                     apic_int_is_bus_type(intr, EISA)) {
 1665                                         if (INTIRQ(intr) == 0xff)
 1666                                                 return -1; /* unassigned */
 1667                                         return INTIRQ(intr);    /* found */
 1668                                 }
 1669                         }
 1670                 }
 1671         }
 1672         return -1;                                      /* NOT found */
 1673 }
 1674 
 1675 
 1676 /*
 1677  * Determine which APIC pin a PCI INT is attached to.
 1678  */
 1679 #define SRCBUSID(I)     (io_apic_ints[(I)].src_bus_id)
 1680 #define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
 1681 #define SRCBUSLINE(I)   (io_apic_ints[(I)].src_bus_irq & 0x03)
 1682 static int
 1683 pci_apic_irq_raw(int pciBus, int pciDevice, int pciInt)
 1684 {
 1685         int     intr;
 1686 
 1687         --pciInt;                                       /* zero based */
 1688 
 1689         for (intr = 0; intr < nintrs; ++intr)           /* check each record */
 1690                 if ((INTTYPE(intr) == 0)                /* standard INT */
 1691                     && (SRCBUSID(intr) == pciBus)
 1692                     && (SRCBUSDEVICE(intr) == pciDevice)
 1693                     && (SRCBUSLINE(intr) == pciInt))    /* a candidate IRQ */
 1694                         if (apic_int_is_bus_type(intr, PCI)) {
 1695                                 if (INTIRQ(intr) == 0xff)
 1696                                         allocate_apic_irq(intr);
 1697                                 if (INTIRQ(intr) == 0xff)
 1698                                         return -1;      /* unassigned */
 1699                                 return INTIRQ(intr);    /* exact match */
 1700                         }
 1701 
 1702         return -1;                                      /* NOT found */
 1703 }
 1704 
 1705 static int
 1706 pci_apic_bus_present(int bus)
 1707 {
 1708         int intr;
 1709 
 1710         for (intr = 0; intr < nintrs; ++intr)
 1711                 if ((INTTYPE(intr) == 0) && (SRCBUSID(intr) == bus))
 1712                         return (1);
 1713         return (0);
 1714 }
 1715 
 1716 int
 1717 pci_apic_irq(int bus, int device, int pin, void *arg)
 1718 {
 1719         device_t dev, bus_dev, pcib, parent;
 1720         int irq;
 1721 
 1722         parent = (device_t)arg;
 1723         pcib = NULL;
 1724 loop:
 1725         /* See if there is an exact match first. */
 1726         if (bootverbose) {
 1727                 printf("APIC_IO: trying to route %d:%d INT%c\n", bus, device,
 1728                     pin + 'A' - 1);
 1729         }
 1730         irq = pci_apic_irq_raw(bus, device, pin);
 1731         if (irq != -1)
 1732                 return (irq);
 1733 
 1734         /* If this bus has other entries but not this one, punt. */
 1735         if (pci_apic_bus_present(bus))
 1736                 return (-1);
 1737 
 1738         /* Safety net, don't try to walk past bus 0. */
 1739         if (bus == 0)
 1740                 return (-1);
 1741 
 1742         /*
 1743          * Try to find our parent bus and the bridge it hangs off of.  If
 1744          * we are recursing up the chain, we need to find the previous bridge's
 1745          * parent bus.  If we have a valid parent device, then that is our
 1746          * parent bus.  Otherwise, try to find ourself so that we can find
 1747          * our parent bus.  Every device has a function of 0 and we are
 1748          * really just trying to find our parent, so assume a function of 0
 1749          * to find either ourself or one of our siblings.
 1750          */
 1751         if (pcib != NULL)
 1752                 bus_dev = device_get_parent(pcib);
 1753         else if (parent != NULL)
 1754                 bus_dev = parent;
 1755         else {
 1756                 dev = pci_find_bsf(bus, device, 0);
 1757                 if (dev == NULL)
 1758                         return (-1);
 1759                 bus_dev = device_get_parent(dev);
 1760         }
 1761         if (bus_dev == NULL)
 1762                 return (-1);
 1763         pcib = device_get_parent(bus_dev);
 1764         if (pcib == NULL)
 1765                 return (-1);
 1766 
 1767         if (device_get_parent(pcib) == NULL)
 1768                 return (-1);
 1769 
 1770         /*
 1771          * Do the swizzle thing.
 1772          *
 1773          * XXX: no error checking for the bus number here
 1774          * (valid, does it exist, etc.).
 1775          */
 1776         bus = pci_get_bus(pcib);
 1777         pin = (device + (pin - 1)) % 4 + 1;
 1778         device = pci_get_slot(pcib);
 1779         goto loop;
 1780 }
 1781 
 1782 int
 1783 next_apic_irq(int irq) 
 1784 {
 1785         int intr, ointr;
 1786         int bus, bustype;
 1787 
 1788         bus = 0;
 1789         bustype = 0;
 1790         for (intr = 0; intr < nintrs; intr++) {
 1791                 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
 1792                         continue;
 1793                 bus = SRCBUSID(intr);
 1794                 bustype = apic_bus_type(bus);
 1795                 if (bustype != ISA &&
 1796                     bustype != EISA &&
 1797                     bustype != PCI)
 1798                         continue;
 1799                 break;
 1800         }
 1801         if (intr >= nintrs) {
 1802                 return -1;
 1803         }
 1804         for (ointr = intr + 1; ointr < nintrs; ointr++) {
 1805                 if (INTTYPE(ointr) != 0)
 1806                         continue;
 1807                 if (bus != SRCBUSID(ointr))
 1808                         continue;
 1809                 if (bustype == PCI) {
 1810                         if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
 1811                                 continue;
 1812                         if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
 1813                                 continue;
 1814                 }
 1815                 if (bustype == ISA || bustype == EISA) {
 1816                         if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
 1817                                 continue;
 1818                 }
 1819                 if (INTPIN(intr) == INTPIN(ointr))
 1820                         continue;
 1821                 break;
 1822         }
 1823         if (ointr >= nintrs) {
 1824                 return -1;
 1825         }
 1826         return INTIRQ(ointr);
 1827 }
 1828 #undef SRCBUSLINE
 1829 #undef SRCBUSDEVICE
 1830 #undef SRCBUSID
 1831 #undef SRCBUSIRQ
 1832 
 1833 #undef INTPIN
 1834 #undef INTIRQ
 1835 #undef INTAPIC
 1836 #undef INTTYPE
 1837 
 1838 
 1839 /*
 1840  * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
 1841  *
 1842  * XXX FIXME:
 1843  *  Exactly what this means is unclear at this point.  It is a solution
 1844  *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
 1845  *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
 1846  *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
 1847  *  option.
 1848  */
 1849 int
 1850 undirect_isa_irq(int rirq)
 1851 {
 1852 #if defined(READY)
 1853         if (bootverbose)
 1854             printf("Freeing redirected ISA irq %d.\n", rirq);
 1855         /** FIXME: tickle the MB redirector chip */
 1856         return ???;
 1857 #else
 1858         if (bootverbose)
 1859             printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
 1860         return 0;
 1861 #endif  /* READY */
 1862 }
 1863 
 1864 
 1865 /*
 1866  * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
 1867  */
 1868 int
 1869 undirect_pci_irq(int rirq)
 1870 {
 1871 #if defined(READY)
 1872         if (bootverbose)
 1873                 printf("Freeing redirected PCI irq %d.\n", rirq);
 1874 
 1875         /** FIXME: tickle the MB redirector chip */
 1876         return ???;
 1877 #else
 1878         if (bootverbose)
 1879                 printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
 1880                        rirq);
 1881         return 0;
 1882 #endif  /* READY */
 1883 }
 1884 
 1885 
 1886 /*
 1887  * given a bus ID, return:
 1888  *  the bus type if found
 1889  *  -1 if NOT found
 1890  */
 1891 int
 1892 apic_bus_type(int id)
 1893 {
 1894         int     x;
 1895 
 1896         for (x = 0; x < mp_nbusses; ++x)
 1897                 if (bus_data[x].bus_id == id)
 1898                         return bus_data[x].bus_type;
 1899 
 1900         return -1;
 1901 }
 1902 
 1903 
 1904 /*
 1905  * given a LOGICAL APIC# and pin#, return:
 1906  *  the associated src bus ID if found
 1907  *  -1 if NOT found
 1908  */
 1909 int
 1910 apic_src_bus_id(int apic, int pin)
 1911 {
 1912         int     x;
 1913 
 1914         /* search each of the possible INTerrupt sources */
 1915         for (x = 0; x < nintrs; ++x)
 1916                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1917                     (pin == io_apic_ints[x].dst_apic_int))
 1918                         return (io_apic_ints[x].src_bus_id);
 1919 
 1920         return -1;              /* NOT found */
 1921 }
 1922 
 1923 
 1924 /*
 1925  * given a LOGICAL APIC# and pin#, return:
 1926  *  the associated src bus IRQ if found
 1927  *  -1 if NOT found
 1928  */
 1929 int
 1930 apic_src_bus_irq(int apic, int pin)
 1931 {
 1932         int     x;
 1933 
 1934         for (x = 0; x < nintrs; x++)
 1935                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1936                     (pin == io_apic_ints[x].dst_apic_int))
 1937                         return (io_apic_ints[x].src_bus_irq);
 1938 
 1939         return -1;              /* NOT found */
 1940 }
 1941 
 1942 
 1943 /*
 1944  * given a LOGICAL APIC# and pin#, return:
 1945  *  the associated INTerrupt type if found
 1946  *  -1 if NOT found
 1947  */
 1948 int
 1949 apic_int_type(int apic, int pin)
 1950 {
 1951         int     x;
 1952 
 1953         /* search each of the possible INTerrupt sources */
 1954         for (x = 0; x < nintrs; ++x)
 1955                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1956                     (pin == io_apic_ints[x].dst_apic_int))
 1957                         return (io_apic_ints[x].int_type);
 1958 
 1959         return -1;              /* NOT found */
 1960 }
 1961 
 1962 int 
 1963 apic_irq(int apic, int pin)
 1964 {
 1965         int x;
 1966         int res;
 1967 
 1968         for (x = 0; x < nintrs; ++x)
 1969                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1970                     (pin == io_apic_ints[x].dst_apic_int)) {
 1971                         res = io_apic_ints[x].int_vector;
 1972                         if (res == 0xff)
 1973                                 return -1;
 1974                         if (apic != int_to_apicintpin[res].ioapic)
 1975                                 panic("apic_irq: inconsistent table");
 1976                         if (pin != int_to_apicintpin[res].int_pin)
 1977                                 panic("apic_irq inconsistent table (2)");
 1978                         return res;
 1979                 }
 1980         return -1;
 1981 }
 1982 
 1983 
 1984 /*
 1985  * given a LOGICAL APIC# and pin#, return:
 1986  *  the associated trigger mode if found
 1987  *  -1 if NOT found
 1988  */
 1989 int
 1990 apic_trigger(int apic, int pin)
 1991 {
 1992         int     x;
 1993 
 1994         /* search each of the possible INTerrupt sources */
 1995         for (x = 0; x < nintrs; ++x)
 1996                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 1997                     (pin == io_apic_ints[x].dst_apic_int))
 1998                         return ((io_apic_ints[x].int_flags >> 2) & 0x03);
 1999 
 2000         return -1;              /* NOT found */
 2001 }
 2002 
 2003 
 2004 /*
 2005  * given a LOGICAL APIC# and pin#, return:
 2006  *  the associated 'active' level if found
 2007  *  -1 if NOT found
 2008  */
 2009 int
 2010 apic_polarity(int apic, int pin)
 2011 {
 2012         int     x;
 2013 
 2014         /* search each of the possible INTerrupt sources */
 2015         for (x = 0; x < nintrs; ++x)
 2016                 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 2017                     (pin == io_apic_ints[x].dst_apic_int))
 2018                         return (io_apic_ints[x].int_flags & 0x03);
 2019 
 2020         return -1;              /* NOT found */
 2021 }
 2022 
 2023 
 2024 /*
 2025  * set data according to MP defaults
 2026  * FIXME: probably not complete yet...
 2027  */
 2028 static void
 2029 default_mp_table(int type)
 2030 {
 2031         int     ap_cpu_id;
 2032 #if defined(APIC_IO)
 2033         int     io_apic_id;
 2034         int     pin;
 2035 #endif  /* APIC_IO */
 2036 
 2037 #if 0
 2038         printf("  MP default config type: %d\n", type);
 2039         switch (type) {
 2040         case 1:
 2041                 printf("   bus: ISA, APIC: 82489DX\n");
 2042                 break;
 2043         case 2:
 2044                 printf("   bus: EISA, APIC: 82489DX\n");
 2045                 break;
 2046         case 3:
 2047                 printf("   bus: EISA, APIC: 82489DX\n");
 2048                 break;
 2049         case 4:
 2050                 printf("   bus: MCA, APIC: 82489DX\n");
 2051                 break;
 2052         case 5:
 2053                 printf("   bus: ISA+PCI, APIC: Integrated\n");
 2054                 break;
 2055         case 6:
 2056                 printf("   bus: EISA+PCI, APIC: Integrated\n");
 2057                 break;
 2058         case 7:
 2059                 printf("   bus: MCA+PCI, APIC: Integrated\n");
 2060                 break;
 2061         default:
 2062                 printf("   future type\n");
 2063                 break;
 2064                 /* NOTREACHED */
 2065         }
 2066 #endif  /* 0 */
 2067 
 2068         boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
 2069         ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
 2070 
 2071         /* BSP */
 2072         CPU_TO_ID(0) = boot_cpu_id;
 2073         ID_TO_CPU(boot_cpu_id) = 0;
 2074 
 2075         /* one and only AP */
 2076         CPU_TO_ID(1) = ap_cpu_id;
 2077         ID_TO_CPU(ap_cpu_id) = 1;
 2078 
 2079 #if defined(APIC_IO)
 2080         /* one and only IO APIC */
 2081         io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
 2082 
 2083         /*
 2084          * sanity check, refer to MP spec section 3.6.6, last paragraph
 2085          * necessary as some hardware isn't properly setting up the IO APIC
 2086          */
 2087 #if defined(REALLY_ANAL_IOAPICID_VALUE)
 2088         if (io_apic_id != 2) {
 2089 #else
 2090         if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
 2091 #endif  /* REALLY_ANAL_IOAPICID_VALUE */
 2092                 io_apic_set_id(0, 2);
 2093                 io_apic_id = 2;
 2094         }
 2095         IO_TO_ID(0) = io_apic_id;
 2096         ID_TO_IO(io_apic_id) = 0;
 2097 #endif  /* APIC_IO */
 2098 
 2099         /* fill out bus entries */
 2100         switch (type) {
 2101         case 1:
 2102         case 2:
 2103         case 3:
 2104         case 4:
 2105         case 5:
 2106         case 6:
 2107         case 7:
 2108                 bus_data[0].bus_id = default_data[type - 1][1];
 2109                 bus_data[0].bus_type = default_data[type - 1][2];
 2110                 bus_data[1].bus_id = default_data[type - 1][3];
 2111                 bus_data[1].bus_type = default_data[type - 1][4];
 2112                 break;
 2113 
 2114         /* case 4: case 7:                 MCA NOT supported */
 2115         default:                /* illegal/reserved */
 2116                 panic("BAD default MP config: %d", type);
 2117                 /* NOTREACHED */
 2118         }
 2119 
 2120 #if defined(APIC_IO)
 2121         /* general cases from MP v1.4, table 5-2 */
 2122         for (pin = 0; pin < 16; ++pin) {
 2123                 io_apic_ints[pin].int_type = 0;
 2124                 io_apic_ints[pin].int_flags = 0x05;     /* edge/active-hi */
 2125                 io_apic_ints[pin].src_bus_id = 0;
 2126                 io_apic_ints[pin].src_bus_irq = pin;    /* IRQ2 caught below */
 2127                 io_apic_ints[pin].dst_apic_id = io_apic_id;
 2128                 io_apic_ints[pin].dst_apic_int = pin;   /* 1-to-1 */
 2129         }
 2130 
 2131         /* special cases from MP v1.4, table 5-2 */
 2132         if (type == 2) {
 2133                 io_apic_ints[2].int_type = 0xff;        /* N/C */
 2134                 io_apic_ints[13].int_type = 0xff;       /* N/C */
 2135 #if !defined(APIC_MIXED_MODE)
 2136                 /** FIXME: ??? */
 2137                 panic("sorry, can't support type 2 default yet");
 2138 #endif  /* APIC_MIXED_MODE */
 2139         }
 2140         else
 2141                 io_apic_ints[2].src_bus_irq = 0;        /* ISA IRQ0 is on APIC INT 2 */
 2142 
 2143         if (type == 7)
 2144                 io_apic_ints[0].int_type = 0xff;        /* N/C */
 2145         else
 2146                 io_apic_ints[0].int_type = 3;   /* vectored 8259 */
 2147 #endif  /* APIC_IO */
 2148 }
 2149 
 2150 
 2151 /*
 2152  * initialize all the SMP locks
 2153  */
 2154 
 2155 /* critical region around IO APIC, apic_imen */
 2156 struct simplelock       imen_lock;
 2157 
 2158 /* critical region around splxx(), cpl, cml, cil, ipending */
 2159 struct simplelock       cpl_lock;
 2160 
 2161 /* Make FAST_INTR() routines sequential */
 2162 struct simplelock       fast_intr_lock;
 2163 
 2164 /* critical region around INTR() routines */
 2165 struct simplelock       intr_lock;
 2166 
 2167 /* lock regions protected in UP kernel via cli/sti */
 2168 struct simplelock       mpintr_lock;
 2169 
 2170 /* lock region used by kernel profiling */
 2171 struct simplelock       mcount_lock;
 2172 
 2173 #ifdef USE_COMLOCK
 2174 /* locks com (tty) data/hardware accesses: a FASTINTR() */
 2175 struct simplelock       com_lock;
 2176 #endif /* USE_COMLOCK */
 2177 
 2178 #ifdef USE_CLOCKLOCK
 2179 /* lock regions around the clock hardware */
 2180 struct simplelock       clock_lock;
 2181 #endif /* USE_CLOCKLOCK */
 2182 
 2183 /* lock around the MP rendezvous */
 2184 static struct simplelock smp_rv_lock;
 2185 
 2186 static void
 2187 init_locks(void)
 2188 {
 2189         /*
 2190          * Get the initial mp_lock with a count of 1 for the BSP.
 2191          * This uses a LOGICAL cpu ID, ie BSP == 0.
 2192          */
 2193         mp_lock = 0x00000001;
 2194 
 2195 #if 0
 2196         /* ISR uses its own "giant lock" */
 2197         isr_lock = FREE_LOCK;
 2198 #endif
 2199 
 2200 #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
 2201         s_lock_init((struct simplelock*)&apic_itrace_debuglock);
 2202 #endif
 2203 
 2204         s_lock_init((struct simplelock*)&mpintr_lock);
 2205 
 2206         s_lock_init((struct simplelock*)&mcount_lock);
 2207 
 2208         s_lock_init((struct simplelock*)&fast_intr_lock);
 2209         s_lock_init((struct simplelock*)&intr_lock);
 2210         s_lock_init((struct simplelock*)&imen_lock);
 2211         s_lock_init((struct simplelock*)&cpl_lock);
 2212         s_lock_init(&smp_rv_lock);
 2213 
 2214 #ifdef USE_COMLOCK
 2215         s_lock_init((struct simplelock*)&com_lock);
 2216 #endif /* USE_COMLOCK */
 2217 #ifdef USE_CLOCKLOCK
 2218         s_lock_init((struct simplelock*)&clock_lock);
 2219 #endif /* USE_CLOCKLOCK */
 2220 }
 2221 
 2222 
 2223 /* Wait for all APs to be fully initialized */
 2224 extern int wait_ap(unsigned int);
 2225 
 2226 /*
 2227  * start each AP in our list
 2228  */
 2229 static int
 2230 start_all_aps(u_int boot_addr)
 2231 {
 2232         int     x, i, pg;
 2233         u_char  mpbiosreason;
 2234         u_long  mpbioswarmvec;
 2235         struct globaldata *gd;
 2236         char *stack;
 2237         uintptr_t kptbase;
 2238 
 2239         POSTCODE(START_ALL_APS_POST);
 2240 
 2241         /* initialize BSP's local APIC */
 2242         apic_initialize();
 2243         bsp_apic_ready = 1;
 2244 
 2245         /* install the AP 1st level boot code */
 2246         install_ap_tramp(boot_addr);
 2247 
 2248 
 2249         /* save the current value of the warm-start vector */
 2250         mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
 2251 #ifndef PC98
 2252         outb(CMOS_REG, BIOS_RESET);
 2253         mpbiosreason = inb(CMOS_DATA);
 2254 #endif
 2255 
 2256         /* record BSP in CPU map */
 2257         all_cpus = 1;
 2258 
 2259         /* set up temporary P==V mapping for AP boot */
 2260         /* XXX this is a hack, we should boot the AP on its own stack/PTD */
 2261         kptbase = (uintptr_t)(void *)KPTphys;
 2262         for (x = 0; x < NKPT; x++)
 2263                 PTD[x] = (pd_entry_t)(PG_V | PG_RW |
 2264                     ((kptbase + x * PAGE_SIZE) & PG_FRAME));
 2265         invltlb();
 2266 
 2267         /* start each AP */
 2268         for (x = 1; x <= mp_naps; ++x) {
 2269 
 2270                 /* This is a bit verbose, it will go away soon.  */
 2271 
 2272                 /* first page of AP's private space */
 2273                 pg = x * i386_btop(sizeof(struct privatespace));
 2274 
 2275                 /* allocate a new private data page */
 2276                 gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
 2277 
 2278                 /* wire it into the private page table page */
 2279                 SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
 2280 
 2281                 /* allocate and set up an idle stack data page */
 2282                 stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
 2283                 for (i = 0; i < UPAGES; i++)
 2284                         SMPpt[pg + 6 + i] = (pt_entry_t)
 2285                             (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 2286 
 2287                 SMPpt[pg + 1] = 0;              /* *prv_CMAP1 */
 2288                 SMPpt[pg + 2] = 0;              /* *prv_CMAP2 */
 2289                 SMPpt[pg + 3] = 0;              /* *prv_CMAP3 */
 2290                 SMPpt[pg + 4] = 0;              /* *prv_PMAP1 */
 2291                 SMPpt[pg + 5] = 0;              /* *prv_PMAP2 */
 2292 
 2293                 /* prime data page for it to use */
 2294                 gd->gd_cpuid = x;
 2295                 gd->gd_cpu_lockid = x << 24;
 2296                 gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
 2297                 gd->gd_prv_CMAP2 = &SMPpt[pg + 2];
 2298                 gd->gd_prv_CMAP3 = &SMPpt[pg + 3];
 2299                 gd->gd_prv_PMAP1 = (pd_entry_t *)&SMPpt[pg + 4];
 2300                 gd->gd_prv_PMAP2 = (pd_entry_t *)&SMPpt[pg + 5];
 2301                 gd->gd_prv_CADDR1 = SMP_prvspace[x].CPAGE1;
 2302                 gd->gd_prv_CADDR2 = SMP_prvspace[x].CPAGE2;
 2303                 gd->gd_prv_CADDR3 = SMP_prvspace[x].CPAGE3;
 2304                 gd->gd_prv_PADDR1 = (pt_entry_t *)SMP_prvspace[x].PPAGE1;
 2305                 gd->gd_prv_PADDR2 = (pt_entry_t *)SMP_prvspace[x].PPAGE2;
 2306 
 2307                 /* setup a vector to our boot code */
 2308                 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
 2309                 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
 2310 #ifndef PC98
 2311                 outb(CMOS_REG, BIOS_RESET);
 2312                 outb(CMOS_DATA, BIOS_WARM);     /* 'warm-start' */
 2313 #endif
 2314 
 2315                 bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
 2316                 bootAP = x;
 2317 
 2318                 /* attempt to start the Application Processor */
 2319                 CHECK_INIT(99); /* setup checkpoints */
 2320                 if (!start_ap(x, boot_addr)) {
 2321                         printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
 2322                         CHECK_PRINT("trace");   /* show checkpoints */
 2323                         /* better panic as the AP may be running loose */
 2324                         printf("panic y/n? [y] ");
 2325                         if (cngetc() != 'n')
 2326                                 panic("bye-bye");
 2327                 }
 2328                 CHECK_PRINT("trace");           /* show checkpoints */
 2329 
 2330                 /* record its version info */
 2331                 cpu_apic_versions[x] = cpu_apic_versions[0];
 2332 
 2333                 all_cpus |= (1 << x);           /* record AP in CPU map */
 2334         }
 2335 
 2336         /* build our map of 'other' CPUs */
 2337         other_cpus = all_cpus & ~(1 << cpuid);
 2338 
 2339         /* fill in our (BSP) APIC version */
 2340         cpu_apic_versions[0] = lapic.version;
 2341 
 2342         /* restore the warmstart vector */
 2343         *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
 2344 #ifndef PC98
 2345         outb(CMOS_REG, BIOS_RESET);
 2346         outb(CMOS_DATA, mpbiosreason);
 2347 #endif
 2348 
 2349         /*
 2350          * Set up the idle context for the BSP.  Similar to above except
 2351          * that some was done by locore, some by pmap.c and some is implicit
 2352          * because the BSP is cpu#0 and the page is initially zero, and also
 2353          * because we can refer to variables by name on the BSP..
 2354          */
 2355 
 2356         /* Allocate and setup BSP idle stack */
 2357         stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
 2358         for (i = 0; i < UPAGES; i++)
 2359                 SMPpt[6 + i] = (pt_entry_t)
 2360                     (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 2361 
 2362         for (x = 0; x < NKPT; x++)
 2363                 PTD[x] = 0;
 2364         pmap_set_opt();
 2365 
 2366         /* number of APs actually started */
 2367         return mp_ncpus - 1;
 2368 }
 2369 
 2370 
 2371 /*
 2372  * load the 1st level AP boot code into base memory.
 2373  */
 2374 
 2375 /* targets for relocation */
 2376 extern void bigJump(void);
 2377 extern void bootCodeSeg(void);
 2378 extern void bootDataSeg(void);
 2379 extern void MPentry(void);
 2380 extern u_int MP_GDT;
 2381 extern u_int mp_gdtbase;
 2382 
 2383 static void
 2384 install_ap_tramp(u_int boot_addr)
 2385 {
 2386         int     x;
 2387         int     size = *(int *) ((u_long) & bootMP_size);
 2388         u_char *src = (u_char *) ((u_long) bootMP);
 2389         u_char *dst = (u_char *) boot_addr + KERNBASE;
 2390         u_int   boot_base = (u_int) bootMP;
 2391         u_int8_t *dst8;
 2392         u_int16_t *dst16;
 2393         u_int32_t *dst32;
 2394 
 2395         POSTCODE(INSTALL_AP_TRAMP_POST);
 2396 
 2397         for (x = 0; x < size; ++x)
 2398                 *dst++ = *src++;
 2399 
 2400         /*
 2401          * modify addresses in code we just moved to basemem. unfortunately we
 2402          * need fairly detailed info about mpboot.s for this to work.  changes
 2403          * to mpboot.s might require changes here.
 2404          */
 2405 
 2406         /* boot code is located in KERNEL space */
 2407         dst = (u_char *) boot_addr + KERNBASE;
 2408 
 2409         /* modify the lgdt arg */
 2410         dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
 2411         *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
 2412 
 2413         /* modify the ljmp target for MPentry() */
 2414         dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
 2415         *dst32 = ((u_int) MPentry - KERNBASE);
 2416 
 2417         /* modify the target for boot code segment */
 2418         dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
 2419         dst8 = (u_int8_t *) (dst16 + 1);
 2420         *dst16 = (u_int) boot_addr & 0xffff;
 2421         *dst8 = ((u_int) boot_addr >> 16) & 0xff;
 2422 
 2423         /* modify the target for boot data segment */
 2424         dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
 2425         dst8 = (u_int8_t *) (dst16 + 1);
 2426         *dst16 = (u_int) boot_addr & 0xffff;
 2427         *dst8 = ((u_int) boot_addr >> 16) & 0xff;
 2428 }
 2429 
 2430 
 2431 /*
 2432  * this function starts the AP (application processor) identified
 2433  * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
 2434  * to accomplish this.  This is necessary because of the nuances
 2435  * of the different hardware we might encounter.  It ain't pretty,
 2436  * but it seems to work.
 2437  */
 2438 static int
 2439 start_ap(int logical_cpu, u_int boot_addr)
 2440 {
 2441         int     physical_cpu;
 2442         int     vector;
 2443         int     cpus;
 2444         u_long  icr_lo, icr_hi;
 2445 
 2446         POSTCODE(START_AP_POST);
 2447 
 2448         /* get the PHYSICAL APIC ID# */
 2449         physical_cpu = CPU_TO_ID(logical_cpu);
 2450 
 2451         /* calculate the vector */
 2452         vector = (boot_addr >> 12) & 0xff;
 2453 
 2454         /* used as a watchpoint to signal AP startup */
 2455         cpus = mp_ncpus;
 2456 
 2457         /*
 2458          * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
 2459          * and running the target CPU. OR this INIT IPI might be latched (P5
 2460          * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
 2461          * ignored.
 2462          */
 2463 
 2464         /* setup the address for the target AP */
 2465         icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
 2466         icr_hi |= (physical_cpu << 24);
 2467         lapic.icr_hi = icr_hi;
 2468 
 2469         /* do an INIT IPI: assert RESET */
 2470         icr_lo = lapic.icr_lo & 0xfff00000;
 2471         lapic.icr_lo = icr_lo | 0x0000c500;
 2472 
 2473         /* wait for pending status end */
 2474         while (lapic.icr_lo & APIC_DELSTAT_MASK)
 2475                  /* spin */ ;
 2476 
 2477         /* do an INIT IPI: deassert RESET */
 2478         lapic.icr_lo = icr_lo | 0x00008500;
 2479 
 2480         /* wait for pending status end */
 2481         u_sleep(10000);         /* wait ~10mS */
 2482         while (lapic.icr_lo & APIC_DELSTAT_MASK)
 2483                  /* spin */ ;
 2484 
 2485         /*
 2486          * next we do a STARTUP IPI: the previous INIT IPI might still be
 2487          * latched, (P5 bug) this 1st STARTUP would then terminate
 2488          * immediately, and the previously started INIT IPI would continue. OR
 2489          * the previous INIT IPI has already run. and this STARTUP IPI will
 2490          * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
 2491          * will run.
 2492          */
 2493 
 2494         /* do a STARTUP IPI */
 2495         lapic.icr_lo = icr_lo | 0x00000600 | vector;
 2496         while (lapic.icr_lo & APIC_DELSTAT_MASK)
 2497                  /* spin */ ;
 2498         u_sleep(200);           /* wait ~200uS */
 2499 
 2500         /*
 2501          * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
 2502          * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
 2503          * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
 2504          * recognized after hardware RESET or INIT IPI.
 2505          */
 2506 
 2507         lapic.icr_lo = icr_lo | 0x00000600 | vector;
 2508         while (lapic.icr_lo & APIC_DELSTAT_MASK)
 2509                  /* spin */ ;
 2510         u_sleep(200);           /* wait ~200uS */
 2511 
 2512         /* wait for it to start */
 2513         set_apic_timer(5000000);/* == 5 seconds */
 2514         while (read_apic_timer())
 2515                 if (mp_ncpus > cpus)
 2516                         return 1;       /* return SUCCESS */
 2517 
 2518         return 0;               /* return FAILURE */
 2519 }
 2520 
 2521 
 2522 /*
 2523  * Flush the TLB on all other CPU's
 2524  *
 2525  * XXX: Needs to handshake and wait for completion before proceding.
 2526  */
 2527 void
 2528 smp_invltlb(void)
 2529 {
 2530 #if defined(APIC_IO)
 2531         if (smp_started && invltlb_ok)
 2532                 all_but_self_ipi(XINVLTLB_OFFSET);
 2533 #endif  /* APIC_IO */
 2534 }
 2535 
 2536 void
 2537 invlpg(u_int addr)
 2538 {
 2539         __asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
 2540 
 2541         /* send a message to the other CPUs */
 2542         smp_invltlb();
 2543 }
 2544 
 2545 void
 2546 invltlb(void)
 2547 {
 2548         u_long  temp;
 2549 
 2550         /*
 2551          * This should be implemented as load_cr3(rcr3()) when load_cr3() is
 2552          * inlined.
 2553          */
 2554         __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
 2555 
 2556         /* send a message to the other CPUs */
 2557         smp_invltlb();
 2558 }
 2559 
 2560 
 2561 /*
 2562  * When called the executing CPU will send an IPI to all other CPUs
 2563  *  requesting that they halt execution.
 2564  *
 2565  * Usually (but not necessarily) called with 'other_cpus' as its arg.
 2566  *
 2567  *  - Signals all CPUs in map to stop.
 2568  *  - Waits for each to stop.
 2569  *
 2570  * Returns:
 2571  *  -1: error
 2572  *   0: NA
 2573  *   1: ok
 2574  *
 2575  * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
 2576  *            from executing at same time.
 2577  */
 2578 int
 2579 stop_cpus(u_int map)
 2580 {
 2581         if (!smp_started)
 2582                 return 0;
 2583 
 2584         /* send the Xcpustop IPI to all CPUs in map */
 2585         selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
 2586         
 2587         while ((stopped_cpus & map) != map)
 2588                 /* spin */ ;
 2589 
 2590         return 1;
 2591 }
 2592 
 2593 
 2594 /*
 2595  * Called by a CPU to restart stopped CPUs. 
 2596  *
 2597  * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
 2598  *
 2599  *  - Signals all CPUs in map to restart.
 2600  *  - Waits for each to restart.
 2601  *
 2602  * Returns:
 2603  *  -1: error
 2604  *   0: NA
 2605  *   1: ok
 2606  */
 2607 int
 2608 restart_cpus(u_int map)
 2609 {
 2610         if (!smp_started)
 2611                 return 0;
 2612 
 2613         started_cpus = map;             /* signal other cpus to restart */
 2614 
 2615         while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
 2616                 /* spin */ ;
 2617 
 2618         return 1;
 2619 }
 2620 
 2621 int smp_active = 0;     /* are the APs allowed to run? */
 2622 SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
 2623 
 2624 /* XXX maybe should be hw.ncpu */
 2625 static int smp_cpus = 1;        /* how many cpu's running */
 2626 SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");
 2627 
 2628 int invltlb_ok = 0;     /* throttle smp_invltlb() till safe */
 2629 SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
 2630 
 2631 /* Warning: Do not staticize.  Used from swtch.s */
 2632 int do_page_zero_idle = 1; /* bzero pages for fun and profit in idleloop */
 2633 SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW,
 2634            &do_page_zero_idle, 0, "");
 2635 
 2636 /* Is forwarding of a interrupt to the CPU holding the ISR lock enabled ? */
 2637 int forward_irq_enabled = 1;
 2638 SYSCTL_INT(_machdep, OID_AUTO, forward_irq_enabled, CTLFLAG_RW,
 2639            &forward_irq_enabled, 0, "");
 2640 
 2641 /* Enable forwarding of a signal to a process running on a different CPU */
 2642 static int forward_signal_enabled = 1;
 2643 SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
 2644            &forward_signal_enabled, 0, "");
 2645 
 2646 /* Enable forwarding of roundrobin to all other cpus */
 2647 static int forward_roundrobin_enabled = 1;
 2648 SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
 2649            &forward_roundrobin_enabled, 0, "");
 2650 
 2651 /*
 2652  * This is called once the rest of the system is up and running and we're
 2653  * ready to let the AP's out of the pen.
 2654  */
 2655 void ap_init(void);
 2656 
 2657 void
 2658 ap_init()
 2659 {
 2660         u_int   apic_id;
 2661 
 2662         /* BSP may have changed PTD while we're waiting for the lock */
 2663         cpu_invltlb();
 2664 
 2665         smp_cpus++;
 2666 
 2667 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 2668         lidt(&r_idt);
 2669 #endif
 2670 
 2671         /* Build our map of 'other' CPUs. */
 2672         other_cpus = all_cpus & ~(1 << cpuid);
 2673 
 2674         printf("SMP: AP CPU #%d Launched!\n", cpuid);
 2675 
 2676         /* set up CPU registers and state */
 2677         cpu_setregs();
 2678 
 2679         /* set up FPU state on the AP */
 2680         npxinit(__INITIAL_NPXCW__);
 2681 
 2682         /* set up SSE registers */
 2683         enable_sse();
 2684 
 2685         /* A quick check from sanity claus */
 2686         apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
 2687         if (cpuid != apic_id) {
 2688                 printf("SMP: cpuid = %d\n", cpuid);
 2689                 printf("SMP: apic_id = %d\n", apic_id);
 2690                 printf("PTD[MPPTDI] = %llx\n", (u_int64_t)PTD[MPPTDI]);
 2691                 panic("cpuid mismatch! boom!!");
 2692         }
 2693 
 2694         /* Init local apic for irq's */
 2695         apic_initialize();
 2696 
 2697         /* Set memory range attributes for this CPU to match the BSP */
 2698         mem_range_AP_init();
 2699 
 2700         /*
 2701          * Activate smp_invltlb, although strictly speaking, this isn't
 2702          * quite correct yet.  We should have a bitfield for cpus willing
 2703          * to accept TLB flush IPI's or something and sync them.
 2704          */
 2705         if (smp_cpus == mp_ncpus) {
 2706                 invltlb_ok = 1;
 2707                 smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
 2708                 smp_active = 1;  /* historic */
 2709         }
 2710 }
 2711 
 2712 #ifdef BETTER_CLOCK
 2713 
 2714 #define CHECKSTATE_USER 0
 2715 #define CHECKSTATE_SYS  1
 2716 #define CHECKSTATE_INTR 2
 2717 
 2718 /* Do not staticize.  Used from apic_vector.s */
 2719 struct proc*    checkstate_curproc[MAXCPU];
 2720 int             checkstate_cpustate[MAXCPU];
 2721 u_long          checkstate_pc[MAXCPU];
 2722 
 2723 #define PC_TO_INDEX(pc, prof)                           \
 2724         ((int)(((u_quad_t)((pc) - (prof)->pr_off) *     \
 2725             (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
 2726 
 2727 static void
 2728 addupc_intr_forwarded(struct proc *p, int id, int *astmap)
 2729 {
 2730         int i;
 2731         struct uprof *prof;
 2732         u_long pc;
 2733 
 2734         pc = checkstate_pc[id];
 2735         prof = &p->p_stats->p_prof;
 2736         if (pc >= prof->pr_off &&
 2737             (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) {
 2738                 if ((p->p_flag & P_OWEUPC) == 0) {
 2739                         prof->pr_addr = pc;
 2740                         prof->pr_ticks = 1;
 2741                         p->p_flag |= P_OWEUPC;
 2742                 }
 2743                 *astmap |= (1 << id);
 2744         }
 2745 }
 2746 
 2747 static void
 2748 forwarded_statclock(int id, int pscnt, int *astmap)
 2749 {
 2750         struct pstats *pstats;
 2751         long rss;
 2752         struct rusage *ru;
 2753         struct vmspace *vm;
 2754         int cpustate;
 2755         struct proc *p;
 2756 #ifdef GPROF
 2757         register struct gmonparam *g;
 2758         int i;
 2759 #endif
 2760 
 2761         p = checkstate_curproc[id];
 2762         cpustate = checkstate_cpustate[id];
 2763 
 2764         switch (cpustate) {
 2765         case CHECKSTATE_USER:
 2766                 if (p->p_flag & P_PROFIL)
 2767                         addupc_intr_forwarded(p, id, astmap);
 2768                 if (pscnt > 1)
 2769                         return;
 2770                 p->p_uticks++;
 2771                 if (p->p_nice > NZERO)
 2772                         cp_time[CP_NICE]++;
 2773                 else
 2774                         cp_time[CP_USER]++;
 2775                 break;
 2776         case CHECKSTATE_SYS:
 2777 #ifdef GPROF
 2778                 /*
 2779                  * Kernel statistics are just like addupc_intr, only easier.
 2780                  */
 2781                 g = &_gmonparam;
 2782                 if (g->state == GMON_PROF_ON) {
 2783                         i = checkstate_pc[id] - g->lowpc;
 2784                         if (i < g->textsize) {
 2785                                 i /= HISTFRACTION * sizeof(*g->kcount);
 2786                                 g->kcount[i]++;
 2787                         }
 2788                 }
 2789 #endif
 2790                 if (pscnt > 1)
 2791                         return;
 2792 
 2793                 if (!p)
 2794                         cp_time[CP_IDLE]++;
 2795                 else {
 2796                         p->p_sticks++;
 2797                         cp_time[CP_SYS]++;
 2798                 }
 2799                 break;
 2800         case CHECKSTATE_INTR:
 2801         default:
 2802 #ifdef GPROF
 2803                 /*
 2804                  * Kernel statistics are just like addupc_intr, only easier.
 2805                  */
 2806                 g = &_gmonparam;
 2807                 if (g->state == GMON_PROF_ON) {
 2808                         i = checkstate_pc[id] - g->lowpc;
 2809                         if (i < g->textsize) {
 2810                                 i /= HISTFRACTION * sizeof(*g->kcount);
 2811                                 g->kcount[i]++;
 2812                         }
 2813                 }
 2814 #endif
 2815                 if (pscnt > 1)
 2816                         return;
 2817                 if (p)
 2818                         p->p_iticks++;
 2819                 cp_time[CP_INTR]++;
 2820         }
 2821         if (p != NULL) {
 2822                 schedclock(p);
 2823                 
 2824                 /* Update resource usage integrals and maximums. */
 2825                 if ((pstats = p->p_stats) != NULL &&
 2826                     (ru = &pstats->p_ru) != NULL &&
 2827                     (vm = p->p_vmspace) != NULL) {
 2828                         ru->ru_ixrss += pgtok(vm->vm_tsize);
 2829                         ru->ru_idrss += pgtok(vm->vm_dsize);
 2830                         ru->ru_isrss += pgtok(vm->vm_ssize);
 2831                         rss = pgtok(vmspace_resident_count(vm));
 2832                         if (ru->ru_maxrss < rss)
 2833                                 ru->ru_maxrss = rss;
 2834                 }
 2835         }
 2836 }
 2837 
 2838 void
 2839 forward_statclock(int pscnt)
 2840 {
 2841         int map;
 2842         int id;
 2843         int i;
 2844 
 2845         /* Kludge. We don't yet have separate locks for the interrupts
 2846          * and the kernel. This means that we cannot let the other processors
 2847          * handle complex interrupts while inhibiting them from entering
 2848          * the kernel in a non-interrupt context.
 2849          *
 2850          * What we can do, without changing the locking mechanisms yet,
 2851          * is letting the other processors handle a very simple interrupt
 2852          * (wich determines the processor states), and do the main
 2853          * work ourself.
 2854          */
 2855 
 2856         if (!smp_started || !invltlb_ok || cold || panicstr)
 2857                 return;
 2858 
 2859         /* Step 1: Probe state   (user, cpu, interrupt, spinlock, idle ) */
 2860         
 2861         map = other_cpus & ~(stopped_cpus|hlt_cpus_mask);
 2862         checkstate_probed_cpus = 0;
 2863         if (map != 0)
 2864                 selected_apic_ipi(map,
 2865                                   XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
 2866 
 2867         i = 0;
 2868         while (checkstate_probed_cpus != map) {
 2869                 /* spin */
 2870                 i++;
 2871                 if (i == 100000) {
 2872 #ifdef BETTER_CLOCK_DIAGNOSTIC
 2873                         printf("forward_statclock: checkstate %x\n",
 2874                                checkstate_probed_cpus);
 2875 #endif
 2876                         break;
 2877                 }
 2878         }
 2879 
 2880         /*
 2881          * Step 2: walk through other processors processes, update ticks and 
 2882          * profiling info.
 2883          */
 2884         
 2885         map = 0;
 2886         for (id = 0; id < mp_ncpus; id++) {
 2887                 if (id == cpuid)
 2888                         continue;
 2889                 if (((1 << id) & checkstate_probed_cpus) == 0)
 2890                         continue;
 2891                 forwarded_statclock(id, pscnt, &map);
 2892         }
 2893         if (map != 0) {
 2894                 checkstate_need_ast |= map;
 2895                 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
 2896                 i = 0;
 2897                 while ((checkstate_need_ast & map) != 0) {
 2898                         /* spin */
 2899                         i++;
 2900                         if (i > 100000) { 
 2901 #ifdef BETTER_CLOCK_DIAGNOSTIC
 2902                                 printf("forward_statclock: dropped ast 0x%x\n",
 2903                                        checkstate_need_ast & map);
 2904 #endif
 2905                                 break;
 2906                         }
 2907                 }
 2908         }
 2909 }
 2910 
 2911 void 
 2912 forward_hardclock(int pscnt)
 2913 {
 2914         int map;
 2915         int id;
 2916         struct proc *p;
 2917         struct pstats *pstats;
 2918         int i;
 2919 
 2920         /* Kludge. We don't yet have separate locks for the interrupts
 2921          * and the kernel. This means that we cannot let the other processors
 2922          * handle complex interrupts while inhibiting them from entering
 2923          * the kernel in a non-interrupt context.
 2924          *
 2925          * What we can do, without changing the locking mechanisms yet,
 2926          * is letting the other processors handle a very simple interrupt
 2927          * (wich determines the processor states), and do the main
 2928          * work ourself.
 2929          */
 2930 
 2931         if (!smp_started || !invltlb_ok || cold || panicstr)
 2932                 return;
 2933 
 2934         /* Step 1: Probe state   (user, cpu, interrupt, spinlock, idle) */
 2935         
 2936         map = other_cpus & ~(stopped_cpus|hlt_cpus_mask);
 2937         checkstate_probed_cpus = 0;
 2938         if (map != 0)
 2939                 selected_apic_ipi(map,
 2940                                   XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
 2941         
 2942         i = 0;
 2943         while (checkstate_probed_cpus != map) {
 2944                 /* spin */
 2945                 i++;
 2946                 if (i == 100000) {
 2947 #ifdef BETTER_CLOCK_DIAGNOSTIC
 2948                         printf("forward_hardclock: checkstate %x\n",
 2949                                checkstate_probed_cpus);
 2950 #endif
 2951                         break;
 2952                 }
 2953         }
 2954 
 2955         /*
 2956          * Step 2: walk through other processors processes, update virtual 
 2957          * timer and profiling timer. If stathz == 0, also update ticks and 
 2958          * profiling info.
 2959          */
 2960         
 2961         map = 0;
 2962         for (id = 0; id < mp_ncpus; id++) {
 2963                 if (id == cpuid)
 2964                         continue;
 2965                 if (((1 << id) & checkstate_probed_cpus) == 0)
 2966                         continue;
 2967                 p = checkstate_curproc[id];
 2968                 if (p) {
 2969                         pstats = p->p_stats;
 2970                         if (checkstate_cpustate[id] == CHECKSTATE_USER &&
 2971                             timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
 2972                             itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
 2973                                 psignal(p, SIGVTALRM);
 2974                                 map |= (1 << id);
 2975                         }
 2976                         if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
 2977                             itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
 2978                                 psignal(p, SIGPROF);
 2979                                 map |= (1 << id);
 2980                         }
 2981                 }
 2982                 if (stathz == 0) {
 2983                         forwarded_statclock( id, pscnt, &map);
 2984                 }
 2985         }
 2986         if (map != 0) {
 2987                 checkstate_need_ast |= map;
 2988                 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
 2989                 i = 0;
 2990                 while ((checkstate_need_ast & map) != 0) {
 2991                         /* spin */
 2992                         i++;
 2993                         if (i > 100000) { 
 2994 #ifdef BETTER_CLOCK_DIAGNOSTIC
 2995                                 printf("forward_hardclock: dropped ast 0x%x\n",
 2996                                        checkstate_need_ast & map);
 2997 #endif
 2998                                 break;
 2999                         }
 3000                 }
 3001         }
 3002 }
 3003 
 3004 #endif /* BETTER_CLOCK */
 3005 
 3006 void 
 3007 forward_signal(struct proc *p)
 3008 {
 3009         int map;
 3010         int id;
 3011         int i;
 3012 
 3013         /* Kludge. We don't yet have separate locks for the interrupts
 3014          * and the kernel. This means that we cannot let the other processors
 3015          * handle complex interrupts while inhibiting them from entering
 3016          * the kernel in a non-interrupt context.
 3017          *
 3018          * What we can do, without changing the locking mechanisms yet,
 3019          * is letting the other processors handle a very simple interrupt
 3020          * (wich determines the processor states), and do the main
 3021          * work ourself.
 3022          */
 3023 
 3024         if (!smp_started || !invltlb_ok || cold || panicstr)
 3025                 return;
 3026         if (!forward_signal_enabled)
 3027                 return;
 3028         while (1) {
 3029                 if (p->p_stat != SRUN)
 3030                         return;
 3031                 id = p->p_oncpu;
 3032                 if (id == 0xff)
 3033                         return;
 3034                 map = (1<<id);
 3035                 checkstate_need_ast |= map;
 3036                 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
 3037                 i = 0;
 3038                 while ((checkstate_need_ast & map) != 0) {
 3039                         /* spin */
 3040                         i++;
 3041                         if (i > 100000) { 
 3042 #if 0
 3043                                 printf("forward_signal: dropped ast 0x%x\n",
 3044                                        checkstate_need_ast & map);
 3045 #endif
 3046                                 break;
 3047                         }
 3048                 }
 3049                 if (id == p->p_oncpu)
 3050                         return;
 3051         }
 3052 }
 3053 
 3054 void
 3055 forward_roundrobin(void)
 3056 {
 3057         u_int map;
 3058         int i;
 3059 
 3060         if (!smp_started || !invltlb_ok || cold || panicstr)
 3061                 return;
 3062         if (!forward_roundrobin_enabled)
 3063                 return;
 3064         resched_cpus |= other_cpus;
 3065         map = other_cpus & ~(stopped_cpus|hlt_cpus_mask);
 3066 #if 1
 3067         selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
 3068 #else
 3069         (void) all_but_self_ipi(XCPUAST_OFFSET);
 3070 #endif
 3071         i = 0;
 3072         while ((checkstate_need_ast & map) != 0) {
 3073                 /* spin */
 3074                 i++;
 3075                 if (i > 100000) {
 3076 #if 0
 3077                         printf("forward_roundrobin: dropped ast 0x%x\n",
 3078                                checkstate_need_ast & map);
 3079 #endif
 3080                         break;
 3081                 }
 3082         }
 3083 }
 3084 
 3085 
 3086 #ifdef APIC_INTR_REORDER
 3087 /*
 3088  *      Maintain mapping from softintr vector to isr bit in local apic.
 3089  */
 3090 void
 3091 set_lapic_isrloc(int intr, int vector)
 3092 {
 3093         if (intr < 0 || intr > 32)
 3094                 panic("set_apic_isrloc: bad intr argument: %d",intr);
 3095         if (vector < ICU_OFFSET || vector > 255)
 3096                 panic("set_apic_isrloc: bad vector argument: %d",vector);
 3097         apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
 3098         apic_isrbit_location[intr].bit = (1<<(vector & 31));
 3099 }
 3100 #endif
 3101 
 3102 /*
 3103  * All-CPU rendezvous.  CPUs are signalled, all execute the setup function 
 3104  * (if specified), rendezvous, execute the action function (if specified),
 3105  * rendezvous again, execute the teardown function (if specified), and then
 3106  * resume.
 3107  *
 3108  * Note that the supplied external functions _must_ be reentrant and aware
 3109  * that they are running in parallel and in an unknown lock context.
 3110  */
 3111 static void (*smp_rv_setup_func)(void *arg);
 3112 static void (*smp_rv_action_func)(void *arg);
 3113 static void (*smp_rv_teardown_func)(void *arg);
 3114 static void *smp_rv_func_arg;
 3115 static volatile int smp_rv_waiters[2];
 3116 
 3117 void
 3118 smp_rendezvous_action(void)
 3119 {
 3120         /* setup function */
 3121         if (smp_rv_setup_func != NULL)
 3122                 smp_rv_setup_func(smp_rv_func_arg);
 3123         /* spin on entry rendezvous */
 3124         atomic_add_int(&smp_rv_waiters[0], 1);
 3125         while (smp_rv_waiters[0] < mp_ncpus)
 3126                 ;
 3127         /* action function */
 3128         if (smp_rv_action_func != NULL)
 3129                 smp_rv_action_func(smp_rv_func_arg);
 3130         /* spin on exit rendezvous */
 3131         atomic_add_int(&smp_rv_waiters[1], 1);
 3132         while (smp_rv_waiters[1] < mp_ncpus)
 3133                 ;
 3134         /* teardown function */
 3135         if (smp_rv_teardown_func != NULL)
 3136                 smp_rv_teardown_func(smp_rv_func_arg);
 3137 }
 3138 
 3139 void
 3140 smp_rendezvous(void (* setup_func)(void *), 
 3141                void (* action_func)(void *),
 3142                void (* teardown_func)(void *),
 3143                void *arg)
 3144 {
 3145         u_int   efl;
 3146         
 3147         /* obtain rendezvous lock */
 3148         s_lock(&smp_rv_lock);           /* XXX sleep here? NOWAIT flag? */
 3149 
 3150         /* set static function pointers */
 3151         smp_rv_setup_func = setup_func;
 3152         smp_rv_action_func = action_func;
 3153         smp_rv_teardown_func = teardown_func;
 3154         smp_rv_func_arg = arg;
 3155         smp_rv_waiters[0] = 0;
 3156         smp_rv_waiters[1] = 0;
 3157 
 3158         /* disable interrupts on this CPU, save interrupt status */
 3159         efl = read_eflags();
 3160         write_eflags(efl & ~PSL_I);
 3161 
 3162         /* signal other processors, which will enter the IPI with interrupts off */
 3163         all_but_self_ipi(XRENDEZVOUS_OFFSET);
 3164 
 3165         /* call executor function */
 3166         smp_rendezvous_action();
 3167 
 3168         /* restore interrupt flag */
 3169         write_eflags(efl);
 3170 
 3171         /* release lock */
 3172         s_unlock(&smp_rv_lock);
 3173 }
 3174 
 3175 static int
 3176 sysctl_htl_cpus(SYSCTL_HANDLER_ARGS)
 3177 {
 3178         u_int mask;
 3179         int error;
 3180 
 3181         mask = hlt_cpus_mask;
 3182         error = sysctl_handle_int(oidp, &mask, 0, req);
 3183         if (error || !req->newptr)
 3184                 return (error);
 3185 
 3186         if (logical_cpus_mask != 0 &&
 3187             (mask & logical_cpus_mask) == logical_cpus_mask)
 3188                 hlt_logical_cpus = 1;
 3189         else
 3190                 hlt_logical_cpus = 0;
 3191 
 3192         if (! hyperthreading_allowed)
 3193                 mask |= hyperthreading_cpus_mask;
 3194 
 3195         if ((mask & all_cpus) == all_cpus)
 3196                 mask &= ~(1<<0);
 3197         hlt_cpus_mask = mask;
 3198         return (error);
 3199 }
 3200 SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
 3201     0, 0, sysctl_htl_cpus, "IU", "");
 3202 
 3203 static int
 3204 sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
 3205 {
 3206         int disable, error;
 3207 
 3208         disable = hlt_logical_cpus;
 3209         error = sysctl_handle_int(oidp, &disable, 0, req);
 3210         if (error || !req->newptr)
 3211                 return (error);
 3212 
 3213         if (disable)
 3214                 hlt_cpus_mask |= logical_cpus_mask;
 3215         else
 3216                 hlt_cpus_mask &= ~logical_cpus_mask;
 3217 
 3218         if (! hyperthreading_allowed)
 3219                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 3220 
 3221         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 3222                 hlt_cpus_mask &= ~(1<<0);
 3223 
 3224         hlt_logical_cpus = disable;
 3225         return (error);
 3226 }
 3227 
 3228 static int
 3229 sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
 3230 {
 3231         int allowed, error;
 3232 
 3233         allowed = hyperthreading_allowed;
 3234         error = sysctl_handle_int(oidp, &allowed, 0, req);
 3235         if (error || !req->newptr)
 3236                 return (error);
 3237 
 3238         if (allowed)
 3239                 hlt_cpus_mask &= ~hyperthreading_cpus_mask;
 3240         else
 3241                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 3242 
 3243         if (logical_cpus_mask != 0 &&
 3244             (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
 3245                 hlt_logical_cpus = 1;
 3246         else
 3247                 hlt_logical_cpus = 0;
 3248 
 3249         if ((hlt_cpus_mask & all_cpus) == all_cpus)
 3250                 hlt_cpus_mask &= ~(1<<0);
 3251 
 3252         hyperthreading_allowed = allowed;
 3253         return (error);
 3254 }
 3255 
 3256 static void
 3257 cpu_hlt_setup(void *dummy __unused)
 3258 {
 3259 
 3260         if (logical_cpus_mask != 0) {
 3261                 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
 3262                     &hlt_logical_cpus);
 3263                 sysctl_ctx_init(&logical_cpu_clist);
 3264                 SYSCTL_ADD_PROC(&logical_cpu_clist,
 3265                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 3266                     "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
 3267                     sysctl_hlt_logical_cpus, "IU", "");
 3268                 SYSCTL_ADD_UINT(&logical_cpu_clist,
 3269                     SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 3270                     "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
 3271                     &logical_cpus_mask, 0, "");
 3272 
 3273                 if (hlt_logical_cpus)
 3274                         hlt_cpus_mask |= logical_cpus_mask;
 3275 
 3276                 /*
 3277                  * If necessary for security purposes, force
 3278                  * hyperthreading off, regardless of the value
 3279                  * of hlt_logical_cpus.
 3280                  */
 3281                 if (hyperthreading_cpus_mask) {
 3282                         TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
 3283                             &hyperthreading_allowed);
 3284                         SYSCTL_ADD_PROC(&logical_cpu_clist,
 3285                             SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
 3286                             "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
 3287                             0, 0, sysctl_hyperthreading_allowed, "IU", "");
 3288                         if (! hyperthreading_allowed)
 3289                                 hlt_cpus_mask |= hyperthreading_cpus_mask;
 3290                 }
 3291         }
 3292 }
 3293 SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
 3294 
 3295 int
 3296 mp_grab_cpu_hlt(void)
 3297 {
 3298         u_int mask = 1 << cpuid;
 3299         u_int temp;
 3300         int retval;
 3301 
 3302         retval = mask & hlt_cpus_mask;
 3303         while (mask & hlt_cpus_mask) {
 3304                 temp = lapic.tpr;
 3305                 lapic.tpr = LOPRIO_LEVEL;
 3306                 __asm __volatile("sti; hlt" : : : "memory");
 3307                 lapic.tpr = temp;
 3308         }
 3309         return (retval);
 3310 }

Cache object: 84185aa12bc71dfe83bbe897c5e76f79


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.