The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/pci/pci.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
    5  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
    6  * Copyright (c) 2000, BSDi
    7  * All rights reserved.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice unmodified, this list of conditions, and the following
   14  *    disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   29  */
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include "opt_acpi.h"
   35 #include "opt_iommu.h"
   36 #include "opt_bus.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/conf.h>
   40 #include <sys/endian.h>
   41 #include <sys/eventhandler.h>
   42 #include <sys/fcntl.h>
   43 #include <sys/kernel.h>
   44 #include <sys/limits.h>
   45 #include <sys/linker.h>
   46 #include <sys/malloc.h>
   47 #include <sys/module.h>
   48 #include <sys/queue.h>
   49 #include <sys/sbuf.h>
   50 #include <sys/sysctl.h>
   51 #include <sys/systm.h>
   52 #include <sys/taskqueue.h>
   53 #include <sys/tree.h>
   54 
   55 #include <vm/vm.h>
   56 #include <vm/pmap.h>
   57 #include <vm/vm_extern.h>
   58 
   59 #include <sys/bus.h>
   60 #include <machine/bus.h>
   61 #include <sys/rman.h>
   62 #include <machine/resource.h>
   63 #include <machine/stdarg.h>
   64 
   65 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
   66 #include <machine/intr_machdep.h>
   67 #endif
   68 
   69 #include <sys/pciio.h>
   70 #include <dev/pci/pcireg.h>
   71 #include <dev/pci/pcivar.h>
   72 #include <dev/pci/pci_private.h>
   73 
   74 #ifdef PCI_IOV
   75 #include <sys/nv.h>
   76 #include <dev/pci/pci_iov_private.h>
   77 #endif
   78 
   79 #include <dev/usb/controller/xhcireg.h>
   80 #include <dev/usb/controller/ehcireg.h>
   81 #include <dev/usb/controller/ohcireg.h>
   82 #include <dev/usb/controller/uhcireg.h>
   83 
   84 #include <dev/iommu/iommu.h>
   85 
   86 #include "pcib_if.h"
   87 #include "pci_if.h"
   88 
   89 #define PCIR_IS_BIOS(cfg, reg)                                          \
   90         (((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) || \
   91          ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
   92 
   93 static int              pci_has_quirk(uint32_t devid, int quirk);
   94 static pci_addr_t       pci_mapbase(uint64_t mapreg);
   95 static const char       *pci_maptype(uint64_t mapreg);
   96 static int              pci_maprange(uint64_t mapreg);
   97 static pci_addr_t       pci_rombase(uint64_t mapreg);
   98 static int              pci_romsize(uint64_t testval);
   99 static void             pci_fixancient(pcicfgregs *cfg);
  100 static int              pci_printf(pcicfgregs *cfg, const char *fmt, ...);
  101 
  102 static int              pci_porten(device_t dev);
  103 static int              pci_memen(device_t dev);
  104 static void             pci_assign_interrupt(device_t bus, device_t dev,
  105                             int force_route);
  106 static int              pci_add_map(device_t bus, device_t dev, int reg,
  107                             struct resource_list *rl, int force, int prefetch);
  108 static int              pci_probe(device_t dev);
  109 static void             pci_load_vendor_data(void);
  110 static int              pci_describe_parse_line(char **ptr, int *vendor,
  111                             int *device, char **desc);
  112 static char             *pci_describe_device(device_t dev);
  113 static int              pci_modevent(module_t mod, int what, void *arg);
  114 static void             pci_hdrtypedata(device_t pcib, int b, int s, int f,
  115                             pcicfgregs *cfg);
  116 static void             pci_read_cap(device_t pcib, pcicfgregs *cfg);
  117 static int              pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
  118                             int reg, uint32_t *data);
  119 #if 0
  120 static int              pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
  121                             int reg, uint32_t data);
  122 #endif
  123 static void             pci_read_vpd(device_t pcib, pcicfgregs *cfg);
  124 static void             pci_mask_msix(device_t dev, u_int index);
  125 static void             pci_unmask_msix(device_t dev, u_int index);
  126 static int              pci_msi_blacklisted(void);
  127 static int              pci_msix_blacklisted(void);
  128 static void             pci_resume_msi(device_t dev);
  129 static void             pci_resume_msix(device_t dev);
  130 static int              pci_remap_intr_method(device_t bus, device_t dev,
  131                             u_int irq);
  132 static void             pci_hint_device_unit(device_t acdev, device_t child,
  133                             const char *name, int *unitp);
  134 static int              pci_reset_post(device_t dev, device_t child);
  135 static int              pci_reset_prepare(device_t dev, device_t child);
  136 static int              pci_reset_child(device_t dev, device_t child,
  137                             int flags);
  138 
  139 static int              pci_get_id_method(device_t dev, device_t child,
  140                             enum pci_id_type type, uintptr_t *rid);
  141 static struct pci_devinfo * pci_fill_devinfo(device_t pcib, device_t bus, int d,
  142     int b, int s, int f, uint16_t vid, uint16_t did);
  143 
/*
 * Method dispatch table for the pci(4) bus driver.  Wires the generic
 * device, bus, and PCI kobj interfaces to their PCI-specific
 * implementations; bus_generic_* entries delegate to the default bus
 * behavior.  Terminated by DEVMETHOD_END.
 */
static device_method_t pci_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe,         pci_probe),
        DEVMETHOD(device_attach,        pci_attach),
        DEVMETHOD(device_detach,        pci_detach),
        DEVMETHOD(device_shutdown,      bus_generic_shutdown),
        DEVMETHOD(device_suspend,       bus_generic_suspend),
        DEVMETHOD(device_resume,        pci_resume),

        /* Bus interface */
        DEVMETHOD(bus_print_child,      pci_print_child),
        DEVMETHOD(bus_probe_nomatch,    pci_probe_nomatch),
        DEVMETHOD(bus_read_ivar,        pci_read_ivar),
        DEVMETHOD(bus_write_ivar,       pci_write_ivar),
        DEVMETHOD(bus_driver_added,     pci_driver_added),
        DEVMETHOD(bus_setup_intr,       pci_setup_intr),
        DEVMETHOD(bus_teardown_intr,    pci_teardown_intr),
        DEVMETHOD(bus_reset_prepare,    pci_reset_prepare),
        DEVMETHOD(bus_reset_post,       pci_reset_post),
        DEVMETHOD(bus_reset_child,      pci_reset_child),

        DEVMETHOD(bus_get_dma_tag,      pci_get_dma_tag),
        DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
        DEVMETHOD(bus_set_resource,     bus_generic_rl_set_resource),
        DEVMETHOD(bus_get_resource,     bus_generic_rl_get_resource),
        DEVMETHOD(bus_delete_resource,  pci_delete_resource),
        DEVMETHOD(bus_alloc_resource,   pci_alloc_resource),
        DEVMETHOD(bus_adjust_resource,  bus_generic_adjust_resource),
        DEVMETHOD(bus_release_resource, pci_release_resource),
        DEVMETHOD(bus_activate_resource, pci_activate_resource),
        DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
        DEVMETHOD(bus_child_deleted,    pci_child_deleted),
        DEVMETHOD(bus_child_detached,   pci_child_detached),
        DEVMETHOD(bus_child_pnpinfo,    pci_child_pnpinfo_method),
        DEVMETHOD(bus_child_location,   pci_child_location_method),
        DEVMETHOD(bus_get_device_path,  pci_get_device_path_method),
        DEVMETHOD(bus_hint_device_unit, pci_hint_device_unit),
        DEVMETHOD(bus_remap_intr,       pci_remap_intr_method),
        DEVMETHOD(bus_suspend_child,    pci_suspend_child),
        DEVMETHOD(bus_resume_child,     pci_resume_child),
        DEVMETHOD(bus_rescan,           pci_rescan_method),

        /* PCI interface */
        DEVMETHOD(pci_read_config,      pci_read_config_method),
        DEVMETHOD(pci_write_config,     pci_write_config_method),
        DEVMETHOD(pci_enable_busmaster, pci_enable_busmaster_method),
        DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
        DEVMETHOD(pci_enable_io,        pci_enable_io_method),
        DEVMETHOD(pci_disable_io,       pci_disable_io_method),
        DEVMETHOD(pci_get_vpd_ident,    pci_get_vpd_ident_method),
        DEVMETHOD(pci_get_vpd_readonly, pci_get_vpd_readonly_method),
        DEVMETHOD(pci_get_powerstate,   pci_get_powerstate_method),
        DEVMETHOD(pci_set_powerstate,   pci_set_powerstate_method),
        DEVMETHOD(pci_assign_interrupt, pci_assign_interrupt_method),
        DEVMETHOD(pci_find_cap,         pci_find_cap_method),
        DEVMETHOD(pci_find_next_cap,    pci_find_next_cap_method),
        DEVMETHOD(pci_find_extcap,      pci_find_extcap_method),
        DEVMETHOD(pci_find_next_extcap, pci_find_next_extcap_method),
        DEVMETHOD(pci_find_htcap,       pci_find_htcap_method),
        DEVMETHOD(pci_find_next_htcap,  pci_find_next_htcap_method),
        DEVMETHOD(pci_alloc_msi,        pci_alloc_msi_method),
        DEVMETHOD(pci_alloc_msix,       pci_alloc_msix_method),
        DEVMETHOD(pci_enable_msi,       pci_enable_msi_method),
        DEVMETHOD(pci_enable_msix,      pci_enable_msix_method),
        DEVMETHOD(pci_disable_msi,      pci_disable_msi_method),
        DEVMETHOD(pci_remap_msix,       pci_remap_msix_method),
        DEVMETHOD(pci_release_msi,      pci_release_msi_method),
        DEVMETHOD(pci_msi_count,        pci_msi_count_method),
        DEVMETHOD(pci_msix_count,       pci_msix_count_method),
        DEVMETHOD(pci_msix_pba_bar,     pci_msix_pba_bar_method),
        DEVMETHOD(pci_msix_table_bar,   pci_msix_table_bar_method),
        DEVMETHOD(pci_get_id,           pci_get_id_method),
        DEVMETHOD(pci_alloc_devinfo,    pci_alloc_devinfo_method),
        DEVMETHOD(pci_child_added,      pci_child_added_method),
#ifdef PCI_IOV
        /* SR-IOV support, only when compiled with "options PCI_IOV". */
        DEVMETHOD(pci_iov_attach,       pci_iov_attach_method),
        DEVMETHOD(pci_iov_detach,       pci_iov_detach_method),
        DEVMETHOD(pci_create_iov_child, pci_create_iov_child_method),
#endif

        DEVMETHOD_END
};
  226 
/* Declare the "pci" driver class with per-bus softc of struct pci_softc. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

/* Attach below pcib(4) during BUS_PASS_BUS so PCI enumerates early in boot. */
EARLY_DRIVER_MODULE(pci, pcib, pci_driver, pci_modevent, NULL, BUS_PASS_BUS);
MODULE_VERSION(pci, 1);

/* Contents of the pcivendordata file loaded by pci_load_vendor_data(). */
static char     *pci_vendordata;
static size_t   pci_vendordata_size;
  234 
/*
 * One entry in the device quirk table.  A device matches when its
 * combined vendor/device ID equals 'devid' and the lookup asks for the
 * same 'type'.  The meaning of arg1/arg2 depends on the quirk type
 * (e.g. arg1 is the config register offset for PCI_QUIRK_MAP_REG).
 */
struct pci_quirk {
        uint32_t devid; /* Vendor/device of the card */
        int     type;
#define PCI_QUIRK_MAP_REG       1 /* PCI map register in weird place */
#define PCI_QUIRK_DISABLE_MSI   2 /* Neither MSI nor MSI-X work */
#define PCI_QUIRK_ENABLE_MSI_VM 3 /* Older chipset in VM where MSI works */
#define PCI_QUIRK_UNMAP_REG     4 /* Ignore PCI map register */
#define PCI_QUIRK_DISABLE_MSIX  5 /* MSI-X doesn't work */
#define PCI_QUIRK_MSI_INTX_BUG  6 /* PCIM_CMD_INTxDIS disables MSI */
#define PCI_QUIRK_REALLOC_BAR   7 /* Can't allocate memory at the default address */
        int     arg1;
        int     arg2;
};
  248 
/*
 * Known-broken (or known-good-despite-appearances) devices, keyed by
 * vendor/device ID.  Scanned linearly by pci_has_quirk(); the all-zero
 * entry terminates the table.
 */
static const struct pci_quirk pci_quirks[] = {
        /* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
        { 0x71138086, PCI_QUIRK_MAP_REG,        0x90,    0 },
        { 0x719b8086, PCI_QUIRK_MAP_REG,        0x90,    0 },
        /* As does the Serverworks OSB4 (the SMBus mapping register) */
        { 0x02001166, PCI_QUIRK_MAP_REG,        0x90,    0 },

        /*
         * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
         * or the CMIC-SL (AKA ServerWorks GC_LE).
         */
        { 0x00141166, PCI_QUIRK_DISABLE_MSI,    0,      0 },
        { 0x00171166, PCI_QUIRK_DISABLE_MSI,    0,      0 },

        /*
         * MSI doesn't work on earlier Intel chipsets including
         * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
         */
        { 0x25408086, PCI_QUIRK_DISABLE_MSI,    0,      0 },
        { 0x254c8086, PCI_QUIRK_DISABLE_MSI,    0,      0 },
        { 0x25508086, PCI_QUIRK_DISABLE_MSI,    0,      0 },
        { 0x25608086, PCI_QUIRK_DISABLE_MSI,    0,      0 },
        { 0x25708086, PCI_QUIRK_DISABLE_MSI,    0,      0 },
        { 0x25788086, PCI_QUIRK_DISABLE_MSI,    0,      0 },
        { 0x35808086, PCI_QUIRK_DISABLE_MSI,    0,      0 },

        /*
         * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
         * bridge.
         */
        { 0x74501022, PCI_QUIRK_DISABLE_MSI,    0,      0 },

        /*
         * Some virtualization environments emulate an older chipset
         * but support MSI just fine.  QEMU uses the Intel 82440.
         */
        { 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,  0,      0 },

        /*
         * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
         * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
         * It prevents us from attaching hpet(4) when the bit is unset.
         * Note this quirk only affects SB600 revision A13 and earlier.
         * For SB600 A21 and later, firmware must set the bit to hide it.
         * For SB700 and later, it is unused and hardcoded to zero.
         */
        { 0x43851002, PCI_QUIRK_UNMAP_REG,      0x14,   0 },

        /*
         * Atheros AR8161/AR8162/E2200/E2400/E2500 Ethernet controllers have
         * a bug that MSI interrupt does not assert if PCIM_CMD_INTxDIS bit
         * of the command register is set.
         */
        { 0x10911969, PCI_QUIRK_MSI_INTX_BUG,   0,      0 },
        { 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,   0,      0 },
        { 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG,   0,      0 },
        { 0xE0B11969, PCI_QUIRK_MSI_INTX_BUG,   0,      0 },
        { 0x10901969, PCI_QUIRK_MSI_INTX_BUG,   0,      0 },

        /*
         * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
         * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
         */
        { 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,   0,      0 }, /* BCM5714 */
        { 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,   0,      0 }, /* BCM5714S */
        { 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,   0,      0 }, /* BCM5780 */
        { 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,   0,      0 }, /* BCM5780S */
        { 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,   0,      0 }, /* BCM5715 */
        { 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,   0,      0 }, /* BCM5715S */

        /*
         * HPE Gen 10 VGA has a memory range that can't be allocated in the
         * expected place.
         */
        { 0x98741002, PCI_QUIRK_REALLOC_BAR,    0,      0 },
        { 0 } /* list terminator */
};
  326 
/* map register information */
#define PCI_MAPMEM      0x01    /* memory map */
#define PCI_MAPMEMP     0x02    /* prefetchable memory map */
#define PCI_MAPPORT     0x04    /* port map */

/* Global list of every enumerated PCI function (see pci_fill_devinfo()). */
struct devlist pci_devq;
uint32_t pci_generation;        /* bumped on every device-list change */
uint32_t pci_numdevs = 0;       /* number of entries on pci_devq */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not"
    " enable these bits correctly.  We'd like to do this all the time, but"
    " there are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original "
    "firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RWTUN,
    &pci_do_power_nodriver, 0,
    "Place a function into D3 state when no driver attaches to it.  0 means"
    " disable.  1 means conservatively place devices into D3 state.  2 means"
    " aggressively place devices into D3 state.  3 means put absolutely"
    " everything in D3 state.");

/* Non-static: also consulted by other kernel files via pcivar.h. */
int pci_do_power_resume = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RWTUN,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RWTUN,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RWTUN, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RWTUN, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_msix_rewrite_table = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, msix_rewrite_table, CTLFLAG_RWTUN,
    &pci_msix_rewrite_table, 0,
    "Rewrite entire MSI-X table when updating MSI-X entries");

static int pci_honor_msi_blacklist = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RDTUN,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* USB legacy takeover defaults on only for x86, where BIOS emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1,
    "Enable early takeover of USB controllers. Disable this if you depend on"
    " BIOS emulation of USB devices, that is you use USB devices (like"
    " keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");

int pci_enable_aspm = 1;
SYSCTL_INT(_hw_pci, OID_AUTO, enable_aspm, CTLFLAG_RDTUN, &pci_enable_aspm,
    0, "Enable support for PCIe Active State Power Management");

static int pci_clear_aer_on_attach = 0;
SYSCTL_INT(_hw_pci, OID_AUTO, clear_aer_on_attach, CTLFLAG_RWTUN,
    &pci_clear_aer_on_attach, 0,
    "Clear port and device AER state on driver attach");
  422 
  423 static int
  424 pci_has_quirk(uint32_t devid, int quirk)
  425 {
  426         const struct pci_quirk *q;
  427 
  428         for (q = &pci_quirks[0]; q->devid; q++) {
  429                 if (q->devid == devid && q->type == quirk)
  430                         return (1);
  431         }
  432         return (0);
  433 }
  434 
/*
 * Find a device_t by bus/slot/function in domain 0.  Convenience
 * wrapper around pci_find_dbsf() for the common single-domain case.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

        return (pci_find_dbsf(0, bus, slot, func));
}
  443 
  444 /* Find a device_t by domain/bus/slot/function */
  445 
  446 device_t
  447 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
  448 {
  449         struct pci_devinfo *dinfo = NULL;
  450 
  451         STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
  452                 if ((dinfo->cfg.domain == domain) &&
  453                     (dinfo->cfg.bus == bus) &&
  454                     (dinfo->cfg.slot == slot) &&
  455                     (dinfo->cfg.func == func)) {
  456                         break;
  457                 }
  458         }
  459 
  460         return (dinfo != NULL ? dinfo->cfg.dev : NULL);
  461 }
  462 
  463 /* Find a device_t by vendor/device ID */
  464 
  465 device_t
  466 pci_find_device(uint16_t vendor, uint16_t device)
  467 {
  468         struct pci_devinfo *dinfo;
  469 
  470         STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
  471                 if ((dinfo->cfg.vendor == vendor) &&
  472                     (dinfo->cfg.device == device)) {
  473                         return (dinfo->cfg.dev);
  474                 }
  475         }
  476 
  477         return (NULL);
  478 }
  479 
  480 device_t
  481 pci_find_class(uint8_t class, uint8_t subclass)
  482 {
  483         struct pci_devinfo *dinfo;
  484 
  485         STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
  486                 if (dinfo->cfg.baseclass == class &&
  487                     dinfo->cfg.subclass == subclass) {
  488                         return (dinfo->cfg.dev);
  489                 }
  490         }
  491 
  492         return (NULL);
  493 }
  494 
  495 device_t
  496 pci_find_class_from(uint8_t class, uint8_t subclass, device_t from)
  497 {
  498         struct pci_devinfo *dinfo;
  499         bool found = false;
  500 
  501         STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
  502                 if (from != NULL && found == false) {
  503                         if (from != dinfo->cfg.dev)
  504                                 continue;
  505                         found = true;
  506                         continue;
  507                 }
  508                 if (dinfo->cfg.baseclass == class &&
  509                     dinfo->cfg.subclass == subclass) {
  510                         return (dinfo->cfg.dev);
  511                 }
  512         }
  513 
  514         return (NULL);
  515 }
  516 
  517 static int
  518 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
  519 {
  520         va_list ap;
  521         int retval;
  522 
  523         retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
  524             cfg->func);
  525         va_start(ap, fmt);
  526         retval += vprintf(fmt, ap);
  527         va_end(ap);
  528         return (retval);
  529 }
  530 
  531 /* return base address of memory or port map */
  532 
  533 static pci_addr_t
  534 pci_mapbase(uint64_t mapreg)
  535 {
  536 
  537         if (PCI_BAR_MEM(mapreg))
  538                 return (mapreg & PCIM_BAR_MEM_BASE);
  539         else
  540                 return (mapreg & PCIM_BAR_IO_BASE);
  541 }
  542 
  543 /* return map type of memory or port map */
  544 
  545 static const char *
  546 pci_maptype(uint64_t mapreg)
  547 {
  548 
  549         if (PCI_BAR_IO(mapreg))
  550                 return ("I/O Port");
  551         if (mapreg & PCIM_BAR_MEM_PREFETCH)
  552                 return ("Prefetchable Memory");
  553         return ("Memory");
  554 }
  555 
  556 /* return log2 of map size decoded for memory or port map */
  557 
  558 int
  559 pci_mapsize(uint64_t testval)
  560 {
  561         int ln2size;
  562 
  563         testval = pci_mapbase(testval);
  564         ln2size = 0;
  565         if (testval != 0) {
  566                 while ((testval & 1) == 0)
  567                 {
  568                         ln2size++;
  569                         testval >>= 1;
  570                 }
  571         }
  572         return (ln2size);
  573 }
  574 
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

        /* Keep only the address bits of the expansion ROM BAR. */
        return (mapreg & PCIM_BIOS_ADDR_MASK);
}
  583 
  584 /* return log2 of map size decided for device ROM */
  585 
  586 static int
  587 pci_romsize(uint64_t testval)
  588 {
  589         int ln2size;
  590 
  591         testval = pci_rombase(testval);
  592         ln2size = 0;
  593         if (testval != 0) {
  594                 while ((testval & 1) == 0)
  595                 {
  596                         ln2size++;
  597                         testval >>= 1;
  598                 }
  599         }
  600         return (ln2size);
  601 }
  602 
  603 /* return log2 of address range supported by map register */
  604 
  605 static int
  606 pci_maprange(uint64_t mapreg)
  607 {
  608         int ln2range = 0;
  609 
  610         if (PCI_BAR_IO(mapreg))
  611                 ln2range = 32;
  612         else
  613                 switch (mapreg & PCIM_BAR_MEM_TYPE) {
  614                 case PCIM_BAR_MEM_32:
  615                         ln2range = 32;
  616                         break;
  617                 case PCIM_BAR_MEM_1MB:
  618                         ln2range = 20;
  619                         break;
  620                 case PCIM_BAR_MEM_64:
  621                         ln2range = 64;
  622                         break;
  623                 }
  624         return (ln2range);
  625 }
  626 
/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */

static void
pci_fixancient(pcicfgregs *cfg)
{
        /* Only functions reporting a normal (type 0) header are patched. */
        if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
                return;

        /* PCI to PCI bridges use header type 1 */
        if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
                cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
  639 
/*
 * Extract header-type-specific config data into 'cfg'.  The register
 * layout past offset 0x10 differs between normal devices (type 0),
 * PCI-PCI bridges (type 1), and CardBus bridges (type 2), as does the
 * number of BARs each header provides.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define REG(n, w)       PCIB_READ_CONFIG(pcib, b, s, f, n, w)
        switch (cfg->hdrtype & PCIM_HDRTYPE) {
        case PCIM_HDRTYPE_NORMAL:
                /* Type 0: ordinary function with subsystem IDs. */
                cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
                cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
                cfg->mingnt         = REG(PCIR_MINGNT, 1);
                cfg->maxlat         = REG(PCIR_MAXLAT, 1);
                cfg->nummaps        = PCI_MAXMAPS_0;
                break;
        case PCIM_HDRTYPE_BRIDGE:
                /* Type 1: PCI-PCI bridge; record bus-number routing. */
                cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
                cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
                cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
                cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
                cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
                cfg->nummaps        = PCI_MAXMAPS_1;
                break;
        case PCIM_HDRTYPE_CARDBUS:
                /* Type 2: CardBus bridge; has both bus info and subsystem IDs. */
                cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
                cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
                cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
                cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
                cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
                cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
                cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
                cfg->nummaps        = PCI_MAXMAPS_2;
                break;
        }
#undef REG
}
  675 
/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, device_t bus, int d, int b, int s, int f)
{
/* NOTE: REG stays defined through pci_fill_devinfo() below (#undef follows it). */
#define REG(n, w)       PCIB_READ_CONFIG(pcib, b, s, f, n, w)
        uint16_t vid, did;

        /* An invalid vendor ID means no function responds at this address. */
        vid = REG(PCIR_VENDOR, 2);
        if (vid == PCIV_INVALID)
                return (NULL);

        did = REG(PCIR_DEVICE, 2);

        return (pci_fill_devinfo(pcib, bus, d, b, s, f, vid, did));
}
  691 
/*
 * Default PCI_ALLOC_DEVINFO method: allocate a zeroed struct
 * pci_devinfo.  M_WAITOK means the allocation sleeps rather than
 * failing, so the return value is never NULL.
 */
struct pci_devinfo *
pci_alloc_devinfo_method(device_t dev)
{

        return (malloc(sizeof(struct pci_devinfo), M_DEVBUF,
            M_WAITOK | M_ZERO));
}
  699 
/*
 * Allocate a pci_devinfo for the function at domain d, bus b, slot s,
 * function f (with already-read vendor/device IDs), populate its
 * pcicfgregs from config space, link it onto the global device list,
 * and mirror the identity fields into the pci_conf exported to
 * userland.  Uses the REG() macro defined above pci_read_device().
 */
static struct pci_devinfo *
pci_fill_devinfo(device_t pcib, device_t bus, int d, int b, int s, int f,
    uint16_t vid, uint16_t did)
{
        struct pci_devinfo *devlist_entry;
        pcicfgregs *cfg;

        devlist_entry = PCI_ALLOC_DEVINFO(bus);

        cfg = &devlist_entry->cfg;

        /* Identity and common (type-independent) header registers. */
        cfg->domain             = d;
        cfg->bus                = b;
        cfg->slot               = s;
        cfg->func               = f;
        cfg->vendor             = vid;
        cfg->device             = did;
        cfg->cmdreg             = REG(PCIR_COMMAND, 2);
        cfg->statreg            = REG(PCIR_STATUS, 2);
        cfg->baseclass          = REG(PCIR_CLASS, 1);
        cfg->subclass           = REG(PCIR_SUBCLASS, 1);
        cfg->progif             = REG(PCIR_PROGIF, 1);
        cfg->revid              = REG(PCIR_REVID, 1);
        cfg->hdrtype            = REG(PCIR_HDRTYPE, 1);
        cfg->cachelnsz          = REG(PCIR_CACHELNSZ, 1);
        cfg->lattimer           = REG(PCIR_LATTIMER, 1);
        cfg->intpin             = REG(PCIR_INTPIN, 1);
        cfg->intline            = REG(PCIR_INTLINE, 1);

        /* Split the multi-function bit out of the raw header type. */
        cfg->mfdev              = (cfg->hdrtype & PCIM_MFDEV) != 0;
        cfg->hdrtype            &= ~PCIM_MFDEV;
        STAILQ_INIT(&cfg->maps);

        cfg->iov                = NULL;

        pci_fixancient(cfg);
        pci_hdrtypedata(pcib, b, s, f, cfg);

        /* Walk the capability list only if the status register says one exists. */
        if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
                pci_read_cap(pcib, cfg);

        STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);

        /* Mirror identity into the pci_conf visible via the pci(4) ioctls. */
        devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
        devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
        devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
        devlist_entry->conf.pc_sel.pc_func = cfg->func;
        devlist_entry->conf.pc_hdr = cfg->hdrtype;

        devlist_entry->conf.pc_subvendor = cfg->subvendor;
        devlist_entry->conf.pc_subdevice = cfg->subdevice;
        devlist_entry->conf.pc_vendor = cfg->vendor;
        devlist_entry->conf.pc_device = cfg->device;

        devlist_entry->conf.pc_class = cfg->baseclass;
        devlist_entry->conf.pc_subclass = cfg->subclass;
        devlist_entry->conf.pc_progif = cfg->progif;
        devlist_entry->conf.pc_revid = cfg->revid;

        pci_numdevs++;
        pci_generation++;

        return (devlist_entry);
}
#undef REG
  765 
  766 static void
  767 pci_ea_fill_info(device_t pcib, pcicfgregs *cfg)
  768 {
  769 #define REG(n, w)       PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, \
  770     cfg->ea.ea_location + (n), w)
  771         int num_ent;
  772         int ptr;
  773         int a, b;
  774         uint32_t val;
  775         int ent_size;
  776         uint32_t dw[4];
  777         uint64_t base, max_offset;
  778         struct pci_ea_entry *eae;
  779 
  780         if (cfg->ea.ea_location == 0)
  781                 return;
  782 
  783         STAILQ_INIT(&cfg->ea.ea_entries);
  784 
  785         /* Determine the number of entries */
  786         num_ent = REG(PCIR_EA_NUM_ENT, 2);
  787         num_ent &= PCIM_EA_NUM_ENT_MASK;
  788 
  789         /* Find the first entry to care of */
  790         ptr = PCIR_EA_FIRST_ENT;
  791 
  792         /* Skip DWORD 2 for type 1 functions */
  793         if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE)
  794                 ptr += 4;
  795 
  796         for (a = 0; a < num_ent; a++) {
  797                 eae = malloc(sizeof(*eae), M_DEVBUF, M_WAITOK | M_ZERO);
  798                 eae->eae_cfg_offset = cfg->ea.ea_location + ptr;
  799 
  800                 /* Read a number of dwords in the entry */
  801                 val = REG(ptr, 4);
  802                 ptr += 4;
  803                 ent_size = (val & PCIM_EA_ES);
  804 
  805                 for (b = 0; b < ent_size; b++) {
  806                         dw[b] = REG(ptr, 4);
  807                         ptr += 4;
  808                 }
  809 
  810                 eae->eae_flags = val;
  811                 eae->eae_bei = (PCIM_EA_BEI & val) >> PCIM_EA_BEI_OFFSET;
  812 
  813                 base = dw[0] & PCIM_EA_FIELD_MASK;
  814                 max_offset = dw[1] | ~PCIM_EA_FIELD_MASK;
  815                 b = 2;
  816                 if (((dw[0] & PCIM_EA_IS_64) != 0) && (b < ent_size)) {
  817                         base |= (uint64_t)dw[b] << 32UL;
  818                         b++;
  819                 }
  820                 if (((dw[1] & PCIM_EA_IS_64) != 0)
  821                     && (b < ent_size)) {
  822                         max_offset |= (uint64_t)dw[b] << 32UL;
  823                         b++;
  824                 }
  825 
  826                 eae->eae_base = base;
  827                 eae->eae_max_offset = max_offset;
  828 
  829                 STAILQ_INSERT_TAIL(&cfg->ea.ea_entries, eae, eae_link);
  830 
  831                 if (bootverbose) {
  832                         printf("PCI(EA) dev %04x:%04x, bei %d, flags #%x, base #%jx, max_offset #%jx\n",
  833                             cfg->vendor, cfg->device, eae->eae_bei, eae->eae_flags,
  834                             (uintmax_t)eae->eae_base, (uintmax_t)eae->eae_max_offset);
  835                 }
  836         }
  837 }
  838 #undef REG
  839 
/*
 * Walk the classic PCI capability list of the function described by
 * cfg and record the location (and a small summary) of each
 * capability this driver cares about: power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor ID, PCI-X, PCI-express,
 * and Enhanced Allocation.  The REG/WREG accessors defined here are
 * intentionally left defined for the VPD helpers that follow.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability-pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				/* Data register is optional; check it fits. */
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		case PCIY_EA:		/* Enhanced Allocation */
			cfg->ea.ea_location = ptr;
			pci_ea_fill_info(pcib, cfg);
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG deliberately stay defined for the VPD functions below. */
}
 1004 
 1005 /*
 1006  * PCI Vital Product Data
 1007  */
 1008 
 1009 #define PCI_VPD_TIMEOUT         1000000
 1010 
 1011 static int
 1012 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
 1013 {
 1014         int count = PCI_VPD_TIMEOUT;
 1015 
 1016         KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
 1017 
 1018         WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
 1019 
 1020         while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
 1021                 if (--count < 0)
 1022                         return (ENXIO);
 1023                 DELAY(1);       /* limit looping */
 1024         }
 1025         *data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
 1026 
 1027         return (0);
 1028 }
 1029 
#if 0
/*
 * Counterpart to pci_read_vpd_reg(): post one 32-bit word to the VPD
 * data register and poll until the device clears the flag bit to
 * signal completion.  Compiled out — nothing here calls it — but kept
 * for reference should VPD writes ever be needed.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	/* Bit 15 set requests a write; it drops when the write lands. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
 1049 
 1050 #undef PCI_VPD_TIMEOUT
 1051 
/*
 * Cursor state for sequential, byte-at-a-time reads of a device's
 * VPD data (see vpd_nextbyte()).
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recently fetched 32-bit word */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running byte sum for RV checking */
};
 1060 
 1061 static int
 1062 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
 1063 {
 1064         uint32_t reg;
 1065         uint8_t byte;
 1066 
 1067         if (vrs->bytesinval == 0) {
 1068                 if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
 1069                         return (ENXIO);
 1070                 vrs->val = le32toh(reg);
 1071                 vrs->off += 4;
 1072                 byte = vrs->val & 0xff;
 1073                 vrs->bytesinval = 3;
 1074         } else {
 1075                 vrs->val = vrs->val >> 8;
 1076                 byte = vrs->val & 0xff;
 1077                 vrs->bytesinval--;
 1078         }
 1079 
 1080         vrs->cksum += byte;
 1081         *data = byte;
 1082         return (0);
 1083 }
 1084 
 1085 static void
 1086 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
 1087 {
 1088         struct vpd_readstate vrs;
 1089         int state;
 1090         int name;
 1091         int remain;
 1092         int i;
 1093         int alloc, off;         /* alloc/off for RO/W arrays */
 1094         int cksumvalid;
 1095         int dflen;
 1096         int firstrecord;
 1097         uint8_t byte;
 1098         uint8_t byte2;
 1099 
 1100         /* init vpd reader */
 1101         vrs.bytesinval = 0;
 1102         vrs.off = 0;
 1103         vrs.pcib = pcib;
 1104         vrs.cfg = cfg;
 1105         vrs.cksum = 0;
 1106 
 1107         state = 0;
 1108         name = remain = i = 0;  /* shut up stupid gcc */
 1109         alloc = off = 0;        /* shut up stupid gcc */
 1110         dflen = 0;              /* shut up stupid gcc */
 1111         cksumvalid = -1;
 1112         firstrecord = 1;
 1113         while (state >= 0) {
 1114                 if (vpd_nextbyte(&vrs, &byte)) {
 1115                         pci_printf(cfg, "VPD read timed out\n");
 1116                         state = -2;
 1117                         break;
 1118                 }
 1119 #if 0
 1120                 pci_printf(cfg, "vpd: val: %#x, off: %d, bytesinval: %d, byte: "
 1121                     "%#hhx, state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
 1122                     vrs.off, vrs.bytesinval, byte, state, remain, name, i);
 1123 #endif
 1124                 switch (state) {
 1125                 case 0:         /* item name */
 1126                         if (byte & 0x80) {
 1127                                 if (vpd_nextbyte(&vrs, &byte2)) {
 1128                                         state = -2;
 1129                                         break;
 1130                                 }
 1131                                 remain = byte2;
 1132                                 if (vpd_nextbyte(&vrs, &byte2)) {
 1133                                         state = -2;
 1134                                         break;
 1135                                 }
 1136                                 remain |= byte2 << 8;
 1137                                 name = byte & 0x7f;
 1138                         } else {
 1139                                 remain = byte & 0x7;
 1140                                 name = (byte >> 3) & 0xf;
 1141                         }
 1142                         if (firstrecord) {
 1143                                 if (name != 0x2) {
 1144                                         pci_printf(cfg, "VPD data does not " \
 1145                                             "start with ident (%#x)\n", name);
 1146                                         state = -2;
 1147                                         break;
 1148                                 }
 1149                                 firstrecord = 0;
 1150                         }
 1151                         if (vrs.off + remain - vrs.bytesinval > 0x8000) {
 1152                                 pci_printf(cfg,
 1153                                     "VPD data overflow, remain %#x\n", remain);
 1154                                 state = -1;
 1155                                 break;
 1156                         }
 1157                         switch (name) {
 1158                         case 0x2:       /* String */
 1159                                 if (cfg->vpd.vpd_ident != NULL) {
 1160                                         pci_printf(cfg,
 1161                                             "duplicate VPD ident record\n");
 1162                                         state = -2;
 1163                                         break;
 1164                                 }
 1165                                 if (remain > 255) {
 1166                                         pci_printf(cfg,
 1167                                             "VPD ident length %d exceeds 255\n",
 1168                                             remain);
 1169                                         state = -2;
 1170                                         break;
 1171                                 }
 1172                                 cfg->vpd.vpd_ident = malloc(remain + 1,
 1173                                     M_DEVBUF, M_WAITOK);
 1174                                 i = 0;
 1175                                 state = 1;
 1176                                 break;
 1177                         case 0xf:       /* End */
 1178                                 state = -1;
 1179                                 break;
 1180                         case 0x10:      /* VPD-R */
 1181                                 alloc = 8;
 1182                                 off = 0;
 1183                                 cfg->vpd.vpd_ros = malloc(alloc *
 1184                                     sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
 1185                                     M_WAITOK | M_ZERO);
 1186                                 state = 2;
 1187                                 break;
 1188                         case 0x11:      /* VPD-W */
 1189                                 alloc = 8;
 1190                                 off = 0;
 1191                                 cfg->vpd.vpd_w = malloc(alloc *
 1192                                     sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
 1193                                     M_WAITOK | M_ZERO);
 1194                                 state = 5;
 1195                                 break;
 1196                         default:        /* Invalid data, abort */
 1197                                 pci_printf(cfg, "invalid VPD name: %#x\n", name);
 1198                                 state = -2;
 1199                                 break;
 1200                         }
 1201                         break;
 1202 
 1203                 case 1: /* Identifier String */
 1204                         cfg->vpd.vpd_ident[i++] = byte;
 1205                         remain--;
 1206                         if (remain == 0)  {
 1207                                 cfg->vpd.vpd_ident[i] = '\0';
 1208                                 state = 0;
 1209                         }
 1210                         break;
 1211 
 1212                 case 2: /* VPD-R Keyword Header */
 1213                         if (off == alloc) {
 1214                                 cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
 1215                                     (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
 1216                                     M_DEVBUF, M_WAITOK | M_ZERO);
 1217                         }
 1218                         cfg->vpd.vpd_ros[off].keyword[0] = byte;
 1219                         if (vpd_nextbyte(&vrs, &byte2)) {
 1220                                 state = -2;
 1221                                 break;
 1222                         }
 1223                         cfg->vpd.vpd_ros[off].keyword[1] = byte2;
 1224                         if (vpd_nextbyte(&vrs, &byte2)) {
 1225                                 state = -2;
 1226                                 break;
 1227                         }
 1228                         cfg->vpd.vpd_ros[off].len = dflen = byte2;
 1229                         if (dflen == 0 &&
 1230                             strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
 1231                             2) == 0) {
 1232                                 /*
 1233                                  * if this happens, we can't trust the rest
 1234                                  * of the VPD.
 1235                                  */
 1236                                 pci_printf(cfg, "invalid VPD RV record");
 1237                                 cksumvalid = 0;
 1238                                 state = -1;
 1239                                 break;
 1240                         } else if (dflen == 0) {
 1241                                 cfg->vpd.vpd_ros[off].value = malloc(1 *
 1242                                     sizeof(*cfg->vpd.vpd_ros[off].value),
 1243                                     M_DEVBUF, M_WAITOK);
 1244                                 cfg->vpd.vpd_ros[off].value[0] = '\x00';
 1245                         } else
 1246                                 cfg->vpd.vpd_ros[off].value = malloc(
 1247                                     (dflen + 1) *
 1248                                     sizeof(*cfg->vpd.vpd_ros[off].value),
 1249                                     M_DEVBUF, M_WAITOK);
 1250                         remain -= 3;
 1251                         i = 0;
 1252                         /* keep in sync w/ state 3's transitions */
 1253                         if (dflen == 0 && remain == 0)
 1254                                 state = 0;
 1255                         else if (dflen == 0)
 1256                                 state = 2;
 1257                         else
 1258                                 state = 3;
 1259                         break;
 1260 
 1261                 case 3: /* VPD-R Keyword Value */
 1262                         cfg->vpd.vpd_ros[off].value[i++] = byte;
 1263                         if (strncmp(cfg->vpd.vpd_ros[off].keyword,
 1264                             "RV", 2) == 0 && cksumvalid == -1) {
 1265                                 if (vrs.cksum == 0)
 1266                                         cksumvalid = 1;
 1267                                 else {
 1268                                         if (bootverbose)
 1269                                                 pci_printf(cfg,
 1270                                             "bad VPD cksum, remain %hhu\n",
 1271                                                     vrs.cksum);
 1272                                         cksumvalid = 0;
 1273                                         state = -1;
 1274                                         break;
 1275                                 }
 1276                         }
 1277                         dflen--;
 1278                         remain--;
 1279                         /* keep in sync w/ state 2's transitions */
 1280                         if (dflen == 0)
 1281                                 cfg->vpd.vpd_ros[off++].value[i++] = '\0';
 1282                         if (dflen == 0 && remain == 0) {
 1283                                 cfg->vpd.vpd_rocnt = off;
 1284                                 cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
 1285                                     off * sizeof(*cfg->vpd.vpd_ros),
 1286                                     M_DEVBUF, M_WAITOK | M_ZERO);
 1287                                 state = 0;
 1288                         } else if (dflen == 0)
 1289                                 state = 2;
 1290                         break;
 1291 
 1292                 case 4:
 1293                         remain--;
 1294                         if (remain == 0)
 1295                                 state = 0;
 1296                         break;
 1297 
 1298                 case 5: /* VPD-W Keyword Header */
 1299                         if (off == alloc) {
 1300                                 cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
 1301                                     (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
 1302                                     M_DEVBUF, M_WAITOK | M_ZERO);
 1303                         }
 1304                         cfg->vpd.vpd_w[off].keyword[0] = byte;
 1305                         if (vpd_nextbyte(&vrs, &byte2)) {
 1306                                 state = -2;
 1307                                 break;
 1308                         }
 1309                         cfg->vpd.vpd_w[off].keyword[1] = byte2;
 1310                         if (vpd_nextbyte(&vrs, &byte2)) {
 1311                                 state = -2;
 1312                                 break;
 1313                         }
 1314                         cfg->vpd.vpd_w[off].len = dflen = byte2;
 1315                         cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
 1316                         cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
 1317                             sizeof(*cfg->vpd.vpd_w[off].value),
 1318                             M_DEVBUF, M_WAITOK);
 1319                         remain -= 3;
 1320                         i = 0;
 1321                         /* keep in sync w/ state 6's transitions */
 1322                         if (dflen == 0 && remain == 0)
 1323                                 state = 0;
 1324                         else if (dflen == 0)
 1325                                 state = 5;
 1326                         else
 1327                                 state = 6;
 1328                         break;
 1329 
 1330                 case 6: /* VPD-W Keyword Value */
 1331                         cfg->vpd.vpd_w[off].value[i++] = byte;
 1332                         dflen--;
 1333                         remain--;
 1334                         /* keep in sync w/ state 5's transitions */
 1335                         if (dflen == 0)
 1336                                 cfg->vpd.vpd_w[off++].value[i++] = '\0';
 1337                         if (dflen == 0 && remain == 0) {
 1338                                 cfg->vpd.vpd_wcnt = off;
 1339                                 cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
 1340                                     off * sizeof(*cfg->vpd.vpd_w),
 1341                                     M_DEVBUF, M_WAITOK | M_ZERO);
 1342                                 state = 0;
 1343                         } else if (dflen == 0)
 1344                                 state = 5;
 1345                         break;
 1346 
 1347                 default:
 1348                         pci_printf(cfg, "invalid state: %d\n", state);
 1349                         state = -1;
 1350                         break;
 1351                 }
 1352 
 1353                 if (cfg->vpd.vpd_ident == NULL || cfg->vpd.vpd_ident[0] == '\0') {
 1354                         pci_printf(cfg, "no valid vpd ident found\n");
 1355                         state = -2;
 1356                 }
 1357         }
 1358 
 1359         if (cksumvalid <= 0 || state < -1) {
 1360                 /* read-only data bad, clean up */
 1361                 if (cfg->vpd.vpd_ros != NULL) {
 1362                         for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
 1363                                 free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
 1364                         free(cfg->vpd.vpd_ros, M_DEVBUF);
 1365                         cfg->vpd.vpd_ros = NULL;
 1366                 }
 1367         }
 1368         if (state < -1) {
 1369                 /* I/O error, clean up */
 1370                 pci_printf(cfg, "failed to read VPD data.\n");
 1371                 if (cfg->vpd.vpd_ident != NULL) {
 1372                         free(cfg->vpd.vpd_ident, M_DEVBUF);
 1373                         cfg->vpd.vpd_ident = NULL;
 1374                 }
 1375                 if (cfg->vpd.vpd_w != NULL) {
 1376                         for (off = 0; cfg->vpd.vpd_w[off].value; off++)
 1377                                 free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
 1378                         free(cfg->vpd.vpd_w, M_DEVBUF);
 1379                         cfg->vpd.vpd_w = NULL;
 1380                 }
 1381         }
 1382         cfg->vpd.vpd_cached = 1;
 1383 #undef REG
 1384 #undef WREG
 1385 }
 1386 
 1387 int
 1388 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
 1389 {
 1390         struct pci_devinfo *dinfo = device_get_ivars(child);
 1391         pcicfgregs *cfg = &dinfo->cfg;
 1392 
 1393         if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
 1394                 pci_read_vpd(device_get_parent(dev), cfg);
 1395 
 1396         *identptr = cfg->vpd.vpd_ident;
 1397 
 1398         if (*identptr == NULL)
 1399                 return (ENXIO);
 1400 
 1401         return (0);
 1402 }
 1403 
 1404 int
 1405 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
 1406         const char **vptr)
 1407 {
 1408         struct pci_devinfo *dinfo = device_get_ivars(child);
 1409         pcicfgregs *cfg = &dinfo->cfg;
 1410         int i;
 1411 
 1412         if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
 1413                 pci_read_vpd(device_get_parent(dev), cfg);
 1414 
 1415         for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
 1416                 if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
 1417                     sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
 1418                         *vptr = cfg->vpd.vpd_ros[i].value;
 1419                         return (0);
 1420                 }
 1421 
 1422         *vptr = NULL;
 1423         return (ENXIO);
 1424 }
 1425 
 1426 struct pcicfg_vpd *
 1427 pci_fetch_vpd_list(device_t dev)
 1428 {
 1429         struct pci_devinfo *dinfo = device_get_ivars(dev);
 1430         pcicfgregs *cfg = &dinfo->cfg;
 1431 
 1432         if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
 1433                 pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
 1434         return (&cfg->vpd);
 1435 }
 1436 
 1437 /*
 1438  * Find the requested HyperTransport capability and return the offset
 1439  * in configuration space via the pointer provided.  The function
 1440  * returns 0 on success and an error code otherwise.
 1441  */
 1442 int
 1443 pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
 1444 {
 1445         int ptr, error;
 1446         uint16_t val;
 1447 
 1448         error = pci_find_cap(child, PCIY_HT, &ptr);
 1449         if (error)
 1450                 return (error);
 1451 
 1452         /*
 1453          * Traverse the capabilities list checking each HT capability
 1454          * to see if it matches the requested HT capability.
 1455          */
 1456         for (;;) {
 1457                 val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
 1458                 if (capability == PCIM_HTCAP_SLAVE ||
 1459                     capability == PCIM_HTCAP_HOST)
 1460                         val &= 0xe000;
 1461                 else
 1462                         val &= PCIM_HTCMD_CAP_MASK;
 1463                 if (val == capability) {
 1464                         if (capreg != NULL)
 1465                                 *capreg = ptr;
 1466                         return (0);
 1467                 }
 1468 
 1469                 /* Skip to the next HT capability. */
 1470                 if (pci_find_next_cap(child, PCIY_HT, ptr, &ptr) != 0)
 1471                         break;
 1472         }
 1473 
 1474         return (ENOENT);
 1475 }
 1476 
 1477 /*
 1478  * Find the next requested HyperTransport capability after start and return
 1479  * the offset in configuration space via the pointer provided.  The function
 1480  * returns 0 on success and an error code otherwise.
 1481  */
 1482 int
 1483 pci_find_next_htcap_method(device_t dev, device_t child, int capability,
 1484     int start, int *capreg)
 1485 {
 1486         int ptr;
 1487         uint16_t val;
 1488 
 1489         KASSERT(pci_read_config(child, start + PCICAP_ID, 1) == PCIY_HT,
 1490             ("start capability is not HyperTransport capability"));
 1491         ptr = start;
 1492 
 1493         /*
 1494          * Traverse the capabilities list checking each HT capability
 1495          * to see if it matches the requested HT capability.
 1496          */
 1497         for (;;) {
 1498                 /* Skip to the next HT capability. */
 1499                 if (pci_find_next_cap(child, PCIY_HT, ptr, &ptr) != 0)
 1500                         break;
 1501 
 1502                 val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
 1503                 if (capability == PCIM_HTCAP_SLAVE ||
 1504                     capability == PCIM_HTCAP_HOST)
 1505                         val &= 0xe000;
 1506                 else
 1507                         val &= PCIM_HTCMD_CAP_MASK;
 1508                 if (val == capability) {
 1509                         if (capreg != NULL)
 1510                                 *capreg = ptr;
 1511                         return (0);
 1512                 }
 1513         }
 1514 
 1515         return (ENOENT);
 1516 }
 1517 
 1518 /*
 1519  * Find the requested capability and return the offset in
 1520  * configuration space via the pointer provided.  The function returns
 1521  * 0 on success and an error code otherwise.
 1522  */
 1523 int
 1524 pci_find_cap_method(device_t dev, device_t child, int capability,
 1525     int *capreg)
 1526 {
 1527         struct pci_devinfo *dinfo = device_get_ivars(child);
 1528         pcicfgregs *cfg = &dinfo->cfg;
 1529         uint32_t status;
 1530         uint8_t ptr;
 1531 
 1532         /*
 1533          * Check the CAP_LIST bit of the PCI status register first.
 1534          */
 1535         status = pci_read_config(child, PCIR_STATUS, 2);
 1536         if (!(status & PCIM_STATUS_CAPPRESENT))
 1537                 return (ENXIO);
 1538 
 1539         /*
 1540          * Determine the start pointer of the capabilities list.
 1541          */
 1542         switch (cfg->hdrtype & PCIM_HDRTYPE) {
 1543         case PCIM_HDRTYPE_NORMAL:
 1544         case PCIM_HDRTYPE_BRIDGE:
 1545                 ptr = PCIR_CAP_PTR;
 1546                 break;
 1547         case PCIM_HDRTYPE_CARDBUS:
 1548                 ptr = PCIR_CAP_PTR_2;
 1549                 break;
 1550         default:
 1551                 /* XXX: panic? */
 1552                 return (ENXIO);         /* no extended capabilities support */
 1553         }
 1554         ptr = pci_read_config(child, ptr, 1);
 1555 
 1556         /*
 1557          * Traverse the capabilities list.
 1558          */
 1559         while (ptr != 0) {
 1560                 if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
 1561                         if (capreg != NULL)
 1562                                 *capreg = ptr;
 1563                         return (0);
 1564                 }
 1565                 ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
 1566         }
 1567 
 1568         return (ENOENT);
 1569 }
 1570 
 1571 /*
 1572  * Find the next requested capability after start and return the offset in
 1573  * configuration space via the pointer provided.  The function returns
 1574  * 0 on success and an error code otherwise.
 1575  */
 1576 int
 1577 pci_find_next_cap_method(device_t dev, device_t child, int capability,
 1578     int start, int *capreg)
 1579 {
 1580         uint8_t ptr;
 1581 
 1582         KASSERT(pci_read_config(child, start + PCICAP_ID, 1) == capability,
 1583             ("start capability is not expected capability"));
 1584 
 1585         ptr = pci_read_config(child, start + PCICAP_NEXTPTR, 1);
 1586         while (ptr != 0) {
 1587                 if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
 1588                         if (capreg != NULL)
 1589                                 *capreg = ptr;
 1590                         return (0);
 1591                 }
 1592                 ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
 1593         }
 1594 
 1595         return (ENOENT);
 1596 }
 1597 
 1598 /*
 1599  * Find the requested extended capability and return the offset in
 1600  * configuration space via the pointer provided.  The function returns
 1601  * 0 on success and an error code otherwise.
 1602  */
 1603 int
 1604 pci_find_extcap_method(device_t dev, device_t child, int capability,
 1605     int *capreg)
 1606 {
 1607         struct pci_devinfo *dinfo = device_get_ivars(child);
 1608         pcicfgregs *cfg = &dinfo->cfg;
 1609         uint32_t ecap;
 1610         uint16_t ptr;
 1611 
 1612         /* Only supported for PCI-express devices. */
 1613         if (cfg->pcie.pcie_location == 0)
 1614                 return (ENXIO);
 1615 
 1616         ptr = PCIR_EXTCAP;
 1617         ecap = pci_read_config(child, ptr, 4);
 1618         if (ecap == 0xffffffff || ecap == 0)
 1619                 return (ENOENT);
 1620         for (;;) {
 1621                 if (PCI_EXTCAP_ID(ecap) == capability) {
 1622                         if (capreg != NULL)
 1623                                 *capreg = ptr;
 1624                         return (0);
 1625                 }
 1626                 ptr = PCI_EXTCAP_NEXTPTR(ecap);
 1627                 if (ptr == 0)
 1628                         break;
 1629                 ecap = pci_read_config(child, ptr, 4);
 1630         }
 1631 
 1632         return (ENOENT);
 1633 }
 1634 
 1635 /*
 1636  * Find the next requested extended capability after start and return the
 1637  * offset in configuration space via the pointer provided.  The function
 1638  * returns 0 on success and an error code otherwise.
 1639  */
 1640 int
 1641 pci_find_next_extcap_method(device_t dev, device_t child, int capability,
 1642     int start, int *capreg)
 1643 {
 1644         struct pci_devinfo *dinfo = device_get_ivars(child);
 1645         pcicfgregs *cfg = &dinfo->cfg;
 1646         uint32_t ecap;
 1647         uint16_t ptr;
 1648 
 1649         /* Only supported for PCI-express devices. */
 1650         if (cfg->pcie.pcie_location == 0)
 1651                 return (ENXIO);
 1652 
 1653         ecap = pci_read_config(child, start, 4);
 1654         KASSERT(PCI_EXTCAP_ID(ecap) == capability,
 1655             ("start extended capability is not expected capability"));
 1656         ptr = PCI_EXTCAP_NEXTPTR(ecap);
 1657         while (ptr != 0) {
 1658                 ecap = pci_read_config(child, ptr, 4);
 1659                 if (PCI_EXTCAP_ID(ecap) == capability) {
 1660                         if (capreg != NULL)
 1661                                 *capreg = ptr;
 1662                         return (0);
 1663                 }
 1664                 ptr = PCI_EXTCAP_NEXTPTR(ecap);
 1665         }
 1666 
 1667         return (ENOENT);
 1668 }
 1669 
 1670 /*
 1671  * Support for MSI-X message interrupts.
 1672  */
 1673 static void
 1674 pci_write_msix_entry(device_t dev, u_int index, uint64_t address, uint32_t data)
 1675 {
 1676         struct pci_devinfo *dinfo = device_get_ivars(dev);
 1677         struct pcicfg_msix *msix = &dinfo->cfg.msix;
 1678         uint32_t offset;
 1679 
 1680         KASSERT(msix->msix_table_len > index, ("bogus index"));
 1681         offset = msix->msix_table_offset + index * 16;
 1682         bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
 1683         bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
 1684         bus_write_4(msix->msix_table_res, offset + 8, data);
 1685 }
 1686 
 1687 void
 1688 pci_enable_msix_method(device_t dev, device_t child, u_int index,
 1689     uint64_t address, uint32_t data)
 1690 {
 1691 
 1692         if (pci_msix_rewrite_table) {
 1693                 struct pci_devinfo *dinfo = device_get_ivars(child);
 1694                 struct pcicfg_msix *msix = &dinfo->cfg.msix;
 1695 
 1696                 /*
 1697                  * Some VM hosts require MSIX to be disabled in the
 1698                  * control register before updating the MSIX table
 1699                  * entries are allowed. It is not enough to only
 1700                  * disable MSIX while updating a single entry. MSIX
 1701                  * must be disabled while updating all entries in the
 1702                  * table.
 1703                  */
 1704                 pci_write_config(child,
 1705                     msix->msix_location + PCIR_MSIX_CTRL,
 1706                     msix->msix_ctrl & ~PCIM_MSIXCTRL_MSIX_ENABLE, 2);
 1707                 pci_resume_msix(child);
 1708         } else
 1709                 pci_write_msix_entry(child, index, address, data);
 1710 
 1711         /* Enable MSI -> HT mapping. */
 1712         pci_ht_map_msi(child, address);
 1713 }
 1714 
 1715 void
 1716 pci_mask_msix(device_t dev, u_int index)
 1717 {
 1718         struct pci_devinfo *dinfo = device_get_ivars(dev);
 1719         struct pcicfg_msix *msix = &dinfo->cfg.msix;
 1720         uint32_t offset, val;
 1721 
 1722         KASSERT(msix->msix_msgnum > index, ("bogus index"));
 1723         offset = msix->msix_table_offset + index * 16 + 12;
 1724         val = bus_read_4(msix->msix_table_res, offset);
 1725         val |= PCIM_MSIX_VCTRL_MASK;
 1726 
 1727         /*
 1728          * Some devices (e.g. Samsung PM961) do not support reads of this
 1729          * register, so always write the new value.
 1730          */
 1731         bus_write_4(msix->msix_table_res, offset, val);
 1732 }
 1733 
 1734 void
 1735 pci_unmask_msix(device_t dev, u_int index)
 1736 {
 1737         struct pci_devinfo *dinfo = device_get_ivars(dev);
 1738         struct pcicfg_msix *msix = &dinfo->cfg.msix;
 1739         uint32_t offset, val;
 1740 
 1741         KASSERT(msix->msix_table_len > index, ("bogus index"));
 1742         offset = msix->msix_table_offset + index * 16 + 12;
 1743         val = bus_read_4(msix->msix_table_res, offset);
 1744         val &= ~PCIM_MSIX_VCTRL_MASK;
 1745 
 1746         /*
 1747          * Some devices (e.g. Samsung PM961) do not support reads of this
 1748          * register, so always write the new value.
 1749          */
 1750         bus_write_4(msix->msix_table_res, offset, val);
 1751 }
 1752 
 1753 int
 1754 pci_pending_msix(device_t dev, u_int index)
 1755 {
 1756         struct pci_devinfo *dinfo = device_get_ivars(dev);
 1757         struct pcicfg_msix *msix = &dinfo->cfg.msix;
 1758         uint32_t offset, bit;
 1759 
 1760         KASSERT(msix->msix_table_len > index, ("bogus index"));
 1761         offset = msix->msix_pba_offset + (index / 32) * 4;
 1762         bit = 1 << index % 32;
 1763         return (bus_read_4(msix->msix_pba_res, offset) & bit);
 1764 }
 1765 
 1766 /*
 1767  * Restore MSI-X registers and table during resume.  If MSI-X is
 1768  * enabled then walk the virtual table to restore the actual MSI-X
 1769  * table.
 1770  */
static void
pci_resume_msix(device_t dev)
{
        struct pci_devinfo *dinfo = device_get_ivars(dev);
        struct pcicfg_msix *msix = &dinfo->cfg.msix;
        struct msix_table_entry *mte;
        struct msix_vector *mv;
        int i;

        if (msix->msix_alloc > 0) {
                /*
                 * First, mask all vectors.  Note this loop runs over
                 * all msix_msgnum hardware entries, not just the
                 * msix_table_len entries tracked in the virtual table.
                 */
                for (i = 0; i < msix->msix_msgnum; i++)
                        pci_mask_msix(dev, i);

                /* Second, program any messages with at least one handler. */
                for (i = 0; i < msix->msix_table_len; i++) {
                        mte = &msix->msix_table[i];
                        /* Skip slots with no vector or no active handlers. */
                        if (mte->mte_vector == 0 || mte->mte_handlers == 0)
                                continue;
                        /* mte_vector is 1-based into msix_vectors[]. */
                        mv = &msix->msix_vectors[mte->mte_vector - 1];
                        pci_write_msix_entry(dev, i, mv->mv_address,
                            mv->mv_data);
                        pci_unmask_msix(dev, i);
                }
        }
        /* Finally, restore the saved MSI-X control register value. */
        pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
            msix->msix_ctrl, 2);
}
 1799 
 1800 /*
 1801  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
 1802  * returned in *count.  After this function returns, each message will be
 1803  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
 1804  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
        struct pci_devinfo *dinfo = device_get_ivars(child);
        pcicfgregs *cfg = &dinfo->cfg;
        struct resource_list_entry *rle;
        int actual, error, i, irq, max;

        /* Don't let count == 0 get us into trouble. */
        if (*count == 0)
                return (EINVAL);

        /* If rid 0 is allocated, then fail. */
        rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
        if (rle != NULL && rle->res != NULL)
                return (ENXIO);

        /* Already have allocated messages? */
        if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
                return (ENXIO);

        /* If MSI-X is blacklisted for this system, fail. */
        if (pci_msix_blacklisted())
                return (ENXIO);

        /* MSI-X capability present? */
        if (cfg->msix.msix_location == 0 || !pci_do_msix)
                return (ENODEV);

        /*
         * Make sure the appropriate BARs are mapped: the driver must
         * have already allocated and activated the memory resources
         * backing the MSI-X table (and PBA, if it lives in a
         * different BAR).
         */
        rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
            cfg->msix.msix_table_bar);
        if (rle == NULL || rle->res == NULL ||
            !(rman_get_flags(rle->res) & RF_ACTIVE))
                return (ENXIO);
        cfg->msix.msix_table_res = rle->res;
        if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
                rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
                    cfg->msix.msix_pba_bar);
                if (rle == NULL || rle->res == NULL ||
                    !(rman_get_flags(rle->res) & RF_ACTIVE))
                        return (ENXIO);
        }
        /*
         * If the PBA shares the table BAR, 'rle' still refers to the
         * table BAR entry here, so both resources point at it.
         */
        cfg->msix.msix_pba_res = rle->res;

        if (bootverbose)
                device_printf(child,
                    "attempting to allocate %d MSI-X vectors (%d supported)\n",
                    *count, cfg->msix.msix_msgnum);
        /* Never ask for more messages than the device supports. */
        max = min(*count, cfg->msix.msix_msgnum);
        for (i = 0; i < max; i++) {
                /* Allocate a message. */
                error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
                if (error) {
                        /*
                         * Fail outright only if no messages were
                         * allocated; otherwise return the partial set.
                         */
                        if (i == 0)
                                return (error);
                        break;
                }
                /* Rids are 1-based: message i maps to SYS_RES_IRQ rid i+1. */
                resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
                    irq, 1);
        }
        actual = i;

        if (bootverbose) {
                rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
                if (actual == 1)
                        device_printf(child, "using IRQ %ju for MSI-X\n",
                            rle->start);
                else {
                        int run;

                        /*
                         * Be fancy and try to print contiguous runs of
                         * IRQ values as ranges.  'irq' is the previous IRQ.
                         * 'run' is true if we are in a range.
                         */
                        device_printf(child, "using IRQs %ju", rle->start);
                        irq = rle->start;
                        run = 0;
                        for (i = 1; i < actual; i++) {
                                rle = resource_list_find(&dinfo->resources,
                                    SYS_RES_IRQ, i + 1);

                                /* Still in a run? */
                                if (rle->start == irq + 1) {
                                        run = 1;
                                        irq++;
                                        continue;
                                }

                                /* Finish previous range. */
                                if (run) {
                                        printf("-%d", irq);
                                        run = 0;
                                }

                                /* Start new range. */
                                printf(",%ju", rle->start);
                                irq = rle->start;
                        }

                        /* Unfinished range? */
                        if (run)
                                printf("-%d", irq);
                        printf(" for MSI-X\n");
                }
        }

        /* Mask all vectors. */
        for (i = 0; i < cfg->msix.msix_msgnum; i++)
                pci_mask_msix(child, i);

        /* Allocate and initialize vector data and virtual table. */
        cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
            M_DEVBUF, M_WAITOK | M_ZERO);
        cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
            M_DEVBUF, M_WAITOK | M_ZERO);
        for (i = 0; i < actual; i++) {
                rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
                cfg->msix.msix_vectors[i].mv_irq = rle->start;
                /* Initially message i uses vector i+1 (1-based). */
                cfg->msix.msix_table[i].mte_vector = i + 1;
        }

        /* Update control register to enable MSI-X. */
        cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
        pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
            cfg->msix.msix_ctrl, 2);

        /* Update counts of alloc'd messages. */
        cfg->msix.msix_alloc = actual;
        cfg->msix.msix_table_len = actual;
        *count = actual;
        return (0);
}
 1939 
 1940 /*
 1941  * By default, pci_alloc_msix() will assign the allocated IRQ
 1942  * resources consecutively to the first N messages in the MSI-X table.
 1943  * However, device drivers may want to use different layouts if they
 1944  * either receive fewer messages than they asked for, or they wish to
 1945  * populate the MSI-X table sparsely.  This method allows the driver
 1946  * to specify what layout it wants.  It must be called after a
 1947  * successful pci_alloc_msix() but before any of the associated
 1948  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
 1949  *
 1950  * The 'vectors' array contains 'count' message vectors.  The array
 1951  * maps directly to the MSI-X table in that index 0 in the array
 1952  * specifies the vector for the first message in the MSI-X table, etc.
 1953  * The vector value in each array index can either be 0 to indicate
 1954  * that no vector should be assigned to a message slot, or it can be a
 1955  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
 1957  * vector (IRQ) to be used for the corresponding message.
 1958  *
 1959  * On successful return, each message with a non-zero vector will have
 1960  * an associated SYS_RES_IRQ whose rid is equal to the array index +
 1961  * 1.  Additionally, if any of the IRQs allocated via the previous
 1962  * call to pci_alloc_msix() are not used in the mapping, those IRQs
 1963  * will be freed back to the system automatically.
 1964  *
 1965  * For example, suppose a driver has a MSI-X table with 6 messages and
 1966  * asks for 6 messages, but pci_alloc_msix() only returns a count of
 1967  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
 1968  * C.  After the call to pci_alloc_msix(), the device will be setup to
 1969  * have an MSI-X table of ABC--- (where - means no vector assigned).
 1970  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
 1971  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
 1972  * be freed back to the system.  This device will also have valid
 1973  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
 1974  *
 1975  * In any case, the SYS_RES_IRQ rid X will always map to the message
 1976  * at MSI-X table index X - 1 and will only be valid if a vector is
 1977  * assigned to that table entry.
 1978  */
int
pci_remap_msix_method(device_t dev, device_t child, int count,
    const u_int *vectors)
{
        struct pci_devinfo *dinfo = device_get_ivars(child);
        struct pcicfg_msix *msix = &dinfo->cfg.msix;
        struct resource_list_entry *rle;
        int i, irq, j, *used;

        /*
         * Have to have at least one message in the table but the
         * table can't be bigger than the actual MSI-X table in the
         * device.
         */
        if (count == 0 || count > msix->msix_msgnum)
                return (EINVAL);

        /* Sanity check the vectors. */
        for (i = 0; i < count; i++)
                if (vectors[i] > msix->msix_alloc)
                        return (EINVAL);

        /*
         * Make sure there aren't any holes in the vectors to be used.
         * It's a big pain to support it, and it doesn't really make
         * sense anyway.  Also, at least one vector must be used.
         *
         * used[v-1] is set when vector v appears anywhere in the map;
         * the scan below rejects any 0 followed by a 1, i.e. a gap in
         * the run of used vectors.
         */
        used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
            M_ZERO);
        for (i = 0; i < count; i++)
                if (vectors[i] != 0)
                        used[vectors[i] - 1] = 1;
        for (i = 0; i < msix->msix_alloc - 1; i++)
                if (used[i] == 0 && used[i + 1] == 1) {
                        free(used, M_DEVBUF);
                        return (EINVAL);
                }
        if (used[0] != 1) {
                free(used, M_DEVBUF);
                return (EINVAL);
        }

        /*
         * Make sure none of the resources are allocated: remapping is
         * only legal before the driver sets up interrupt handlers or
         * allocates the SYS_RES_IRQ resources.
         */
        for (i = 0; i < msix->msix_table_len; i++) {
                if (msix->msix_table[i].mte_vector == 0)
                        continue;
                if (msix->msix_table[i].mte_handlers > 0) {
                        free(used, M_DEVBUF);
                        return (EBUSY);
                }
                rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
                KASSERT(rle != NULL, ("missing resource"));
                if (rle->res != NULL) {
                        free(used, M_DEVBUF);
                        return (EBUSY);
                }
        }

        /* Free the existing resource list entries. */
        for (i = 0; i < msix->msix_table_len; i++) {
                if (msix->msix_table[i].mte_vector == 0)
                        continue;
                resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
        }

        /*
         * Build the new virtual table keeping track of which vectors are
         * used.
         */
        free(msix->msix_table, M_DEVBUF);
        msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
            M_DEVBUF, M_WAITOK | M_ZERO);
        for (i = 0; i < count; i++)
                msix->msix_table[i].mte_vector = vectors[i];
        msix->msix_table_len = count;

        /*
         * Free any unused IRQs and resize the vectors array if necessary.
         * Thanks to the no-holes check above, the unused vectors form a
         * contiguous tail of msix_vectors[].
         */
        j = msix->msix_alloc - 1;
        if (used[j] == 0) {
                struct msix_vector *vec;

                while (used[j] == 0) {
                        PCIB_RELEASE_MSIX(device_get_parent(dev), child,
                            msix->msix_vectors[j].mv_irq);
                        j--;
                }
                vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
                    M_WAITOK);
                bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
                    (j + 1));
                free(msix->msix_vectors, M_DEVBUF);
                msix->msix_vectors = vec;
                msix->msix_alloc = j + 1;
        }
        free(used, M_DEVBUF);

        /* Map the IRQs onto the rids. */
        for (i = 0; i < count; i++) {
                if (vectors[i] == 0)
                        continue;
                irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
                resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
                    irq, 1);
        }

        if (bootverbose) {
                device_printf(child, "Remapped MSI-X IRQs as: ");
                for (i = 0; i < count; i++) {
                        if (i != 0)
                                printf(", ");
                        if (vectors[i] == 0)
                                printf("---");
                        else
                                printf("%d",
                                    msix->msix_vectors[vectors[i] - 1].mv_irq);
                }
                printf("\n");
        }

        return (0);
}
 2100 
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in
 * the control register, tear down the virtual table and the resource
 * list entries, and hand the IRQs back to the parent bridge.  Fails
 * with EBUSY if any message still has a handler or an allocated
 * SYS_RES_IRQ resource, and with ENODEV if nothing is allocated.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
        struct pci_devinfo *dinfo = device_get_ivars(child);
        struct pcicfg_msix *msix = &dinfo->cfg.msix;
        struct resource_list_entry *rle;
        int i;

        /* Do we have any messages to release? */
        if (msix->msix_alloc == 0)
                return (ENODEV);

        /* Make sure none of the resources are allocated. */
        for (i = 0; i < msix->msix_table_len; i++) {
                if (msix->msix_table[i].mte_vector == 0)
                        continue;
                if (msix->msix_table[i].mte_handlers > 0)
                        return (EBUSY);
                rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
                KASSERT(rle != NULL, ("missing resource"));
                if (rle->res != NULL)
                        return (EBUSY);
        }

        /* Update control register to disable MSI-X. */
        msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
        pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
            msix->msix_ctrl, 2);

        /* Free the resource list entries. */
        for (i = 0; i < msix->msix_table_len; i++) {
                if (msix->msix_table[i].mte_vector == 0)
                        continue;
                resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
        }
        free(msix->msix_table, M_DEVBUF);
        msix->msix_table_len = 0;

        /* Release the IRQs. */
        for (i = 0; i < msix->msix_alloc; i++)
                PCIB_RELEASE_MSIX(device_get_parent(dev), child,
                    msix->msix_vectors[i].mv_irq);
        free(msix->msix_vectors, M_DEVBUF);
        msix->msix_alloc = 0;
        return (0);
}
 2147 
 2148 /*
 2149  * Return the max supported MSI-X messages this device supports.
 2150  * Basically, assuming the MD code can alloc messages, this function
 2151  * should return the maximum value that pci_alloc_msix() can return.
 2152  * Thus, it is subject to the tunables, etc.
 2153  */
 2154 int
 2155 pci_msix_count_method(device_t dev, device_t child)
 2156 {
 2157         struct pci_devinfo *dinfo = device_get_ivars(child);
 2158         struct pcicfg_msix *msix = &dinfo->cfg.msix;
 2159 
 2160         if (pci_do_msix && msix->msix_location != 0)
 2161                 return (msix->msix_msgnum);
 2162         return (0);
 2163 }
 2164 
 2165 int
 2166 pci_msix_pba_bar_method(device_t dev, device_t child)
 2167 {
 2168         struct pci_devinfo *dinfo = device_get_ivars(child);
 2169         struct pcicfg_msix *msix = &dinfo->cfg.msix;
 2170 
 2171         if (pci_do_msix && msix->msix_location != 0)
 2172                 return (msix->msix_pba_bar);
 2173         return (-1);
 2174 }
 2175 
 2176 int
 2177 pci_msix_table_bar_method(device_t dev, device_t child)
 2178 {
 2179         struct pci_devinfo *dinfo = device_get_ivars(child);
 2180         struct pcicfg_msix *msix = &dinfo->cfg.msix;
 2181 
 2182         if (pci_do_msix && msix->msix_location != 0)
 2183                 return (msix->msix_table_bar);
 2184         return (-1);
 2185 }
 2186 
 2187 /*
 2188  * HyperTransport MSI mapping control
 2189  */
 2190 void
 2191 pci_ht_map_msi(device_t dev, uint64_t addr)
 2192 {
 2193         struct pci_devinfo *dinfo = device_get_ivars(dev);
 2194         struct pcicfg_ht *ht = &dinfo->cfg.ht;
 2195 
 2196         if (!ht->ht_msimap)
 2197                 return;
 2198 
 2199         if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
 2200             ht->ht_msiaddr >> 20 == addr >> 20) {
 2201                 /* Enable MSI -> HT mapping. */
 2202                 ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
 2203                 pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
 2204                     ht->ht_msictrl, 2);
 2205         }
 2206 
 2207         if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
 2208                 /* Disable MSI -> HT mapping. */
 2209                 ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
 2210                 pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
 2211                     ht->ht_msictrl, 2);
 2212         }
 2213 }
 2214 
 2215 int
 2216 pci_get_relaxed_ordering_enabled(device_t dev)
 2217 {
 2218         struct pci_devinfo *dinfo = device_get_ivars(dev);
 2219         int cap;
 2220         uint16_t val;
 2221 
 2222         cap = dinfo->cfg.pcie.pcie_location;
 2223         if (cap == 0)
 2224                 return (0);
 2225         val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
 2226         val &= PCIEM_CTL_RELAXED_ORD_ENABLE;
 2227         return (val != 0);
 2228 }
 2229 
 2230 int
 2231 pci_get_max_payload(device_t dev)
 2232 {
 2233         struct pci_devinfo *dinfo = device_get_ivars(dev);
 2234         int cap;
 2235         uint16_t val;
 2236 
 2237         cap = dinfo->cfg.pcie.pcie_location;
 2238         if (cap == 0)
 2239                 return (0);
 2240         val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
 2241         val &= PCIEM_CTL_MAX_PAYLOAD;
 2242         val >>= 5;
 2243         return (1 << (val + 7));
 2244 }
 2245 
 2246 int
 2247 pci_get_max_read_req(device_t dev)
 2248 {
 2249         struct pci_devinfo *dinfo = device_get_ivars(dev);
 2250         int cap;
 2251         uint16_t val;
 2252 
 2253         cap = dinfo->cfg.pcie.pcie_location;
 2254         if (cap == 0)
 2255                 return (0);
 2256         val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
 2257         val &= PCIEM_CTL_MAX_READ_REQUEST;
 2258         val >>= 12;
 2259         return (1 << (val + 7));
 2260 }
 2261 
/*
 * Program the PCI-e maximum read request size for a device to the
 * largest supported power of 2 that does not exceed 'size'.  Returns
 * the size actually programmed, or 0 if the device has no PCI-e
 * capability.
 */
int
pci_set_max_read_req(device_t dev, int size)
{
        struct pci_devinfo *dinfo = device_get_ivars(dev);
        int cap;
        uint16_t val;

        cap = dinfo->cfg.pcie.pcie_location;
        if (cap == 0)
                return (0);
        /* Clamp to the architectural range of 128-4096 bytes. */
        if (size < 128)
                size = 128;
        if (size > 4096)
                size = 4096;
        /* Round down to a power of 2. */
        size = (1 << (fls(size) - 1));
        val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
        val &= ~PCIEM_CTL_MAX_READ_REQUEST;
        /* Encode 128 * 2^n as n in bits 14:12 of the control register. */
        val |= (fls(size) - 8) << 12;
        pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
        return (size);
}
 2283 
 2284 uint32_t
 2285 pcie_read_config(device_t dev, int reg, int width)
 2286 {
 2287         struct pci_devinfo *dinfo = device_get_ivars(dev);
 2288         int cap;
 2289 
 2290         cap = dinfo->cfg.pcie.pcie_location;
 2291         if (cap == 0) {
 2292                 if (width == 2)
 2293                         return (0xffff);
 2294                 return (0xffffffff);
 2295         }
 2296 
 2297         return (pci_read_config(dev, cap + reg, width));
 2298 }
 2299 
 2300 void
 2301 pcie_write_config(device_t dev, int reg, uint32_t value, int width)
 2302 {
 2303         struct pci_devinfo *dinfo = device_get_ivars(dev);
 2304         int cap;
 2305 
 2306         cap = dinfo->cfg.pcie.pcie_location;
 2307         if (cap == 0)
 2308                 return;
 2309         pci_write_config(dev, cap + reg, value, width);
 2310 }
 2311 
 2312 /*
 2313  * Adjusts a PCI-e capability register by clearing the bits in mask
 2314  * and setting the bits in (value & mask).  Bits not set in mask are
 2315  * not adjusted.
 2316  *
 2317  * Returns the old value on success or all ones on failure.
 2318  */
 2319 uint32_t
 2320 pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
 2321     int width)
 2322 {
 2323         struct pci_devinfo *dinfo = device_get_ivars(dev);
 2324         uint32_t old, new;
 2325         int cap;
 2326 
 2327         cap = dinfo->cfg.pcie.pcie_location;
 2328         if (cap == 0) {
 2329                 if (width == 2)
 2330                         return (0xffff);
 2331                 return (0xffffffff);
 2332         }
 2333 
 2334         old = pci_read_config(dev, cap + reg, width);
 2335         new = old & ~mask;
 2336         new |= (value & mask);
 2337         pci_write_config(dev, cap + reg, new, width);
 2338         return (old);
 2339 }
 2340 
 2341 /*
 2342  * Support for MSI message signalled interrupts.
 2343  */
 2344 void
 2345 pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
 2346     uint16_t data)
 2347 {
 2348         struct pci_devinfo *dinfo = device_get_ivars(child);
 2349         struct pcicfg_msi *msi = &dinfo->cfg.msi;
 2350 
 2351         /* Write data and address values. */
 2352         pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
 2353             address & 0xffffffff, 4);
 2354         if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
 2355                 pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
 2356                     address >> 32, 4);
 2357                 pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
 2358                     data, 2);
 2359         } else
 2360                 pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
 2361                     2);
 2362 
 2363         /* Enable MSI in the control register. */
 2364         msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
 2365         pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
 2366             msi->msi_ctrl, 2);
 2367 
 2368         /* Enable MSI -> HT mapping. */
 2369         pci_ht_map_msi(child, address);
 2370 }
 2371 
 2372 void
 2373 pci_disable_msi_method(device_t dev, device_t child)
 2374 {
 2375         struct pci_devinfo *dinfo = device_get_ivars(child);
 2376         struct pcicfg_msi *msi = &dinfo->cfg.msi;
 2377 
 2378         /* Disable MSI -> HT mapping. */
 2379         pci_ht_map_msi(child, 0);
 2380 
 2381         /* Disable MSI in the control register. */
 2382         msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
 2383         pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
 2384             msi->msi_ctrl, 2);
 2385 }
 2386 
 2387 /*
 2388  * Restore MSI registers during resume.  If MSI is enabled then
 2389  * restore the data and address registers in addition to the control
 2390  * register.
 2391  */
 2392 static void
 2393 pci_resume_msi(device_t dev)
 2394 {
 2395         struct pci_devinfo *dinfo = device_get_ivars(dev);
 2396         struct pcicfg_msi *msi = &dinfo->cfg.msi;
 2397         uint64_t address;
 2398         uint16_t data;
 2399 
 2400         if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
 2401                 address = msi->msi_addr;
 2402                 data = msi->msi_data;
 2403                 pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
 2404                     address & 0xffffffff, 4);
 2405                 if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
 2406                         pci_write_config(dev, msi->msi_location +
 2407                             PCIR_MSI_ADDR_HIGH, address >> 32, 4);
 2408                         pci_write_config(dev, msi->msi_location +
 2409                             PCIR_MSI_DATA_64BIT, data, 2);
 2410                 } else
 2411                         pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
 2412                             data, 2);
 2413         }
 2414         pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
 2415             2);
 2416 }
 2417 
 2418 static int
 2419 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
 2420 {
 2421         struct pci_devinfo *dinfo = device_get_ivars(dev);
 2422         pcicfgregs *cfg = &dinfo->cfg;
 2423         struct resource_list_entry *rle;
 2424         struct msix_table_entry *mte;
 2425         struct msix_vector *mv;
 2426         uint64_t addr;
 2427         uint32_t data;
 2428         int error, i, j;
 2429 
 2430         /*
 2431          * Handle MSI first.  We try to find this IRQ among our list
 2432          * of MSI IRQs.  If we find it, we request updated address and
 2433          * data registers and apply the results.
 2434          */
 2435         if (cfg->msi.msi_alloc > 0) {
 2436                 /* If we don't have any active handlers, nothing to do. */
 2437                 if (cfg->msi.msi_handlers == 0)
 2438                         return (0);
 2439                 for (i = 0; i < cfg->msi.msi_alloc; i++) {
 2440                         rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
 2441                             i + 1);
 2442                         if (rle->start == irq) {
 2443                                 error = PCIB_MAP_MSI(device_get_parent(bus),
 2444                                     dev, irq, &addr, &data);
 2445                                 if (error)
 2446                                         return (error);
 2447                                 pci_disable_msi(dev);
 2448                                 dinfo->cfg.msi.msi_addr = addr;
 2449                                 dinfo->cfg.msi.msi_data = data;
 2450                                 pci_enable_msi(dev, addr, data);
 2451                                 return (0);
 2452                         }
 2453                 }
 2454                 return (ENOENT);
 2455         }
 2456 
 2457         /*
 2458          * For MSI-X, we check to see if we have this IRQ.  If we do,
 2459          * we request the updated mapping info.  If that works, we go
 2460          * through all the slots that use this IRQ and update them.
 2461          */
 2462         if (cfg->msix.msix_alloc > 0) {
 2463                 for (i = 0; i < cfg->msix.msix_alloc; i++) {
 2464                         mv = &cfg->msix.msix_vectors[i];
 2465                         if (mv->mv_irq == irq) {
 2466                                 error = PCIB_MAP_MSI(device_get_parent(bus),
 2467                                     dev, irq, &addr, &data);
 2468                                 if (error)
 2469                                         return (error);
 2470                                 mv->mv_address = addr;
 2471                                 mv->mv_data = data;
 2472                                 for (j = 0; j < cfg->msix.msix_table_len; j++) {
 2473                                         mte = &cfg->msix.msix_table[j];
 2474                                         if (mte->mte_vector != i + 1)
 2475                                                 continue;
 2476                                         if (mte->mte_handlers == 0)
 2477                                                 continue;
 2478                                         pci_mask_msix(dev, j);
 2479                                         pci_enable_msix(dev, j, addr, data);
 2480                                         pci_unmask_msix(dev, j);
 2481                                 }
 2482                         }
 2483                 }
 2484                 return (ENOENT);
 2485         }
 2486 
 2487         return (ENOENT);
 2488 }
 2489 
 2490 /*
 2491  * Returns true if the specified device is blacklisted because MSI
 2492  * doesn't work.
 2493  */
 2494 int
 2495 pci_msi_device_blacklisted(device_t dev)
 2496 {
 2497 
 2498         if (!pci_honor_msi_blacklist)
 2499                 return (0);
 2500 
 2501         return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
 2502 }
 2503 
 2504 /*
 2505  * Determine if MSI is blacklisted globally on this system.  Currently,
 2506  * we just check for blacklisted chipsets as represented by the
 2507  * host-PCI bridge at device 0:0:0.  In the future, it may become
 2508  * necessary to check other system attributes, such as the kenv values
 2509  * that give the motherboard manufacturer and model number.
 2510  */
 2511 static int
 2512 pci_msi_blacklisted(void)
 2513 {
 2514         device_t dev;
 2515 
 2516         if (!pci_honor_msi_blacklist)
 2517                 return (0);
 2518 
 2519         /* Blacklist all non-PCI-express and non-PCI-X chipsets. */
 2520         if (!(pcie_chipset || pcix_chipset)) {
 2521                 if (vm_guest != VM_GUEST_NO) {
 2522                         /*
 2523                          * Whitelist older chipsets in virtual
 2524                          * machines known to support MSI.
 2525                          */
 2526                         dev = pci_find_bsf(0, 0, 0);
 2527                         if (dev != NULL)
 2528                                 return (!pci_has_quirk(pci_get_devid(dev),
 2529                                         PCI_QUIRK_ENABLE_MSI_VM));
 2530                 }
 2531                 return (1);
 2532         }
 2533 
 2534         dev = pci_find_bsf(0, 0, 0);
 2535         if (dev != NULL)
 2536                 return (pci_msi_device_blacklisted(dev));
 2537         return (0);
 2538 }
 2539 
 2540 /*
 2541  * Returns true if the specified device is blacklisted because MSI-X
 2542  * doesn't work.  Note that this assumes that if MSI doesn't work,
 2543  * MSI-X doesn't either.
 2544  */
 2545 int
 2546 pci_msix_device_blacklisted(device_t dev)
 2547 {
 2548 
 2549         if (!pci_honor_msi_blacklist)
 2550                 return (0);
 2551 
 2552         if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
 2553                 return (1);
 2554 
 2555         return (pci_msi_device_blacklisted(dev));
 2556 }
 2557 
 2558 /*
 2559  * Determine if MSI-X is blacklisted globally on this system.  If MSI
 2560  * is blacklisted, assume that MSI-X is as well.  Check for additional
 2561  * chipsets where MSI works but MSI-X does not.
 2562  */
 2563 static int
 2564 pci_msix_blacklisted(void)
 2565 {
 2566         device_t dev;
 2567 
 2568         if (!pci_honor_msi_blacklist)
 2569                 return (0);
 2570 
 2571         dev = pci_find_bsf(0, 0, 0);
 2572         if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
 2573             PCI_QUIRK_DISABLE_MSIX))
 2574                 return (1);
 2575 
 2576         return (pci_msi_blacklisted());
 2577 }
 2578 
 2579 /*
 2580  * Attempt to allocate *count MSI messages.  The actual number allocated is
 2581  * returned in *count.  After this function returns, each message will be
 2582  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
 2583  */
 2584 int
 2585 pci_alloc_msi_method(device_t dev, device_t child, int *count)
 2586 {
 2587         struct pci_devinfo *dinfo = device_get_ivars(child);
 2588         pcicfgregs *cfg = &dinfo->cfg;
 2589         struct resource_list_entry *rle;
 2590         int actual, error, i, irqs[32];
 2591         uint16_t ctrl;
 2592 
 2593         /* Don't let count == 0 get us into trouble. */
 2594         if (*count == 0)
 2595                 return (EINVAL);
 2596 
 2597         /* If rid 0 is allocated, then fail. */
 2598         rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
 2599         if (rle != NULL && rle->res != NULL)
 2600                 return (ENXIO);
 2601 
 2602         /* Already have allocated messages? */
 2603         if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
 2604                 return (ENXIO);
 2605 
 2606         /* If MSI is blacklisted for this system, fail. */
 2607         if (pci_msi_blacklisted())
 2608                 return (ENXIO);
 2609 
 2610         /* MSI capability present? */
 2611         if (cfg->msi.msi_location == 0 || !pci_do_msi)
 2612                 return (ENODEV);
 2613 
 2614         if (bootverbose)
 2615                 device_printf(child,
 2616                     "attempting to allocate %d MSI vectors (%d supported)\n",
 2617                     *count, cfg->msi.msi_msgnum);
 2618 
 2619         /* Don't ask for more than the device supports. */
 2620         actual = min(*count, cfg->msi.msi_msgnum);
 2621 
 2622         /* Don't ask for more than 32 messages. */
 2623         actual = min(actual, 32);
 2624 
 2625         /* MSI requires power of 2 number of messages. */
 2626         if (!powerof2(actual))
 2627                 return (EINVAL);
 2628 
 2629         for (;;) {
 2630                 /* Try to allocate N messages. */
 2631                 error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
 2632                     actual, irqs);
 2633                 if (error == 0)
 2634                         break;
 2635                 if (actual == 1)
 2636                         return (error);
 2637 
 2638                 /* Try N / 2. */
 2639                 actual >>= 1;
 2640         }
 2641 
 2642         /*
 2643          * We now have N actual messages mapped onto SYS_RES_IRQ
 2644          * resources in the irqs[] array, so add new resources
 2645          * starting at rid 1.
 2646          */
 2647         for (i = 0; i < actual; i++)
 2648                 resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
 2649                     irqs[i], irqs[i], 1);
 2650 
 2651         if (bootverbose) {
 2652                 if (actual == 1)
 2653                         device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
 2654                 else {
 2655                         int run;
 2656 
 2657                         /*
 2658                          * Be fancy and try to print contiguous runs
 2659                          * of IRQ values as ranges.  'run' is true if
 2660                          * we are in a range.
 2661                          */
 2662                         device_printf(child, "using IRQs %d", irqs[0]);
 2663                         run = 0;
 2664                         for (i = 1; i < actual; i++) {
 2665                                 /* Still in a run? */
 2666                                 if (irqs[i] == irqs[i - 1] + 1) {
 2667                                         run = 1;
 2668                                         continue;
 2669                                 }
 2670 
 2671                                 /* Finish previous range. */
 2672                                 if (run) {
 2673                                         printf("-%d", irqs[i - 1]);
 2674                                         run = 0;
 2675                                 }
 2676 
 2677                                 /* Start new range. */
 2678                                 printf(",%d", irqs[i]);
 2679                         }
 2680 
 2681                         /* Unfinished range? */
 2682                         if (run)
 2683                                 printf("-%d", irqs[actual - 1]);
 2684                         printf(" for MSI\n");
 2685                 }
 2686         }
 2687 
 2688         /* Update control register with actual count. */
 2689         ctrl = cfg->msi.msi_ctrl;
 2690         ctrl &= ~PCIM_MSICTRL_MME_MASK;
 2691         ctrl |= (ffs(actual) - 1) << 4;
 2692         cfg->msi.msi_ctrl = ctrl;
 2693         pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
 2694 
 2695         /* Update counts of alloc'd messages. */
 2696         cfg->msi.msi_alloc = actual;
 2697         cfg->msi.msi_handlers = 0;
 2698         *count = actual;
 2699         return (0);
 2700 }
 2701 
/*
 * Release the MSI messages associated with this device.  Fails with
 * EBUSY if any message still has a handler or an allocated resource,
 * and with ENODEV if no messages are allocated.  Falls through to
 * MSI-X release first since only one of the two can be active.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
        struct pci_devinfo *dinfo = device_get_ivars(child);
        struct pcicfg_msi *msi = &dinfo->cfg.msi;
        struct resource_list_entry *rle;
        int error, i, irqs[32];

        /* Try MSI-X first. */
        error = pci_release_msix(dev, child);
        if (error != ENODEV)
                return (error);

        /* Do we have any messages to release? */
        if (msi->msi_alloc == 0)
                return (ENODEV);
        KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

        /* Make sure none of the resources are allocated. */
        if (msi->msi_handlers > 0)
                return (EBUSY);
        for (i = 0; i < msi->msi_alloc; i++) {
                rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
                KASSERT(rle != NULL, ("missing MSI resource"));
                if (rle->res != NULL)
                        return (EBUSY);
                /* Remember the IRQ numbers for the release below. */
                irqs[i] = rle->start;
        }

        /* Update control register with 0 count. */
        KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
            ("%s: MSI still enabled", __func__));
        msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
        pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
            msi->msi_ctrl, 2);

        /* Release the messages. */
        PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
        for (i = 0; i < msi->msi_alloc; i++)
                resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

        /* Update alloc count. */
        msi->msi_alloc = 0;
        msi->msi_addr = 0;
        msi->msi_data = 0;
        return (0);
}
 2750 
 2751 /*
 2752  * Return the max supported MSI messages this device supports.
 2753  * Basically, assuming the MD code can alloc messages, this function
 2754  * should return the maximum value that pci_alloc_msi() can return.
 2755  * Thus, it is subject to the tunables, etc.
 2756  */
 2757 int
 2758 pci_msi_count_method(device_t dev, device_t child)
 2759 {
 2760         struct pci_devinfo *dinfo = device_get_ivars(child);
 2761         struct pcicfg_msi *msi = &dinfo->cfg.msi;
 2762 
 2763         if (pci_do_msi && msi->msi_location != 0)
 2764                 return (msi->msi_msgnum);
 2765         return (0);
 2766 }
 2767 
/*
 * Free a pcicfgregs structure and all depending data structures:
 * VPD strings, BAR map entries, and finally the devinfo itself, which
 * is also unlinked from the global device list.  Always returns 0.
 */
int
pci_freecfg(struct pci_devinfo *dinfo)
{
        struct devlist *devlist_head;
        struct pci_map *pm, *next;
        int i;

        devlist_head = &pci_devq;

        /* Release cached VPD data, if any was read. */
        if (dinfo->cfg.vpd.vpd_reg) {
                free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
                for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
                        free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
                free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
                for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
                        free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
                free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
        }
        /* Free the per-BAR bookkeeping entries. */
        STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
                free(pm, M_DEVBUF);
        }
        STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
        free(dinfo, M_DEVBUF);

        /* increment the generation count */
        pci_generation++;

        /* we're losing one device */
        pci_numdevs--;
        return (0);
}
 2801 
 2802 /*
 2803  * PCI power manangement
 2804  */
 2805 int
 2806 pci_set_powerstate_method(device_t dev, device_t child, int state)
 2807 {
 2808         struct pci_devinfo *dinfo = device_get_ivars(child);
 2809         pcicfgregs *cfg = &dinfo->cfg;
 2810         uint16_t status;
 2811         int oldstate, highest, delay;
 2812 
 2813         if (cfg->pp.pp_cap == 0)
 2814                 return (EOPNOTSUPP);
 2815 
 2816         /*
 2817          * Optimize a no state change request away.  While it would be OK to
 2818          * write to the hardware in theory, some devices have shown odd
 2819          * behavior when going from D3 -> D3.
 2820          */
 2821         oldstate = pci_get_powerstate(child);
 2822         if (oldstate == state)
 2823                 return (0);
 2824 
 2825         /*
 2826          * The PCI power management specification states that after a state
 2827          * transition between PCI power states, system software must
 2828          * guarantee a minimal delay before the function accesses the device.
 2829          * Compute the worst case delay that we need to guarantee before we
 2830          * access the device.  Many devices will be responsive much more
 2831          * quickly than this delay, but there are some that don't respond
 2832          * instantly to state changes.  Transitions to/from D3 state require
 2833          * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
 2834          * is done below with DELAY rather than a sleeper function because
 2835          * this function can be called from contexts where we cannot sleep.
 2836          */
 2837         highest = (oldstate > state) ? oldstate : state;
 2838         if (highest == PCI_POWERSTATE_D3)
 2839             delay = 10000;
 2840         else if (highest == PCI_POWERSTATE_D2)
 2841             delay = 200;
 2842         else
 2843             delay = 0;
 2844         status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
 2845             & ~PCIM_PSTAT_DMASK;
 2846         switch (state) {
 2847         case PCI_POWERSTATE_D0:
 2848                 status |= PCIM_PSTAT_D0;
 2849                 break;
 2850         case PCI_POWERSTATE_D1:
 2851                 if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
 2852                         return (EOPNOTSUPP);
 2853                 status |= PCIM_PSTAT_D1;
 2854                 break;
 2855         case PCI_POWERSTATE_D2:
 2856                 if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
 2857                         return (EOPNOTSUPP);
 2858                 status |= PCIM_PSTAT_D2;
 2859                 break;
 2860         case PCI_POWERSTATE_D3:
 2861                 status |= PCIM_PSTAT_D3;
 2862                 break;
 2863         default:
 2864                 return (EINVAL);
 2865         }
 2866 
 2867         if (bootverbose)
 2868                 pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
 2869                     state);
 2870 
 2871         PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
 2872         if (delay)
 2873                 DELAY(delay);
 2874         return (0);
 2875 }
 2876 
 2877 int
 2878 pci_get_powerstate_method(device_t dev, device_t child)
 2879 {
 2880         struct pci_devinfo *dinfo = device_get_ivars(child);
 2881         pcicfgregs *cfg = &dinfo->cfg;
 2882         uint16_t status;
 2883         int result;
 2884 
 2885         if (cfg->pp.pp_cap != 0) {
 2886                 status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
 2887                 switch (status & PCIM_PSTAT_DMASK) {
 2888                 case PCIM_PSTAT_D0:
 2889                         result = PCI_POWERSTATE_D0;
 2890                         break;
 2891                 case PCIM_PSTAT_D1:
 2892                         result = PCI_POWERSTATE_D1;
 2893                         break;
 2894                 case PCIM_PSTAT_D2:
 2895                         result = PCI_POWERSTATE_D2;
 2896                         break;
 2897                 case PCIM_PSTAT_D3:
 2898                         result = PCI_POWERSTATE_D3;
 2899                         break;
 2900                 default:
 2901                         result = PCI_POWERSTATE_UNKNOWN;
 2902                         break;
 2903                 }
 2904         } else {
 2905                 /* No support, device is always at D0 */
 2906                 result = PCI_POWERSTATE_D0;
 2907         }
 2908         return (result);
 2909 }
 2910 
 2911 /*
 2912  * Some convenience functions for PCI device drivers.
 2913  */
 2914 
 2915 static __inline void
 2916 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
 2917 {
 2918         uint16_t        command;
 2919 
 2920         command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
 2921         command |= bit;
 2922         PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
 2923 }
 2924 
 2925 static __inline void
 2926 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
 2927 {
 2928         uint16_t        command;
 2929 
 2930         command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
 2931         command &= ~bit;
 2932         PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
 2933 }
 2934 
 2935 int
 2936 pci_enable_busmaster_method(device_t dev, device_t child)
 2937 {
 2938         pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
 2939         return (0);
 2940 }
 2941 
 2942 int
 2943 pci_disable_busmaster_method(device_t dev, device_t child)
 2944 {
 2945         pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
 2946         return (0);
 2947 }
 2948 
 2949 int
 2950 pci_enable_io_method(device_t dev, device_t child, int space)
 2951 {
 2952         uint16_t bit;
 2953 
 2954         switch(space) {
 2955         case SYS_RES_IOPORT:
 2956                 bit = PCIM_CMD_PORTEN;
 2957                 break;
 2958         case SYS_RES_MEMORY:
 2959                 bit = PCIM_CMD_MEMEN;
 2960                 break;
 2961         default:
 2962                 return (EINVAL);
 2963         }
 2964         pci_set_command_bit(dev, child, bit);
 2965         return (0);
 2966 }
 2967 
 2968 int
 2969 pci_disable_io_method(device_t dev, device_t child, int space)
 2970 {
 2971         uint16_t bit;
 2972 
 2973         switch(space) {
 2974         case SYS_RES_IOPORT:
 2975                 bit = PCIM_CMD_PORTEN;
 2976                 break;
 2977         case SYS_RES_MEMORY:
 2978                 bit = PCIM_CMD_MEMEN;
 2979                 break;
 2980         default:
 2981                 return (EINVAL);
 2982         }
 2983         pci_clear_command_bit(dev, child, bit);
 2984         return (0);
 2985 }
 2986 
 2987 /*
 2988  * New style pci driver.  Parent device is either a pci-host-bridge or a
 2989  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
 2990  */
 2991 
/*
 * Pretty-print the parsed configuration registers of a newly found
 * device to the console.  Only prints when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

        if (bootverbose) {
                pcicfgregs *cfg = &dinfo->cfg;

                printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
                    cfg->vendor, cfg->device, cfg->revid);
                printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
                    cfg->domain, cfg->bus, cfg->slot, cfg->func);
                printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
                    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
                    cfg->mfdev);
                printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
                    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
                /*
                 * Timer fields are scaled to nanoseconds for display:
                 * lattimer in 30 ns PCI clocks, mingnt/maxlat in 250 ns
                 * units (per the conventional PCI timing definitions).
                 */
                printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
                    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
                    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
                /* intpin 1..4 corresponds to INTA..INTD ('a'..'d'). */
                if (cfg->intpin > 0)
                        printf("\tintpin=%c, irq=%d\n",
                            cfg->intpin +'a' -1, cfg->intline);
                /* Power-management capability: supported and current D-states. */
                if (cfg->pp.pp_cap) {
                        uint16_t status;

                        status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
                        printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
                            cfg->pp.pp_cap & PCIM_PCAP_SPEC,
                            cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
                            cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
                            status & PCIM_PSTAT_DMASK);
                }
                /* MSI capability: message count and optional features. */
                if (cfg->msi.msi_location) {
                        int ctrl;

                        ctrl = cfg->msi.msi_ctrl;
                        printf("\tMSI supports %d message%s%s%s\n",
                            cfg->msi.msi_msgnum,
                            (cfg->msi.msi_msgnum == 1) ? "" : "s",
                            (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
                            (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
                }
                /*
                 * MSI-X capability: message count and which BAR(s) hold
                 * the vector table and pending-bit array.
                 */
                if (cfg->msix.msix_location) {
                        printf("\tMSI-X supports %d message%s ",
                            cfg->msix.msix_msgnum,
                            (cfg->msix.msix_msgnum == 1) ? "" : "s");
                        if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
                                printf("in map 0x%x\n",
                                    cfg->msix.msix_table_bar);
                        else
                                printf("in maps 0x%x and 0x%x\n",
                                    cfg->msix.msix_table_bar,
                                    cfg->msix.msix_pba_bar);
                }
        }
}
 3048 
 3049 static int
 3050 pci_porten(device_t dev)
 3051 {
 3052         return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
 3053 }
 3054 
 3055 static int
 3056 pci_memen(device_t dev)
 3057 {
 3058         return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
 3059 }
 3060 
/*
 * Read the current value of the BAR at config offset 'reg' into *mapp
 * and the size-probe readback (after writing all 1's) into *testvalp;
 * callers derive the BAR size from the low bits that read back as 0.
 * If bar64 is not NULL, *bar64 is set non-zero when the BAR is a
 * 64-bit memory BAR.  The original BAR contents and the command
 * register are restored before returning.
 */
void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
    int *bar64)
{
        struct pci_devinfo *dinfo;
        pci_addr_t map, testval;
        int ln2range;
        uint16_t cmd;

        /*
         * The device ROM BAR is special.  It is always a 32-bit
         * memory BAR.  Bit 0 is special and should not be set when
         * sizing the BAR.
         */
        dinfo = device_get_ivars(dev);
        if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
                map = pci_read_config(dev, reg, 4);
                /* 0xfffffffe keeps bit 0 (the ROM enable bit) clear. */
                pci_write_config(dev, reg, 0xfffffffe, 4);
                testval = pci_read_config(dev, reg, 4);
                pci_write_config(dev, reg, map, 4);
                *mapp = map;
                *testvalp = testval;
                if (bar64 != NULL)
                        *bar64 = 0;
                return;
        }

        map = pci_read_config(dev, reg, 4);
        ln2range = pci_maprange(map);
        /* A 64-bit BAR holds the upper address bits in the next register. */
        if (ln2range == 64)
                map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

        /*
         * Disable decoding via the command register before
         * determining the BAR's length since we will be placing it in
         * a weird state.
         */
        cmd = pci_read_config(dev, PCIR_COMMAND, 2);
        pci_write_config(dev, PCIR_COMMAND,
            cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

        /*
         * Determine the BAR's length by writing all 1's.  The bottom
         * log_2(size) bits of the BAR will stick as 0 when we read
         * the value back.
         *
         * NB: according to the PCI Local Bus Specification, rev. 3.0:
         * "Software writes 0FFFFFFFFh to both registers, reads them back,
         * and combines the result into a 64-bit value." (section 6.2.5.1)
         *
         * Writes to both registers must be performed before attempting to
         * read back the size value.
         */
        testval = 0;
        pci_write_config(dev, reg, 0xffffffff, 4);
        if (ln2range == 64) {
                pci_write_config(dev, reg + 4, 0xffffffff, 4);
                testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
        }
        testval |= pci_read_config(dev, reg, 4);

        /*
         * Restore the original value of the BAR.  We may have reprogrammed
         * the BAR of the low-level console device and when booting verbose,
         * we need the console device addressable.
         */
        pci_write_config(dev, reg, map, 4);
        if (ln2range == 64)
                pci_write_config(dev, reg + 4, map >> 32, 4);
        pci_write_config(dev, PCIR_COMMAND, cmd, 2);

        *mapp = map;
        *testvalp = testval;
        if (bar64 != NULL)
                *bar64 = (ln2range == 64);
}
 3137 
/*
 * Program the BAR described by 'pm' to decode at 'base' and refresh
 * the cached pm_value from the hardware.  64-bit memory BARs require
 * a second write/read for the upper 32 bits.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
        struct pci_devinfo *dinfo;
        int ln2range;

        /* The device ROM BAR is always a 32-bit memory BAR. */
        dinfo = device_get_ivars(dev);
        if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
                ln2range = 32;
        else
                ln2range = pci_maprange(pm->pm_value);
        pci_write_config(dev, pm->pm_reg, base, 4);
        if (ln2range == 64)
                pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
        /* Re-read so pm_value reflects what the device actually latched. */
        pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
        if (ln2range == 64)
                pm->pm_value |= (pci_addr_t)pci_read_config(dev,
                    pm->pm_reg + 4, 4) << 32;
}
 3158 
 3159 struct pci_map *
 3160 pci_find_bar(device_t dev, int reg)
 3161 {
 3162         struct pci_devinfo *dinfo;
 3163         struct pci_map *pm;
 3164 
 3165         dinfo = device_get_ivars(dev);
 3166         STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
 3167                 if (pm->pm_reg == reg)
 3168                         return (pm);
 3169         }
 3170         return (NULL);
 3171 }
 3172 
 3173 struct pci_map *
 3174 pci_first_bar(device_t dev)
 3175 {
 3176         struct pci_devinfo *dinfo;
 3177 
 3178         dinfo = device_get_ivars(dev);
 3179         return (STAILQ_FIRST(&dinfo->cfg.maps));
 3180 }
 3181 
 3182 struct pci_map *
 3183 pci_next_bar(struct pci_map *pm)
 3184 {
 3185         return (STAILQ_NEXT(pm, pm_link));
 3186 }
 3187 
/*
 * Return non-zero if the device currently decodes accesses to the BAR
 * described by 'pm'.
 */
int
pci_bar_enabled(device_t dev, struct pci_map *pm)
{
        struct pci_devinfo *dinfo;
        uint16_t cmd;

        dinfo = device_get_ivars(dev);
        /* A ROM BAR must additionally have its own enable bit set. */
        if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
            !(pm->pm_value & PCIM_BIOS_ENABLE))
                return (0);
#ifdef PCI_IOV
        /*
         * For SR-IOV virtual functions, memory decoding is controlled
         * by the VF MSE bit in the physical function's SR-IOV control
         * register rather than the VF's own command register.
         */
        if ((dinfo->cfg.flags & PCICFG_VF) != 0) {
                struct pcicfg_iov *iov;

                iov = dinfo->cfg.iov;
                cmd = pci_read_config(iov->iov_pf,
                    iov->iov_pos + PCIR_SRIOV_CTL, 2);
                return ((cmd & PCIM_SRIOV_VF_MSE) != 0);
        }
#endif
        /* Otherwise check the matching decode bit in the command register. */
        cmd = pci_read_config(dev, PCIR_COMMAND, 2);
        if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
                return ((cmd & PCIM_CMD_MEMEN) != 0);
        else
                return ((cmd & PCIM_CMD_PORTEN) != 0);
}
 3214 
/*
 * Allocate and record a new BAR entry for config register 'reg' with
 * raw value 'value' and log2 size 'size', keeping the device's BAR
 * list sorted by register offset.  Returns the new entry.
 */
struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
        struct pci_devinfo *dinfo;
        struct pci_map *pm, *prev;

        dinfo = device_get_ivars(dev);
        pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
        pm->pm_reg = reg;
        pm->pm_value = value;
        pm->pm_size = size;
        /*
         * Find the entry to insert after: the last entry whose
         * successor has a larger register offset, or the list tail.
         *
         * NOTE(review): this loop never inserts before the current
         * head, so it assumes a new reg never sorts below the first
         * entry -- confirm against callers.
         */
        STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
                KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
                    reg));
                if (STAILQ_NEXT(prev, pm_link) == NULL ||
                    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
                        break;
        }
        /* prev is NULL only when the list was empty. */
        if (prev != NULL)
                STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
        else
                STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
        return (pm);
}
 3239 
/*
 * Write the cached pm_value of every recorded BAR back into the
 * device's config space.  64-bit BARs take two 32-bit writes; the
 * ROM BAR is always treated as 32-bit.
 */
static void
pci_restore_bars(device_t dev)
{
        struct pci_devinfo *dinfo;
        struct pci_map *pm;
        int ln2range;

        dinfo = device_get_ivars(dev);
        STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
                if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
                        ln2range = 32;
                else
                        ln2range = pci_maprange(pm->pm_value);
                pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
                if (ln2range == 64)
                        pci_write_config(dev, pm->pm_reg + 4,
                            pm->pm_value >> 32, 4);
        }
}
 3259 
 3260 /*
 3261  * Add a resource based on a pci map register. Return 1 if the map
 3262  * register is a 32bit map register or 2 if it is a 64bit register.
 3263  */
/*
 * Probe the BAR at config offset 'reg', add a matching entry to the
 * resource list 'rl', and attempt to reserve the range from the
 * parent bus.  'force' requests allocation even for BARs the firmware
 * left unassigned; 'prefetch' marks the resource prefetchable.
 * Returns the number of 32-bit registers the BAR consumes: 1 for a
 * 32-bit map register, 2 for a 64-bit one.
 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
        struct pci_map *pm;
        pci_addr_t base, map, testval;
        pci_addr_t start, end, count;
        int barlen, basezero, flags, maprange, mapsize, type;
        uint16_t cmd;
        struct resource *res;

        /*
         * The BAR may already exist if the device is a CardBus card
         * whose CIS is stored in this BAR.
         */
        pm = pci_find_bar(dev, reg);
        if (pm != NULL) {
                maprange = pci_maprange(pm->pm_value);
                barlen = maprange == 64 ? 2 : 1;
                return (barlen);
        }

        pci_read_bar(dev, reg, &map, &testval, NULL);
        if (PCI_BAR_MEM(map)) {
                type = SYS_RES_MEMORY;
                if (map & PCIM_BAR_MEM_PREFETCH)
                        prefetch = 1;
        } else
                type = SYS_RES_IOPORT;
        /* mapsize is log2 of the BAR's size in bytes. */
        mapsize = pci_mapsize(testval);
        base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
        basezero = 0;
#else
        basezero = base == 0;
#endif
        maprange = pci_maprange(map);
        barlen = maprange == 64 ? 2 : 1;

        /*
         * For I/O registers, if bottom bit is set, and the next bit up
         * isn't clear, we know we have a BAR that doesn't conform to the
         * spec, so ignore it.  Also, sanity check the size of the data
         * areas to the type of memory involved.  Memory must be at least
         * 16 bytes in size, while I/O ranges must be at least 4.
         * (mapsize is the log2 of the size, hence the 4 and 2 below.)
         */
        if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
                return (barlen);
        if ((type == SYS_RES_MEMORY && mapsize < 4) ||
            (type == SYS_RES_IOPORT && mapsize < 2))
                return (barlen);

        /* Save a record of this BAR. */
        pm = pci_add_bar(dev, reg, map, mapsize);
        if (bootverbose) {
                printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
                    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
                if (type == SYS_RES_IOPORT && !pci_porten(dev))
                        printf(", port disabled\n");
                else if (type == SYS_RES_MEMORY && !pci_memen(dev))
                        printf(", memory disabled\n");
                else
                        printf(", enabled\n");
        }

        /*
         * If base is 0, then we have problems if this architecture does
         * not allow that.  It is best to ignore such entries for the
         * moment.  These will be allocated later if the driver specifically
         * requests them.  However, some removable buses look better when
         * all resources are allocated, so allow '0' to be overridden.
         *
         * Similarly treat maps whose value is the same as the test value
         * read back.  These maps have had all f's written to them by the
         * BIOS in an attempt to disable the resources.
         */
        if (!force && (basezero || map == testval))
                return (barlen);
        /* Bail if the decoded base does not fit in this platform's u_long. */
        if ((u_long)base != base) {
                device_printf(bus,
                    "pci%d:%d:%d:%d bar %#x too many address bits",
                    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
                    pci_get_function(dev), reg);
                return (barlen);
        }

        /*
         * This code theoretically does the right thing, but has
         * undesirable side effects in some cases where peripherals
         * respond oddly to having these bits enabled.  Let the user
         * be able to turn them off (since pci_enable_io_modes is 1 by
         * default).
         */
        if (pci_enable_io_modes) {
                /* Turn on resources that have been left off by a lazy BIOS */
                if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
                        cmd = pci_read_config(dev, PCIR_COMMAND, 2);
                        cmd |= PCIM_CMD_PORTEN;
                        pci_write_config(dev, PCIR_COMMAND, cmd, 2);
                }
                if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
                        cmd = pci_read_config(dev, PCIR_COMMAND, 2);
                        cmd |= PCIM_CMD_MEMEN;
                        pci_write_config(dev, PCIR_COMMAND, cmd, 2);
                }
        } else {
                if (type == SYS_RES_IOPORT && !pci_porten(dev))
                        return (barlen);
                if (type == SYS_RES_MEMORY && !pci_memen(dev))
                        return (barlen);
        }

        count = (pci_addr_t)1 << mapsize;
        /* BARs decode naturally-aligned ranges of their size. */
        flags = RF_ALIGNMENT_LOG2(mapsize);
        if (prefetch)
                flags |= RF_PREFETCHABLE;
        if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
                start = 0;      /* Let the parent decide. */
                end = ~0;
        } else {
                start = base;
                end = base + count - 1;
        }
        resource_list_add(rl, type, reg, start, end, count);

        /*
         * Try to allocate the resource for this BAR from our parent
         * so that this resource range is already reserved.  The
         * driver for this device will later inherit this resource in
         * pci_alloc_resource().
         */
        res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
            flags);
        if ((pci_do_realloc_bars
                || pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_REALLOC_BAR))
            && res == NULL && (start != 0 || end != ~0)) {
                /*
                 * If the allocation fails, try to allocate a resource for
                 * this BAR using any available range.  The firmware felt
                 * it was important enough to assign a resource, so don't
                 * disable decoding if we can help it.
                 */
                resource_list_delete(rl, type, reg);
                resource_list_add(rl, type, reg, 0, ~0, count);
                res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0,
                    count, flags);
        }
        if (res == NULL) {
                /*
                 * If the allocation fails, delete the resource list entry
                 * and disable decoding for this device.
                 *
                 * If the driver requests this resource in the future,
                 * pci_reserve_map() will try to allocate a fresh
                 * resource range.
                 */
                resource_list_delete(rl, type, reg);
                pci_disable_io(dev, type);
                if (bootverbose)
                        device_printf(bus,
                            "pci%d:%d:%d:%d bar %#x failed to allocate\n",
                            pci_get_domain(dev), pci_get_bus(dev),
                            pci_get_slot(dev), pci_get_function(dev), reg);
        } else {
                /* Program the BAR with the address actually reserved. */
                start = rman_get_start(res);
                pci_write_bar(dev, pm, start);
        }
        return (barlen);
}
 3433 
 3434 /*
 3435  * For ATA devices we need to decide early what addressing mode to use.
 3436  * Legacy demands that the primary and secondary ATA ports sits on the
 3437  * same addresses that old ISA hardware did. This dictates that we use
 3438  * those addresses and ignore the BAR's if we cannot set PCI native
 3439  * addressing mode.
 3440  */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
        int rid, type, progif;
#if 0
        /* if this device supports PCI native addressing use it */
        progif = pci_read_config(dev, PCIR_PROGIF, 1);
        if ((progif & 0x8a) == 0x8a) {
                if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
                    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
                        printf("Trying ATA native PCI addressing mode\n");
                        pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
                }
        }
#endif
        progif = pci_read_config(dev, PCIR_PROGIF, 1);
        type = SYS_RES_IOPORT;
        /*
         * Primary channel: in native mode probe BARs 0 and 1 normally;
         * otherwise reserve the legacy ISA compatibility ranges
         * (0x1f0-0x1f7 command block and 0x3f6 control port).
         */
        if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
                pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
                    prefetchmask & (1 << 0));
                pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
                    prefetchmask & (1 << 1));
        } else {
                rid = PCIR_BAR(0);
                resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
                (void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
                    0x1f7, 8, 0);
                rid = PCIR_BAR(1);
                resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
                (void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
                    0x3f6, 1, 0);
        }
        /*
         * Secondary channel: BARs 2 and 3 in native mode, else the
         * legacy 0x170-0x177 and 0x376 ranges.
         */
        if (progif & PCIP_STORAGE_IDE_MODESEC) {
                pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
                    prefetchmask & (1 << 2));
                pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
                    prefetchmask & (1 << 3));
        } else {
                rid = PCIR_BAR(2);
                resource_list_add(rl, type, rid, 0x170, 0x177, 8);
                (void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
                    0x177, 8, 0);
                rid = PCIR_BAR(3);
                resource_list_add(rl, type, rid, 0x376, 0x376, 1);
                (void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
                    0x376, 1, 0);
        }
        /*
         * BARs 4 and 5 are always probed normally (BAR 4 is commonly
         * the bus-master DMA register block -- not verified here).
         */
        pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
            prefetchmask & (1 << 4));
        pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
            prefetchmask & (1 << 5));
}
 3494 
/*
 * Determine the INTx IRQ for a device and record it as interrupt
 * resource rid 0.  The IRQ may come from a user tunable, from the
 * bus's interrupt-routing method, or from the intline config
 * register.  When force_route is set, the bus is asked to route an
 * interrupt even if intline already holds a valid IRQ.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
        struct pci_devinfo *dinfo = device_get_ivars(dev);
        pcicfgregs *cfg = &dinfo->cfg;
        char tunable_name[64];
        int irq;

        /* Has to have an intpin to have an interrupt. */
        if (cfg->intpin == 0)
                return;

        /*
         * Let the user override the IRQ with a tunable of the form
         * hw.pci<domain>.<bus>.<slot>.INT<pin>.irq; only values in
         * the range 1-254 are accepted.
         */
        irq = PCI_INVALID_IRQ;
        snprintf(tunable_name, sizeof(tunable_name),
            "hw.pci%d.%d.%d.INT%c.irq",
            cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
        if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
                irq = PCI_INVALID_IRQ;

        /*
         * If we didn't get an IRQ via the tunable, then we either use the
         * IRQ value in the intline register or we ask the bus to route an
         * interrupt for us.  If force_route is true, then we only use the
         * value in the intline register if the bus was unable to assign an
         * IRQ.
         */
        if (!PCI_INTERRUPT_VALID(irq)) {
                if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
                        irq = PCI_ASSIGN_INTERRUPT(bus, dev);
                if (!PCI_INTERRUPT_VALID(irq))
                        irq = cfg->intline;
        }

        /* If after all that we don't have an IRQ, just bail. */
        if (!PCI_INTERRUPT_VALID(irq))
                return;

        /* Update the config register if it changed. */
        if (irq != cfg->intline) {
                cfg->intline = irq;
                pci_write_config(dev, PCIR_INTLINE, irq, 1);
        }

        /* Add this IRQ as rid 0 interrupt resource. */
        resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
 3542 
 3543 /* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
        struct resource *res;
        uint32_t ctl;
        int rid;
        int i;

        /* Map the controller's operational registers via BAR 0. */
        rid = PCIR_BAR(0);
        res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
        if (res == NULL)
                return;

        /* OHCI_IR set means SMM firmware currently owns the controller. */
        ctl = bus_read_4(res, OHCI_CONTROL);
        if (ctl & OHCI_IR) {
                if (bootverbose)
                        printf("ohci early: "
                            "SMM active, request owner change\n");
                /* Request an ownership change and poll for up to ~100 ms. */
                bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
                for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
                        DELAY(1000);
                        ctl = bus_read_4(res, OHCI_CONTROL);
                }
                /* If SMM never released the controller, force a reset. */
                if (ctl & OHCI_IR) {
                        if (bootverbose)
                                printf("ohci early: "
                                    "SMM does not respond, resetting\n");
                        bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
                }
                /* Disable interrupts */
                bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
        }

        bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
 3579 
 3580 /* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
        struct resource *res;
        int rid;

        /*
         * Set the PIRQD enable bit and switch off all the others. We don't
         * want legacy support to interfere with us XXX Does this also mean
         * that the BIOS won't touch the keyboard anymore if it is connected
         * to the ports of the root hub?
         */
        pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

        /* Disable interrupts */
        rid = PCI_UHCI_BASE_REG;
        res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
        if (res != NULL) {
                /* Clear the interrupt-enable register; best effort only. */
                bus_write_2(res, UHCI_INTR, 0);
                bus_release_resource(self, SYS_RES_IOPORT, rid, res);
        }
}
 3603 
 3604 /* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
        struct resource *res;
        uint32_t cparams;
        uint32_t eec;
        uint8_t eecp;
        uint8_t bios_sem;
        uint8_t offs;
        int rid;
        int i;

        /* Map the controller's registers via BAR 0. */
        rid = PCIR_BAR(0);
        res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
        if (res == NULL)
                return;

        cparams = bus_read_4(res, EHCI_HCCPARAMS);

        /*
         * Synchronise with the BIOS if it owns the controller: walk the
         * extended capability list in PCI config space looking for the
         * legacy-support (LEGSUP) capability.
         */
        for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
            eecp = EHCI_EECP_NEXT(eec)) {
                eec = pci_read_config(self, eecp, 4);
                if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
                        continue;
                }
                /* A non-zero BIOS semaphore means the BIOS owns the HC. */
                bios_sem = pci_read_config(self, eecp +
                    EHCI_LEGSUP_BIOS_SEM, 1);
                if (bios_sem == 0) {
                        continue;
                }
                if (bootverbose)
                        printf("ehci early: "
                            "SMM active, request owner change\n");

                /* Claim ownership by setting the OS semaphore ... */
                pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

                /* ... and poll up to ~100 ms for the BIOS to release it. */
                for (i = 0; (i < 100) && (bios_sem != 0); i++) {
                        DELAY(1000);
                        bios_sem = pci_read_config(self, eecp +
                            EHCI_LEGSUP_BIOS_SEM, 1);
                }

                if (bios_sem != 0) {
                        if (bootverbose)
                                printf("ehci early: "
                                    "SMM does not respond\n");
                }
                /* Disable interrupts */
                offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
                bus_write_4(res, offs + EHCI_USBINTR, 0);
        }
        bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
 3659 
 3660 /* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
        struct resource *res;
        uint32_t cparams;
        uint32_t eec;
        uint8_t eecp;
        uint8_t bios_sem;
        uint8_t offs;
        int rid;
        int i;

        /* Map the controller's registers via BAR 0. */
        rid = PCIR_BAR(0);
        res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
        if (res == NULL)
                return;

        cparams = bus_read_4(res, XHCI_HCSPARAMS0);

        eec = -1;

        /*
         * Synchronise with the BIOS if it owns the controller: walk the
         * extended capability list (in MMIO space for xHCI, dword
         * offsets) looking for the USB legacy-support capability.
         */
        for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
            eecp += XHCI_XECP_NEXT(eec) << 2) {
                eec = bus_read_4(res, eecp);

                if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
                        continue;

                /* A non-zero BIOS semaphore means the BIOS owns the HC. */
                bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
                if (bios_sem == 0)
                        continue;

                if (bootverbose)
                        printf("xhci early: "
                            "SMM active, request owner change\n");

                /* Claim ownership by setting the OS semaphore. */
                bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

                /* Wait a maximum of five seconds for the BIOS to release. */

                for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
                        DELAY(1000);
                        bios_sem = bus_read_1(res, eecp +
                            XHCI_XECP_BIOS_SEM);
                }

                if (bios_sem != 0) {
                        if (bootverbose)
                                printf("xhci early: "
                                    "SMM does not respond\n");
                }

                /* Disable interrupts */
                offs = bus_read_1(res, XHCI_CAPLENGTH);
                bus_write_4(res, offs + XHCI_USBCMD, 0);
                /* Dummy status read, presumably to flush the posted write. */
                bus_read_4(res, offs + XHCI_USBSTS);
        }
        bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
 3721 
 3722 #if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range programmed into a PCI-PCI or
 * CardBus bridge from the parent's bus number space so later
 * allocations do not hand out the same numbers.  If the programmed
 * range is invalid, reservation fails, or pci_clear_buses is set, the
 * secbus/subbus registers are cleared to force renumbering later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
        struct resource *res;
        char *cp;
        rman_res_t start, end, count;
        int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

        /* Only bridge and CardBus headers carry sec/sub bus registers. */
        switch (cfg->hdrtype & PCIM_HDRTYPE) {
        case PCIM_HDRTYPE_BRIDGE:
                sec_reg = PCIR_SECBUS_1;
                sub_reg = PCIR_SUBBUS_1;
                break;
        case PCIM_HDRTYPE_CARDBUS:
                sec_reg = PCIR_SECBUS_2;
                sub_reg = PCIR_SUBBUS_2;
                break;
        default:
                return;
        }

        /*
         * If the existing bus range is valid, attempt to reserve it
         * from our parent.  If this fails for any reason, clear the
         * secbus and subbus registers.
         *
         * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
         * This would at least preserve the existing sec_bus if it is
         * valid.
         */
        sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
        sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

        /* Quirk handling. */
        switch (pci_get_devid(dev)) {
        case 0x12258086:                /* Intel 82454KX/GX (Orion) */
                /* Derive the range from the chipset-specific register 0x41. */
                sup_bus = pci_read_config(dev, 0x41, 1);
                if (sup_bus != 0xff) {
                        sec_bus = sup_bus + 1;
                        sub_bus = sup_bus + 1;
                        PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
                        PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
                }
                break;

        case 0x00dd10de:
                /* Compaq R3000 BIOS sets wrong subordinate bus number. */
                if ((cp = kern_getenv("smbios.planar.maker")) == NULL)
                        break;
                if (strncmp(cp, "Compal", 6) != 0) {
                        freeenv(cp);
                        break;
                }
                freeenv(cp);
                if ((cp = kern_getenv("smbios.planar.product")) == NULL)
                        break;
                if (strncmp(cp, "08A0", 4) != 0) {
                        freeenv(cp);
                        break;
                }
                freeenv(cp);
                if (sub_bus < 0xa) {
                        sub_bus = 0xa;
                        PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
                }
                break;
        }

        if (bootverbose)
                printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
        if (sec_bus > 0 && sub_bus >= sec_bus) {
                start = sec_bus;
                end = sub_bus;
                count = end - start + 1;

                resource_list_add(rl, PCI_RES_BUS, 0, 0, ~0, count);

                /*
                 * If requested, clear secondary bus registers in
                 * bridge devices to force a complete renumbering
                 * rather than reserving the existing range.  However,
                 * preserve the existing size.
                 */
                if (pci_clear_buses)
                        goto clear;

                rid = 0;
                res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
                    start, end, count, 0);
                if (res != NULL)
                        return;

                if (bootverbose)
                        device_printf(bus,
                            "pci%d:%d:%d:%d secbus failed to allocate\n",
                            pci_get_domain(dev), pci_get_bus(dev),
                            pci_get_slot(dev), pci_get_function(dev));
        }

        /* Invalid range, forced clear, and reservation failure all land here. */
clear:
        PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
        PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
 3827 
/*
 * Allocate a range of secondary bus numbers for a bridge child.
 * Only rid 0 is supported.  On the first call the range is lazily
 * reserved from the parent and the child's secondary/subordinate bus
 * registers are programmed to match the reserved range; later calls
 * just allocate from the existing reservation.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, rman_res_t start,
    rman_res_t end, rman_res_t count, u_int flags)
{
        struct pci_devinfo *dinfo;
        pcicfgregs *cfg;
        struct resource_list *rl;
        struct resource *res;
        int sec_reg, sub_reg;

        dinfo = device_get_ivars(child);
        cfg = &dinfo->cfg;
        rl = &dinfo->resources;
        /* Only bridge and CardBus headers carry sec/sub bus registers. */
        switch (cfg->hdrtype & PCIM_HDRTYPE) {
        case PCIM_HDRTYPE_BRIDGE:
                sec_reg = PCIR_SECBUS_1;
                sub_reg = PCIR_SUBBUS_1;
                break;
        case PCIM_HDRTYPE_CARDBUS:
                sec_reg = PCIR_SECBUS_2;
                sub_reg = PCIR_SUBBUS_2;
                break;
        default:
                return (NULL);
        }

        if (*rid != 0)
                return (NULL);

        if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
                resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
        if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
                /* Reserve without RF_ACTIVE; the final alloc below activates. */
                res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
                    start, end, count, flags & ~RF_ACTIVE);
                if (res == NULL) {
                        resource_list_delete(rl, PCI_RES_BUS, *rid);
                        device_printf(child, "allocating %ju bus%s failed\n",
                            count, count == 1 ? "" : "es");
                        return (NULL);
                }
                if (bootverbose)
                        device_printf(child,
                            "Lazy allocation of %ju bus%s at %ju\n", count,
                            count == 1 ? "" : "es", rman_get_start(res));
                /* Program the bridge to decode the reserved bus range. */
                PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
                PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
        }
        return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
            end, count, flags));
}
 3878 #endif
 3879 
 3880 static int
 3881 pci_ea_bei_to_rid(device_t dev, int bei)
 3882 {
 3883 #ifdef PCI_IOV
 3884         struct pci_devinfo *dinfo;
 3885         int iov_pos;
 3886         struct pcicfg_iov *iov;
 3887 
 3888         dinfo = device_get_ivars(dev);
 3889         iov = dinfo->cfg.iov;
 3890         if (iov != NULL)
 3891                 iov_pos = iov->iov_pos;
 3892         else
 3893                 iov_pos = 0;
 3894 #endif
 3895 
 3896         /* Check if matches BAR */
 3897         if ((bei >= PCIM_EA_BEI_BAR_0) &&
 3898             (bei <= PCIM_EA_BEI_BAR_5))
 3899                 return (PCIR_BAR(bei));
 3900 
 3901         /* Check ROM */
 3902         if (bei == PCIM_EA_BEI_ROM)
 3903                 return (PCIR_BIOS);
 3904 
 3905 #ifdef PCI_IOV
 3906         /* Check if matches VF_BAR */
 3907         if ((iov != NULL) && (bei >= PCIM_EA_BEI_VF_BAR_0) &&
 3908             (bei <= PCIM_EA_BEI_VF_BAR_5))
 3909                 return (PCIR_SRIOV_BAR(bei - PCIM_EA_BEI_VF_BAR_0) +
 3910                     iov_pos);
 3911 #endif
 3912 
 3913         return (-1);
 3914 }
 3915 
 3916 int
 3917 pci_ea_is_enabled(device_t dev, int rid)
 3918 {
 3919         struct pci_ea_entry *ea;
 3920         struct pci_devinfo *dinfo;
 3921 
 3922         dinfo = device_get_ivars(dev);
 3923 
 3924         STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
 3925                 if (pci_ea_bei_to_rid(dev, ea->eae_bei) == rid)
 3926                         return ((ea->eae_flags & PCIM_EA_ENABLE) > 0);
 3927         }
 3928 
 3929         return (0);
 3930 }
 3931 
 3932 void
 3933 pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov)
 3934 {
 3935         struct pci_ea_entry *ea;
 3936         struct pci_devinfo *dinfo;
 3937         pci_addr_t start, end, count;
 3938         struct resource_list *rl;
 3939         int type, flags, rid;
 3940         struct resource *res;
 3941         uint32_t tmp;
 3942 #ifdef PCI_IOV
 3943         struct pcicfg_iov *iov;
 3944 #endif
 3945 
 3946         dinfo = device_get_ivars(dev);
 3947         rl = &dinfo->resources;
 3948         flags = 0;
 3949 
 3950 #ifdef PCI_IOV
 3951         iov = dinfo->cfg.iov;
 3952 #endif
 3953 
 3954         if (dinfo->cfg.ea.ea_location == 0)
 3955                 return;
 3956 
 3957         STAILQ_FOREACH(ea, &dinfo->cfg.ea.ea_entries, eae_link) {
 3958                 /*
 3959                  * TODO: Ignore EA-BAR if is not enabled.
 3960                  *   Currently the EA implementation supports
 3961                  *   only situation, where EA structure contains
 3962                  *   predefined entries. In case they are not enabled
 3963                  *   leave them unallocated and proceed with
 3964                  *   a legacy-BAR mechanism.
 3965                  */
 3966                 if ((ea->eae_flags & PCIM_EA_ENABLE) == 0)
 3967                         continue;
 3968 
 3969                 switch ((ea->eae_flags & PCIM_EA_PP) >> PCIM_EA_PP_OFFSET) {
 3970                 case PCIM_EA_P_MEM_PREFETCH:
 3971                 case PCIM_EA_P_VF_MEM_PREFETCH:
 3972                         flags = RF_PREFETCHABLE;
 3973                         /* FALLTHROUGH */
 3974                 case PCIM_EA_P_VF_MEM:
 3975                 case PCIM_EA_P_MEM:
 3976                         type = SYS_RES_MEMORY;
 3977                         break;
 3978                 case PCIM_EA_P_IO:
 3979                         type = SYS_RES_IOPORT;
 3980                         break;
 3981                 default:
 3982                         continue;
 3983                 }
 3984 
 3985                 if (alloc_iov != 0) {
 3986 #ifdef PCI_IOV
 3987                         /* Allocating IOV, confirm BEI matches */
 3988                         if ((ea->eae_bei < PCIM_EA_BEI_VF_BAR_0) ||
 3989                             (ea->eae_bei > PCIM_EA_BEI_VF_BAR_5))
 3990                                 continue;
 3991 #else
 3992                         continue;
 3993 #endif
 3994                 } else {
 3995                         /* Allocating BAR, confirm BEI matches */
 3996                         if (((ea->eae_bei < PCIM_EA_BEI_BAR_0) ||
 3997                             (ea->eae_bei > PCIM_EA_BEI_BAR_5)) &&
 3998                             (ea->eae_bei != PCIM_EA_BEI_ROM))
 3999                                 continue;
 4000                 }
 4001 
 4002                 rid = pci_ea_bei_to_rid(dev, ea->eae_bei);
 4003                 if (rid < 0)
 4004                         continue;
 4005 
 4006                 /* Skip resources already allocated by EA */
 4007                 if ((resource_list_find(rl, SYS_RES_MEMORY, rid) != NULL) ||
 4008                     (resource_list_find(rl, SYS_RES_IOPORT, rid) != NULL))
 4009                         continue;
 4010 
 4011                 start = ea->eae_base;
 4012                 count = ea->eae_max_offset + 1;
 4013 #ifdef PCI_IOV
 4014                 if (iov != NULL)
 4015                         count = count * iov->iov_num_vfs;
 4016 #endif
 4017                 end = start + count - 1;
 4018                 if (count == 0)
 4019                         continue;
 4020 
 4021                 resource_list_add(rl, type, rid, start, end, count);
 4022                 res = resource_list_reserve(rl, bus, dev, type, &rid, start, end, count,
 4023                     flags);
 4024                 if (res == NULL) {
 4025                         resource_list_delete(rl, type, rid);
 4026 
 4027                         /*
 4028                          * Failed to allocate using EA, disable entry.
 4029                          * Another attempt to allocation will be performed
 4030                          * further, but this time using legacy BAR registers
 4031                          */
 4032                         tmp = pci_read_config(dev, ea->eae_cfg_offset, 4);
 4033                         tmp &= ~PCIM_EA_ENABLE;
 4034                         pci_write_config(dev, ea->eae_cfg_offset, tmp, 4);
 4035 
 4036                         /*
 4037                          * Disabling entry might fail in case it is hardwired.
 4038                          * Read flags again to match current status.
 4039                          */
 4040                         ea->eae_flags = pci_read_config(dev, ea->eae_cfg_offset, 4);
 4041 
 4042                         continue;
 4043                 }
 4044 
 4045                 /* As per specification, fill BAR with zeros */
 4046                 pci_write_config(dev, rid, 0, 4);
 4047         }
 4048 }
 4049 
/*
 * Populate the resource list of a newly found PCI function: claim
 * EA-described resources first, then probe legacy BARs (with special
 * handling for legacy ATA and for quirked registers), assign the
 * interrupt, optionally take USB controllers over from the
 * BIOS/firmware, and reserve secondary bus ranges behind bridges.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
        struct pci_devinfo *dinfo;
        pcicfgregs *cfg;
        struct resource_list *rl;
        const struct pci_quirk *q;
        uint32_t devid;
        int i;

        dinfo = device_get_ivars(dev);
        cfg = &dinfo->cfg;
        rl = &dinfo->resources;
        devid = (cfg->device << 16) | cfg->vendor;

        /* Allocate resources using Enhanced Allocation */
        pci_add_resources_ea(bus, dev, 0);

        /* ATA devices needs special map treatment */
        if ((pci_get_class(dev) == PCIC_STORAGE) &&
            (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
            ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
             (!pci_read_config(dev, PCIR_BAR(0), 4) &&
              !pci_read_config(dev, PCIR_BAR(2), 4))) )
                pci_ata_maps(bus, dev, rl, force, prefetchmask);
        else
                for (i = 0; i < cfg->nummaps;) {
                        /* Skip resources already managed by EA */
                        if ((resource_list_find(rl, SYS_RES_MEMORY, PCIR_BAR(i)) != NULL) ||
                            (resource_list_find(rl, SYS_RES_IOPORT, PCIR_BAR(i)) != NULL) ||
                            pci_ea_is_enabled(dev, PCIR_BAR(i))) {
                                i++;
                                continue;
                        }

                        /*
                         * Skip quirked resources.
                         */
                        for (q = &pci_quirks[0]; q->devid != 0; q++)
                                if (q->devid == devid &&
                                    q->type == PCI_QUIRK_UNMAP_REG &&
                                    q->arg1 == PCIR_BAR(i))
                                        break;
                        if (q->devid != 0) {
                                i++;
                                continue;
                        }
                        /* Advance by however many BAR slots pci_add_map() consumed. */
                        i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
                            prefetchmask & (1 << i));
                }

        /*
         * Add additional, quirked resources.
         */
        for (q = &pci_quirks[0]; q->devid != 0; q++)
                if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
                        pci_add_map(bus, dev, q->arg1, rl, force, 0);

        if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
                /*
                 * Try to re-route interrupts. Sometimes the BIOS or
                 * firmware may leave bogus values in these registers.
                 * If the re-route fails, then just stick with what we
                 * have.
                 */
                pci_assign_interrupt(bus, dev, 1);
#else
                pci_assign_interrupt(bus, dev, 0);
#endif
        }

        if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
            pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
                /* Hand the controller from BIOS/SMM ownership to the OS. */
                if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
                        xhci_early_takeover(dev);
                else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
                        ehci_early_takeover(dev);
                else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
                        ohci_early_takeover(dev);
                else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
                        uhci_early_takeover(dev);
        }

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
        /*
         * Reserve resources for secondary bus ranges behind bridge
         * devices.
         */
        pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
 4142 
 4143 static struct pci_devinfo *
 4144 pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
 4145     int slot, int func)
 4146 {
 4147         struct pci_devinfo *dinfo;
 4148 
 4149         dinfo = pci_read_device(pcib, dev, domain, busno, slot, func);
 4150         if (dinfo != NULL)
 4151                 pci_add_child(dev, dinfo);
 4152 
 4153         return (dinfo);
 4154 }
 4155 
/*
 * Scan every slot and function on the given bus and add a child
 * device for each function present.  ARI is enabled (when supported
 * and allowed) before scanning past slot 0 / function 0 because ARI
 * changes the legal slot/function numbering.
 */
void
pci_add_children(device_t dev, int domain, int busno)
{
#define REG(n, w)       PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
        device_t pcib = device_get_parent(dev);
        struct pci_devinfo *dinfo;
        int maxslots;
        int s, f, pcifunchigh;
        uint8_t hdrtype;
        int first_func;

        /*
         * Try to detect a device at slot 0, function 0.  If it exists, try to
         * enable ARI.  We must enable ARI before detecting the rest of the
         * functions on this bus as ARI changes the set of slots and functions
         * that are legal on this bus.
         */
        dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0);
        if (dinfo != NULL && pci_enable_ari)
                PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

        /*
         * Start looking for new devices on slot 0 at function 1 because we
         * just identified the device at slot 0, function 0.
         */
        first_func = 1;

        maxslots = PCIB_MAXSLOTS(pcib);
        for (s = 0; s <= maxslots; s++, first_func = 0) {
                pcifunchigh = 0;
                f = 0;
                DELAY(1);

                /* If function 0 is not present, skip to the next slot. */
                if (REG(PCIR_VENDOR, 2) == PCIV_INVALID)
                        continue;
                hdrtype = REG(PCIR_HDRTYPE, 1);
                if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
                        continue;
                /* Only multi-function devices can have functions past 0. */
                if (hdrtype & PCIM_MFDEV)
                        pcifunchigh = PCIB_MAXFUNCS(pcib);
                for (f = first_func; f <= pcifunchigh; f++)
                        pci_identify_function(pcib, dev, domain, busno, s, f);
        }
#undef REG
}
 4202 
/*
 * Re-scan the bus after a possible hot-plug event: keep existing
 * children that still respond to config reads, delete children that
 * have vanished, and identify + probe/attach any newly appeared
 * functions.  Returns 0 on success or an errno value.
 */
int
pci_rescan_method(device_t dev)
{
#define REG(n, w)       PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
        device_t pcib = device_get_parent(dev);
        device_t child, *devlist, *unchanged;
        int devcount, error, i, j, maxslots, oldcount;
        int busno, domain, s, f, pcifunchigh;
        uint8_t hdrtype;

        /* No need to check for ARI on a rescan. */
        error = device_get_children(dev, &devlist, &devcount);
        if (error)
                return (error);
        if (devcount != 0) {
                /* unchanged[i] records pre-existing children still present. */
                unchanged = malloc(devcount * sizeof(device_t), M_TEMP,
                    M_NOWAIT | M_ZERO);
                if (unchanged == NULL) {
                        free(devlist, M_TEMP);
                        return (ENOMEM);
                }
        } else
                unchanged = NULL;

        domain = pcib_get_domain(dev);
        busno = pcib_get_bus(dev);
        maxslots = PCIB_MAXSLOTS(pcib);
        for (s = 0; s <= maxslots; s++) {
                /* If function 0 is not present, skip to the next slot. */
                f = 0;
                if (REG(PCIR_VENDOR, 2) == PCIV_INVALID)
                        continue;
                pcifunchigh = 0;
                hdrtype = REG(PCIR_HDRTYPE, 1);
                if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
                        continue;
                if (hdrtype & PCIM_MFDEV)
                        pcifunchigh = PCIB_MAXFUNCS(pcib);
                for (f = 0; f <= pcifunchigh; f++) {
                        if (REG(PCIR_VENDOR, 2) == PCIV_INVALID)
                                continue;

                        /*
                         * Found a valid function.  Check if a
                         * device_t for this device already exists.
                         */
                        for (i = 0; i < devcount; i++) {
                                child = devlist[i];
                                if (child == NULL)
                                        continue;
                                if (pci_get_slot(child) == s &&
                                    pci_get_function(child) == f) {
                                        unchanged[i] = child;
                                        goto next_func;
                                }
                        }

                        pci_identify_function(pcib, dev, domain, busno, s, f);
                next_func:;
                }
        }

        /* Remove devices that are no longer present. */
        for (i = 0; i < devcount; i++) {
                if (unchanged[i] != NULL)
                        continue;
                device_delete_child(dev, devlist[i]);
        }

        free(devlist, M_TEMP);
        oldcount = devcount;

        /* Try to attach the devices just added. */
        error = device_get_children(dev, &devlist, &devcount);
        if (error) {
                free(unchanged, M_TEMP);
                return (error);
        }

        for (i = 0; i < devcount; i++) {
                /* Skip children that existed before the rescan. */
                for (j = 0; j < oldcount; j++) {
                        if (devlist[i] == unchanged[j])
                                goto next_device;
                }

                device_probe_and_attach(devlist[i]);
        next_device:;
        }

        free(unchanged, M_TEMP);
        free(devlist, M_TEMP);
        return (0);
#undef REG
}
 4297 
 4298 #ifdef PCI_IOV
/*
 * Create a child device for an SR-IOV virtual function (VF).  The
 * VF's bus/slot/function are decoded from its routing ID; vendor and
 * device IDs are supplied by the caller.  Returns the new device_t.
 */
device_t
pci_add_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
    uint16_t did)
{
        struct pci_devinfo *vf_dinfo;
        device_t pcib;
        int busno, slot, func;

        pcib = device_get_parent(bus);

        PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);

        /* NOTE(review): assumes pci_fill_devinfo() cannot fail here -- verify. */
        vf_dinfo = pci_fill_devinfo(pcib, bus, pci_get_domain(pcib), busno,
            slot, func, vid, did);

        /* Mark the child as a VF so the rest of the driver treats it so. */
        vf_dinfo->cfg.flags |= PCICFG_VF;
        pci_add_child(bus, vf_dinfo);

        return (vf_dinfo->cfg.dev);
}
 4319 
 4320 device_t
 4321 pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
 4322     uint16_t vid, uint16_t did)
 4323 {
 4324 
 4325         return (pci_add_iov_child(bus, pf, rid, vid, did));
 4326 }
 4327 #endif
 4328 
 4329 /*
 4330  * For PCIe device set Max_Payload_Size to match PCIe root's.
 4331  */
 4332 static void
 4333 pcie_setup_mps(device_t dev)
 4334 {
 4335         struct pci_devinfo *dinfo = device_get_ivars(dev);
 4336         device_t root;
 4337         uint16_t rmps, mmps, mps;
 4338 
 4339         if (dinfo->cfg.pcie.pcie_location == 0)
 4340                 return;
 4341         root = pci_find_pcie_root_port(dev);
 4342         if (root == NULL)
 4343                 return;
 4344         /* Check whether the MPS is already configured. */
 4345         rmps = pcie_read_config(root, PCIER_DEVICE_CTL, 2) &
 4346             PCIEM_CTL_MAX_PAYLOAD;
 4347         mps = pcie_read_config(dev, PCIER_DEVICE_CTL, 2) &
 4348             PCIEM_CTL_MAX_PAYLOAD;
 4349         if (mps == rmps)
 4350                 return;
 4351         /* Check whether the device is capable of the root's MPS. */
 4352         mmps = (pcie_read_config(dev, PCIER_DEVICE_CAP, 2) &
 4353             PCIEM_CAP_MAX_PAYLOAD) << 5;
 4354         if (rmps > mmps) {
 4355                 /*
 4356                  * The device is unable to handle root's MPS.  Limit root.
 4357                  * XXX: We should traverse through all the tree, applying
 4358                  * it to all the devices.
 4359                  */
 4360                 pcie_adjust_config(root, PCIER_DEVICE_CTL,
 4361                     PCIEM_CTL_MAX_PAYLOAD, mmps, 2);
 4362         } else {
 4363                 pcie_adjust_config(dev, PCIER_DEVICE_CTL,
 4364                     PCIEM_CTL_MAX_PAYLOAD, rmps, 2);
 4365         }
 4366 }
 4367 
/*
 * Quiesce PCIe error reporting on a newly added child: disable
 * system-error generation at root ports, clear stale AER status,
 * unmask the standard uncorrectable/correctable error classes, and
 * enable error message generation in the device control register.
 */
static void
pci_add_child_clear_aer(device_t dev, struct pci_devinfo *dinfo)
{
        int aer;
        uint32_t r;
        uint16_t r2;

        if (dinfo->cfg.pcie.pcie_location != 0 &&
            dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT) {
                /* Stop the root port from signalling system errors. */
                r2 = pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
                    PCIER_ROOT_CTL, 2);
                r2 &= ~(PCIEM_ROOT_CTL_SERR_CORR |
                    PCIEM_ROOT_CTL_SERR_NONFATAL | PCIEM_ROOT_CTL_SERR_FATAL);
                pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
                    PCIER_ROOT_CTL, r2, 2);
        }
        if (pci_find_extcap(dev, PCIZ_AER, &aer) == 0) {
                /* AER status registers are write-1-to-clear. */
                r = pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
                pci_write_config(dev, aer + PCIR_AER_UC_STATUS, r, 4);
                if (r != 0 && bootverbose) {
                        pci_printf(&dinfo->cfg,
                            "clearing AER UC 0x%08x -> 0x%08x\n",
                            r, pci_read_config(dev, aer + PCIR_AER_UC_STATUS,
                            4));
                }

                /* Unmask the standard uncorrectable error classes. */
                r = pci_read_config(dev, aer + PCIR_AER_UC_MASK, 4);
                r &= ~(PCIM_AER_UC_TRAINING_ERROR |
                    PCIM_AER_UC_DL_PROTOCOL_ERROR |
                    PCIM_AER_UC_SURPRISE_LINK_DOWN |
                    PCIM_AER_UC_POISONED_TLP |
                    PCIM_AER_UC_FC_PROTOCOL_ERROR |
                    PCIM_AER_UC_COMPLETION_TIMEOUT |
                    PCIM_AER_UC_COMPLETER_ABORT |
                    PCIM_AER_UC_UNEXPECTED_COMPLETION |
                    PCIM_AER_UC_RECEIVER_OVERFLOW |
                    PCIM_AER_UC_MALFORMED_TLP |
                    PCIM_AER_UC_ECRC_ERROR |
                    PCIM_AER_UC_UNSUPPORTED_REQUEST |
                    PCIM_AER_UC_ACS_VIOLATION |
                    PCIM_AER_UC_INTERNAL_ERROR |
                    PCIM_AER_UC_MC_BLOCKED_TLP |
                    PCIM_AER_UC_ATOMIC_EGRESS_BLK |
                    PCIM_AER_UC_TLP_PREFIX_BLOCKED);
                pci_write_config(dev, aer + PCIR_AER_UC_MASK, r, 4);

                r = pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
                pci_write_config(dev, aer + PCIR_AER_COR_STATUS, r, 4);
                if (r != 0 && bootverbose) {
                        pci_printf(&dinfo->cfg,
                            "clearing AER COR 0x%08x -> 0x%08x\n",
                            r, pci_read_config(dev, aer + PCIR_AER_COR_STATUS,
                            4));
                }

                /* Unmask the standard correctable error classes. */
                r = pci_read_config(dev, aer + PCIR_AER_COR_MASK, 4);
                r &= ~(PCIM_AER_COR_RECEIVER_ERROR |
                    PCIM_AER_COR_BAD_TLP |
                    PCIM_AER_COR_BAD_DLLP |
                    PCIM_AER_COR_REPLAY_ROLLOVER |
                    PCIM_AER_COR_REPLAY_TIMEOUT |
                    PCIM_AER_COR_ADVISORY_NF_ERROR |
                    PCIM_AER_COR_INTERNAL_ERROR |
                    PCIM_AER_COR_HEADER_LOG_OVFLOW);
                pci_write_config(dev, aer + PCIR_AER_COR_MASK, r, 4);

                /*
                 * NOTE(review): this assumes an AER extended capability
                 * implies a valid pcie_location; confirm pcie_location
                 * is non-zero whenever PCIZ_AER is found.
                 */
                r = pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
                    PCIER_DEVICE_CTL, 2);
                r |=  PCIEM_CTL_COR_ENABLE | PCIEM_CTL_NFER_ENABLE |
                    PCIEM_CTL_FER_ENABLE | PCIEM_CTL_URR_ENABLE;
                pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
                    PCIER_DEVICE_CTL, r, 2);
        }
}
 4442 
/*
 * Instantiate a newly discovered PCI function as a newbus child:
 * create the device, attach the devinfo as ivars, snapshot/restore
 * its config registers, add its resources, match its MPS to the
 * root port, and notify interested parties.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
        device_t dev;

        dinfo->cfg.dev = dev = device_add_child(bus, NULL, -1);
        device_set_ivars(dev, dinfo);
        resource_list_init(&dinfo->resources);
        /* Save-then-restore ordering of the config state is deliberate. */
        pci_cfg_save(dev, dinfo, 0);
        pci_cfg_restore(dev, dinfo);
        pci_print_verbose(dinfo);
        pci_add_resources(bus, dev, 0, 0);
        pcie_setup_mps(dev);
        pci_child_added(dinfo->cfg.dev);

        /* Optionally quiesce Advanced Error Reporting (tunable-driven). */
        if (pci_clear_aer_on_attach)
                pci_add_child_clear_aer(dev, dinfo);

        /* Announce the new device to registered event handlers. */
        EVENTHANDLER_INVOKE(pci_add_device, dinfo->cfg.dev);
}
 4463 
/*
 * Default implementation of the "child added" bus method: a
 * deliberate no-op.  Derived bus drivers may override it to react to
 * newly added children.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
 4469 
 4470 static int
 4471 pci_probe(device_t dev)
 4472 {
 4473 
 4474         device_set_desc(dev, "PCI bus");
 4475 
 4476         /* Allow other subclasses to override this driver. */
 4477         return (BUS_PROBE_GENERIC);
 4478 }
 4479 
/*
 * Attach work shared by all PCI bus drivers: determine the domain and
 * bus number from the parent bridge, reserve this bus's own bus
 * number (when bus number resources are compiled in), and cache the
 * bus DMA tag.  Returns 0 or ENXIO.
 */
int
pci_attach_common(device_t dev)
{
        struct pci_softc *sc;
        int busno, domain;
#ifdef PCI_RES_BUS
        int rid;
#endif

        sc = device_get_softc(dev);
        domain = pcib_get_domain(dev);
        busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
        /* Hold our own bus number so it cannot be handed out again. */
        rid = 0;
        sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
            1, 0);
        if (sc->sc_bus == NULL) {
                device_printf(dev, "failed to allocate bus number\n");
                return (ENXIO);
        }
#endif
        if (bootverbose)
                device_printf(dev, "domain=%d, physical bus=%d\n",
                    domain, busno);
        sc->sc_dma_tag = bus_get_dma_tag(dev);
        return (0);
}
 4507 
 4508 int
 4509 pci_attach(device_t dev)
 4510 {
 4511         int busno, domain, error;
 4512 
 4513         error = pci_attach_common(dev);
 4514         if (error)
 4515                 return (error);
 4516 
 4517         /*
 4518          * Since there can be multiple independently numbered PCI
 4519          * buses on systems with multiple PCI domains, we can't use
 4520          * the unit number to decide which bus we are probing. We ask
 4521          * the parent pcib what our domain and bus numbers are.
 4522          */
 4523         domain = pcib_get_domain(dev);
 4524         busno = pcib_get_bus(dev);
 4525         pci_add_children(dev, domain, busno);
 4526         return (bus_generic_attach(dev));
 4527 }
 4528 
/*
 * Detach method for the PCI bus: detach and delete all children and
 * release the bus number reserved in pci_attach_common().
 */
int
pci_detach(device_t dev)
{
#ifdef PCI_RES_BUS
	struct pci_softc *sc;
#endif
	int error;

	error = bus_generic_detach(dev);
	if (error)
		return (error);
#ifdef PCI_RES_BUS
	sc = device_get_softc(dev);
	/* rid 0 matches the allocation made in pci_attach_common(). */
	error = bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus);
	if (error)
		return (error);
#endif
	return (device_delete_children(dev));
}
 4548 
 4549 static void
 4550 pci_hint_device_unit(device_t dev, device_t child, const char *name, int *unitp)
 4551 {
 4552         int line, unit;
 4553         const char *at;
 4554         char me1[24], me2[32];
 4555         uint8_t b, s, f;
 4556         uint32_t d;
 4557         device_location_cache_t *cache;
 4558 
 4559         d = pci_get_domain(child);
 4560         b = pci_get_bus(child);
 4561         s = pci_get_slot(child);
 4562         f = pci_get_function(child);
 4563         snprintf(me1, sizeof(me1), "pci%u:%u:%u", b, s, f);
 4564         snprintf(me2, sizeof(me2), "pci%u:%u:%u:%u", d, b, s, f);
 4565         line = 0;
 4566         cache = dev_wired_cache_init();
 4567         while (resource_find_dev(&line, name, &unit, "at", NULL) == 0) {
 4568                 resource_string_value(name, unit, "at", &at);
 4569                 if (strcmp(at, me1) == 0 || strcmp(at, me2) == 0) {
 4570                         *unitp = unit;
 4571                         break;
 4572                 }
 4573                 if (dev_wired_cache_match(cache, child, at)) {
 4574                         *unitp = unit;
 4575                         break;
 4576                 }
 4577         }
 4578         dev_wired_cache_fini(cache);
 4579 }
 4580 
 4581 static void
 4582 pci_set_power_child(device_t dev, device_t child, int state)
 4583 {
 4584         device_t pcib;
 4585         int dstate;
 4586 
 4587         /*
 4588          * Set the device to the given state.  If the firmware suggests
 4589          * a different power state, use it instead.  If power management
 4590          * is not present, the firmware is responsible for managing
 4591          * device power.  Skip children who aren't attached since they
 4592          * are handled separately.
 4593          */
 4594         pcib = device_get_parent(dev);
 4595         dstate = state;
 4596         if (device_is_attached(child) &&
 4597             PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0)
 4598                 pci_set_powerstate(child, dstate);
 4599 }
 4600 
/*
 * Suspend a single child device: save its config space, invoke the
 * driver's suspend method, and, when pci_do_power_suspend is set,
 * suspend its legacy interrupt and drop the device into D3.
 */
int
pci_suspend_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list_entry *rle;
	int error;

	dinfo = device_get_ivars(child);

	/*
	 * Save the PCI configuration space for the child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	pci_cfg_save(child, dinfo, 0);

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend_child(dev, child);

	if (error)
		return (error);

	if (pci_do_power_suspend) {
		/*
		 * Make sure this device's interrupt handler is not invoked
		 * in the case the device uses a shared interrupt that can
		 * be raised by some other device.
		 * This is applicable only to regular (legacy) PCI interrupts
		 * as MSI/MSI-X interrupts are never shared.
		 */
		rle = resource_list_find(&dinfo->resources,
		    SYS_RES_IRQ, 0);
		if (rle != NULL && rle->res != NULL)
			(void)bus_suspend_intr(child, rle->res);
		pci_set_power_child(dev, child, PCI_POWERSTATE_D3);
	}

	return (0);
}
 4639 
/*
 * Resume a single child device: restore power (D0) and configuration
 * space, invoke the driver's resume method, and re-enable the legacy
 * interrupt suspended in pci_suspend_child() only once the driver and
 * hardware are fully resumed.
 */
int
pci_resume_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list_entry *rle;

	if (pci_do_power_resume)
		pci_set_power_child(dev, child, PCI_POWERSTATE_D0);

	dinfo = device_get_ivars(child);
	pci_cfg_restore(child, dinfo);
	/* A child with no driver only needs its state re-saved. */
	if (!device_is_attached(child))
		pci_cfg_save(child, dinfo, 1);

	bus_generic_resume_child(dev, child);

	/*
	 * Allow interrupts only after fully resuming the driver and hardware.
	 */
	if (pci_do_power_suspend) {
		/* See pci_suspend_child for details. */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
		if (rle != NULL && rle->res != NULL)
			(void)bus_resume_intr(child, rle->res);
	}

	return (0);
}
 4668 
 4669 int
 4670 pci_resume(device_t dev)
 4671 {
 4672         device_t child, *devlist;
 4673         int error, i, numdevs;
 4674 
 4675         if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
 4676                 return (error);
 4677 
 4678         /*
 4679          * Resume critical devices first, then everything else later.
 4680          */
 4681         for (i = 0; i < numdevs; i++) {
 4682                 child = devlist[i];
 4683                 switch (pci_get_class(child)) {
 4684                 case PCIC_DISPLAY:
 4685                 case PCIC_MEMORY:
 4686                 case PCIC_BRIDGE:
 4687                 case PCIC_BASEPERIPH:
 4688                         BUS_RESUME_CHILD(dev, child);
 4689                         break;
 4690                 }
 4691         }
 4692         for (i = 0; i < numdevs; i++) {
 4693                 child = devlist[i];
 4694                 switch (pci_get_class(child)) {
 4695                 case PCIC_DISPLAY:
 4696                 case PCIC_MEMORY:
 4697                 case PCIC_BRIDGE:
 4698                 case PCIC_BASEPERIPH:
 4699                         break;
 4700                 default:
 4701                         BUS_RESUME_CHILD(dev, child);
 4702                 }
 4703         }
 4704         free(devlist, M_TEMP);
 4705         return (0);
 4706 }
 4707 
 4708 static void
 4709 pci_load_vendor_data(void)
 4710 {
 4711         caddr_t data;
 4712         void *ptr;
 4713         size_t sz;
 4714 
 4715         data = preload_search_by_type("pci_vendor_data");
 4716         if (data != NULL) {
 4717                 ptr = preload_fetch_addr(data);
 4718                 sz = preload_fetch_size(data);
 4719                 if (ptr != NULL && sz != 0) {
 4720                         pci_vendordata = ptr;
 4721                         pci_vendordata_size = sz;
 4722                         /* terminate the database */
 4723                         pci_vendordata[pci_vendordata_size] = '\n';
 4724                 }
 4725         }
 4726 }
 4727 
/*
 * Called when a new driver is added to the bus: run the driver's
 * identify method, then retry probe/attach for every child that has
 * no driver.  Config space is restored first since an unclaimed
 * device may have been saved/powered down by pci_probe_nomatch().
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reconsider children that have no driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		/* Still unclaimed: re-save state via the detach hook. */
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
 4756 
/*
 * Set up an interrupt handler on behalf of a child device.  For a
 * direct child this also programs the hardware: rid 0 enables legacy
 * INTx, while a non-zero rid is an MSI or MSI-X vector that must be
 * mapped through the parent bridge (PCIB_MAP_MSI) and enabled, with a
 * per-vector handler count tracking when enable/disable is needed.
 * On mapping failure the generic handler is torn down again and the
 * error returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI vectors on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N maps to MSI-X table entry N - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map this MSI-X vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}

			/*
			 * The MSIX table entry must be made valid by
			 * incrementing the mte_handlers before
			 * calling pci_enable_msix() and
			 * pci_resume_msix(). Else the MSIX rewrite
			 * table quirk will not work as expected.
			 */
			mte->mte_handlers++;
			if (mte->mte_handlers == 1) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* error is 0 on the fall-through path; non-zero via goto. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
 4864 
 4865 int
 4866 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
 4867     void *cookie)
 4868 {
 4869         struct msix_table_entry *mte;
 4870         struct resource_list_entry *rle;
 4871         struct pci_devinfo *dinfo;
 4872         int error, rid;
 4873 
 4874         if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
 4875                 return (EINVAL);
 4876 
 4877         /* If this isn't a direct child, just bail out */
 4878         if (device_get_parent(child) != dev)
 4879                 return(bus_generic_teardown_intr(dev, child, irq, cookie));
 4880 
 4881         rid = rman_get_rid(irq);
 4882         if (rid == 0) {
 4883                 /* Mask INTx */
 4884                 pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
 4885         } else {
 4886                 /*
 4887                  * Check to see if the interrupt is MSI or MSI-X.  If so,
 4888                  * decrement the appropriate handlers count and mask the
 4889                  * MSI-X message, or disable MSI messages if the count
 4890                  * drops to 0.
 4891                  */
 4892                 dinfo = device_get_ivars(child);
 4893                 rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
 4894                 if (rle->res != irq)
 4895                         return (EINVAL);
 4896                 if (dinfo->cfg.msi.msi_alloc > 0) {
 4897                         KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
 4898                             ("MSI-X index too high"));
 4899                         if (dinfo->cfg.msi.msi_handlers == 0)
 4900                                 return (EINVAL);
 4901                         dinfo->cfg.msi.msi_handlers--;
 4902                         if (dinfo->cfg.msi.msi_handlers == 0)
 4903                                 pci_disable_msi(child);
 4904                 } else {
 4905                         KASSERT(dinfo->cfg.msix.msix_alloc > 0,
 4906                             ("No MSI or MSI-X interrupts allocated"));
 4907                         KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
 4908                             ("MSI-X index too high"));
 4909                         mte = &dinfo->cfg.msix.msix_table[rid - 1];
 4910                         if (mte->mte_handlers == 0)
 4911                                 return (EINVAL);
 4912                         mte->mte_handlers--;
 4913                         if (mte->mte_handlers == 0)
 4914                                 pci_mask_msix(child, rid - 1);
 4915                 }
 4916         }
 4917         error = bus_generic_teardown_intr(dev, child, irq, cookie);
 4918         if (rid > 0)
 4919                 KASSERT(error == 0,
 4920                     ("%s: generic teardown failed for MSI/MSI-X", __func__));
 4921         return (error);
 4922 }
 4923 
/*
 * Print a one-line description of a child device during attach:
 * header, resource usage (I/O ports, memory, IRQs), flags, and the
 * child's slot/function location.  Returns the number of characters
 * printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd");
	/*
	 * NOTE(review): this prints the flags of the bus (dev), not of the
	 * child being described; other print_child implementations use the
	 * child's flags — confirm whether `child` was intended here.
	 */
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_domain(dev, child);
	retval += bus_print_child_footer(dev, child);

	return (retval);
}
 4950 
/*
 * Table mapping PCI class/subclass codes to human-readable names for
 * devices that no driver claimed.  A subclass of -1 is the generic
 * entry for a class.  The table is scanned linearly in its entirety
 * by pci_probe_nomatch() (no early exit), so a later matching entry
 * overrides an earlier one; do not reorder entries.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{PCIC_DASP,		PCIS_DASP_PERFCNTRS,	1, "performance counters"},
	{PCIC_DASP,		PCIS_DASP_COMM_SYNC,	1, "communication synchronizer"},
	{PCIC_DASP,		PCIS_DASP_MGMT_CARD,	1, "signal processing management"},
	{PCIC_INSTRUMENT,	-1,			0, "non-essential instrumentation"},
	{0, 0, 0,		NULL}
};
 5049 
/*
 * Report a child device that no driver claimed.  Prefer a description
 * from the loaded vendor database; otherwise fall back to the
 * class/subclass table above.  Afterwards the device's configuration
 * state is saved via pci_cfg_save(child, ..., 1).
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i, report;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	report = 1;
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		/*
		 * The whole table is scanned without breaking, so a later
		 * matching entry (and its report flag) overrides an
		 * earlier one.
		 */
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				}
			}
		}
		if (report || bootverbose) {
			device_printf(dev, "<%s%s%s>",
			    cp ? cp : "",
			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
			    scp ? scp : "");
		}
	}
	if (report || bootverbose) {
		printf(" at device %d.%d (no driver attached)\n",
		    pci_get_slot(child), pci_get_function(child));
	}
	pci_cfg_save(child, device_get_ivars(child), 1);
}
 5096 
 5097 void
 5098 pci_child_detached(device_t dev, device_t child)
 5099 {
 5100         struct pci_devinfo *dinfo;
 5101         struct resource_list *rl;
 5102 
 5103         dinfo = device_get_ivars(child);
 5104         rl = &dinfo->resources;
 5105 
 5106         /*
 5107          * Have to deallocate IRQs before releasing any MSI messages and
 5108          * have to release MSI messages before deallocating any memory
 5109          * BARs.
 5110          */
 5111         if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
 5112                 pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
 5113         if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
 5114                 if (dinfo->cfg.msi.msi_alloc != 0)
 5115                         pci_printf(&dinfo->cfg, "Device leaked %d MSI "
 5116                             "vectors\n", dinfo->cfg.msi.msi_alloc);
 5117                 else
 5118                         pci_printf(&dinfo->cfg, "Device leaked %d MSI-X "
 5119                             "vectors\n", dinfo->cfg.msix.msix_alloc);
 5120                 (void)pci_release_msi(child);
 5121         }
 5122         if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
 5123                 pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
 5124         if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
 5125                 pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
 5126 #ifdef PCI_RES_BUS
 5127         if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
 5128                 pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
 5129 #endif
 5130 
 5131         pci_cfg_save(child, dinfo, 1);
 5132 }
 5133 
 5134 /*
 5135  * Parse the PCI device database, if loaded, and return a pointer to a
 5136  * description of the device.
 5137  *
 5138  * The database is flat text formatted as follows:
 5139  *
 5140  * Any line not in a valid format is ignored.
 5141  * Lines are terminated with newline '\n' characters.
 5142  *
 5143  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
 5144  * the vendor name.
 5145  *
 5146  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
 5147  * - devices cannot be listed without a corresponding VENDOR line.
 5148  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
 5149  * another TAB, then the device name.
 5150  */
 5151 
 5152 /*
 5153  * Assuming (ptr) points to the beginning of a line in the database,
 5154  * return the vendor or device and description of the next entry.
 5155  * The value of (vendor) or (device) inappropriate for the entry type
 5156  * is set to -1.  Returns nonzero at the end of the database.
 5157  *
 * Note that this is not fully robust in the face of corrupt data;
 5159  * we attempt to safeguard against this by spamming the end of the
 5160  * database with a newline when we initialise.
 5161  */
 5162 static int
 5163 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
 5164 {
 5165         char    *cp = *ptr;
 5166         int     left;
 5167 
 5168         *device = -1;
 5169         *vendor = -1;
 5170         **desc = '\0';
 5171         for (;;) {
 5172                 left = pci_vendordata_size - (cp - pci_vendordata);
 5173                 if (left <= 0) {
 5174                         *ptr = cp;
 5175                         return(1);
 5176                 }
 5177 
 5178                 /* vendor entry? */
 5179                 if (*cp != '\t' &&
 5180                     sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
 5181                         break;
 5182                 /* device entry? */
 5183                 if (*cp == '\t' &&
 5184                     sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
 5185                         break;
 5186 
 5187                 /* skip to next line */
 5188                 while (*cp != '\n' && left > 0) {
 5189                         cp++;
 5190                         left--;
 5191                 }
 5192                 if (*cp == '\n') {
 5193                         cp++;
 5194                         left--;
 5195                 }
 5196         }
 5197         /* skip to next line */
 5198         while (*cp != '\n' && left > 0) {
 5199                 cp++;
 5200                 left--;
 5201         }
 5202         if (*cp == '\n' && left > 0)
 5203                 cp++;
 5204         *ptr = cp;
 5205         return(0);
 5206 }
 5207 
 5208 static char *
 5209 pci_describe_device(device_t dev)
 5210 {
 5211         int     vendor, device;
 5212         char    *desc, *vp, *dp, *line;
 5213 
 5214         desc = vp = dp = NULL;
 5215 
 5216         /*
 5217          * If we have no vendor data, we can't do anything.
 5218          */
 5219         if (pci_vendordata == NULL)
 5220                 goto out;
 5221 
 5222         /*
 5223          * Scan the vendor data looking for this device
 5224          */
 5225         line = pci_vendordata;
 5226         if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
 5227                 goto out;
 5228         for (;;) {
 5229                 if (pci_describe_parse_line(&line, &vendor, &device, &vp))
 5230                         goto out;
 5231                 if (vendor == pci_get_vendor(dev))
 5232                         break;
 5233         }
 5234         if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
 5235                 goto out;
 5236         for (;;) {
 5237                 if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
 5238                         *dp = 0;
 5239                         break;
 5240                 }
 5241                 if (vendor != -1) {
 5242                         *dp = 0;
 5243                         break;
 5244                 }
 5245                 if (device == pci_get_device(dev))
 5246                         break;
 5247         }
 5248         if (dp[0] == '\0')
 5249                 snprintf(dp, 80, "0x%x", pci_get_device(dev));
 5250         if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
 5251             NULL)
 5252                 sprintf(desc, "%s, %s", vp, dp);
 5253 out:
 5254         if (vp != NULL)
 5255                 free(vp, M_DEVBUF);
 5256         if (dp != NULL)
 5257                 free(dp, M_DEVBUF);
 5258         return(desc);
 5259 }
 5260 
/*
 * Read one of the child's PCI instance variables.  Most values come
 * straight out of the cached config registers in dinfo->cfg; the
 * result is stored through 'result' and 0 is returned on success.
 * Returns EINVAL for values that do not exist for this header type and
 * ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high 16 bits. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		/* MIN_GNT only exists in type-0 (normal) headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		/* MAX_LAT only exists in type-0 (normal) headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
 5351 
 5352 int
 5353 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
 5354 {
 5355         struct pci_devinfo *dinfo;
 5356 
 5357         dinfo = device_get_ivars(child);
 5358 
 5359         switch (which) {
 5360         case PCI_IVAR_INTPIN:
 5361                 dinfo->cfg.intpin = value;
 5362                 return (0);
 5363         case PCI_IVAR_ETHADDR:
 5364         case PCI_IVAR_SUBVENDOR:
 5365         case PCI_IVAR_SUBDEVICE:
 5366         case PCI_IVAR_VENDOR:
 5367         case PCI_IVAR_DEVICE:
 5368         case PCI_IVAR_DEVID:
 5369         case PCI_IVAR_CLASS:
 5370         case PCI_IVAR_SUBCLASS:
 5371         case PCI_IVAR_PROGIF:
 5372         case PCI_IVAR_REVID:
 5373         case PCI_IVAR_IRQ:
 5374         case PCI_IVAR_DOMAIN:
 5375         case PCI_IVAR_BUS:
 5376         case PCI_IVAR_SLOT:
 5377         case PCI_IVAR_FUNCTION:
 5378                 return (EINVAL);        /* disallow for now */
 5379 
 5380         default:
 5381                 return (ENOENT);
 5382         }
 5383 }
 5384 
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
#include <sys/cons.h>

/*
 * List resources based on pci map registers, used for within ddb
 */

/*
 * "show pciregs" DDB command: walk the global pci_devq device list and
 * print a pciconf-style summary line (selector, class, subsystem and
 * device IDs, revision, header type) for each device.  The walk stops
 * early if the pager is quit or pci_numdevs entries have been printed.
 */
DB_SHOW_COMMAND_FLAGS(pciregs, db_pci_dump, DB_CMD_MEMSAFE)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/*
		 * Devices without an attached driver are printed as
		 * "noneN" using a separate running counter.
		 */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
#endif /* DDB */
 5433 
/*
 * Lazily reserve the resource backing a BAR the first time a child
 * asks for it: size the BAR by probing it (or reuse the cached size),
 * sanity-check the requested resource type against the BAR type, then
 * reserve an appropriately sized and aligned range and program the BAR
 * with the address that was obtained.  Returns the reserved (inactive)
 * resource or NULL on any failure.
 */
struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int num,
    u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	uint16_t cmd;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;

	/* If rid is managed by EA, ignore it */
	if (pci_ea_is_enabled(child, *rid))
		goto out;

	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval, NULL);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		/* Remember the BAR so later failures can be retried. */
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* The requested resource type must match the BAR's actual type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = ((pci_addr_t)1 << mapsize) * num;
	/* BARs decode naturally aligned ranges; raise alignment to match. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		/* Back out the list entry we just added. */
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#jx bytes of rid %#x res %d failed (%#jx, %#jx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#jx bytes rid %#x type %d at %#jx\n",
		    count, *rid, type, rman_get_start(res));

	/* Disable decoding via the CMD register before updating the BAR */
	cmd = pci_read_config(child, PCIR_COMMAND, 2);
	pci_write_config(child, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	map = rman_get_start(res);
	pci_write_bar(child, pm, map);

	/* Restore the original value of the CMD register */
	pci_write_config(child, PCIR_COMMAND, cmd, 2);
out:
	return (res);
}
 5547 
/*
 * Common resource-allocation path for children of the PCI bus.  IRQs
 * may be lazily routed, bridge window registers are passed up the
 * tree, and memory/ioport BARs are lazily reserved via
 * pci_reserve_map() before the request is satisfied from the child's
 * resource list.
 */
struct resource *
pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_long num,
    u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, num, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Satisfy the request from the (possibly just populated) list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
 5623 
 5624 struct resource *
 5625 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
 5626     rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
 5627 {
 5628 #ifdef PCI_IOV
 5629         struct pci_devinfo *dinfo;
 5630 #endif
 5631 
 5632         if (device_get_parent(child) != dev)
 5633                 return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
 5634                     type, rid, start, end, count, flags));
 5635 
 5636 #ifdef PCI_IOV
 5637         dinfo = device_get_ivars(child);
 5638         if (dinfo->cfg.flags & PCICFG_VF) {
 5639                 switch (type) {
 5640                 /* VFs can't have I/O BARs. */
 5641                 case SYS_RES_IOPORT:
 5642                         return (NULL);
 5643                 case SYS_RES_MEMORY:
 5644                         return (pci_vf_alloc_mem_resource(dev, child, rid,
 5645                             start, end, count, flags));
 5646                 }
 5647 
 5648                 /* Fall through for other types of resource allocations. */
 5649         }
 5650 #endif
 5651 
 5652         return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
 5653             count, 1, flags));
 5654 }
 5655 
/*
 * Bus method: release a resource previously allocated to a child.
 * Mirrors pci_alloc_resource(): grandchildren are passed to our
 * parent, SR-IOV VF memory goes through the VF path, bridge window
 * registers are passed up the tree, and everything else is released
 * from the child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg __unused;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

#ifdef PCI_IOV
	if (cfg->flags & PCICFG_VF) {
		switch (type) {
		/* VFs can't have I/O BARs. */
		case SYS_RES_IOPORT:
			return (EDOOFUS);
		case SYS_RES_MEMORY:
			return (pci_vf_release_mem_resource(dev, child, rid,
			    r));
		}

		/* Fall through for other types of resource allocations. */
	}
#endif

#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
 5706 
 5707 int
 5708 pci_activate_resource(device_t dev, device_t child, int type, int rid,
 5709     struct resource *r)
 5710 {
 5711         struct pci_devinfo *dinfo;
 5712         int error;
 5713 
 5714         error = bus_generic_activate_resource(dev, child, type, rid, r);
 5715         if (error)
 5716                 return (error);
 5717 
 5718         /* Enable decoding in the command register when activating BARs. */
 5719         if (device_get_parent(child) == dev) {
 5720                 /* Device ROMs need their decoding explicitly enabled. */
 5721                 dinfo = device_get_ivars(child);
 5722                 if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
 5723                         pci_write_bar(child, pci_find_bar(child, rid),
 5724                             rman_get_start(r) | PCIM_BIOS_ENABLE);
 5725                 switch (type) {
 5726                 case SYS_RES_IOPORT:
 5727                 case SYS_RES_MEMORY:
 5728                         error = PCI_ENABLE_IO(dev, child, type);
 5729                         break;
 5730                 }
 5731         }
 5732         return (error);
 5733 }
 5734 
 5735 int
 5736 pci_deactivate_resource(device_t dev, device_t child, int type,
 5737     int rid, struct resource *r)
 5738 {
 5739         struct pci_devinfo *dinfo;
 5740         int error;
 5741 
 5742         error = bus_generic_deactivate_resource(dev, child, type, rid, r);
 5743         if (error)
 5744                 return (error);
 5745 
 5746         /* Disable decoding for device ROMs. */
 5747         if (device_get_parent(child) == dev) {
 5748                 dinfo = device_get_ivars(child);
 5749                 if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
 5750                         pci_write_bar(child, pci_find_bar(child, rid),
 5751                             rman_get_start(r));
 5752         }
 5753         return (0);
 5754 }
 5755 
 5756 void
 5757 pci_child_deleted(device_t dev, device_t child)
 5758 {
 5759         struct resource_list_entry *rle;
 5760         struct resource_list *rl;
 5761         struct pci_devinfo *dinfo;
 5762 
 5763         dinfo = device_get_ivars(child);
 5764         rl = &dinfo->resources;
 5765 
 5766         EVENTHANDLER_INVOKE(pci_delete_device, child);
 5767 
 5768         /* Turn off access to resources we're about to free */
 5769         if (bus_child_present(child) != 0) {
 5770                 pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
 5771                     PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);
 5772 
 5773                 pci_disable_busmaster(child);
 5774         }
 5775 
 5776         /* Free all allocated resources */
 5777         STAILQ_FOREACH(rle, rl, link) {
 5778                 if (rle->res) {
 5779                         if (rman_get_flags(rle->res) & RF_ACTIVE ||
 5780                             resource_list_busy(rl, rle->type, rle->rid)) {
 5781                                 pci_printf(&dinfo->cfg,
 5782                                     "Resource still owned, oops. "
 5783                                     "(type=%d, rid=%d, addr=%lx)\n",
 5784                                     rle->type, rle->rid,
 5785                                     rman_get_start(rle->res));
 5786                                 bus_release_resource(child, rle->type, rle->rid,
 5787                                     rle->res);
 5788                         }
 5789                         resource_list_unreserve(rl, dev, child, rle->type,
 5790                             rle->rid);
 5791                 }
 5792         }
 5793         resource_list_free(rl);
 5794 
 5795         pci_freecfg(dinfo);
 5796 }
 5797 
 5798 void
 5799 pci_delete_resource(device_t dev, device_t child, int type, int rid)
 5800 {
 5801         struct pci_devinfo *dinfo;
 5802         struct resource_list *rl;
 5803         struct resource_list_entry *rle;
 5804 
 5805         if (device_get_parent(child) != dev)
 5806                 return;
 5807 
 5808         dinfo = device_get_ivars(child);
 5809         rl = &dinfo->resources;
 5810         rle = resource_list_find(rl, type, rid);
 5811         if (rle == NULL)
 5812                 return;
 5813 
 5814         if (rle->res) {
 5815                 if (rman_get_flags(rle->res) & RF_ACTIVE ||
 5816                     resource_list_busy(rl, type, rid)) {
 5817                         device_printf(dev, "delete_resource: "
 5818                             "Resource still owned by child, oops. "
 5819                             "(type=%d, rid=%d, addr=%jx)\n",
 5820                             type, rid, rman_get_start(rle->res));
 5821                         return;
 5822                 }
 5823                 resource_list_unreserve(rl, dev, child, type, rid);
 5824         }
 5825         resource_list_delete(rl, type, rid);
 5826 }
 5827 
 5828 struct resource_list *
 5829 pci_get_resource_list (device_t dev, device_t child)
 5830 {
 5831         struct pci_devinfo *dinfo = device_get_ivars(child);
 5832 
 5833         return (&dinfo->resources);
 5834 }
 5835 
#ifdef IOMMU
/*
 * Bus method: return the DMA tag a child should use.  With IOMMU
 * support, prefer a tag from the IOMMU for our immediate children and
 * fall back to the bus's own tag when the IOMMU declines (returns
 * NULL) or the device is a grandchild.
 */
bus_dma_tag_t
pci_get_dma_tag(device_t bus, device_t dev)
{
	bus_dma_tag_t tag;
	struct pci_softc *sc;

	if (device_get_parent(dev) == bus) {
		/* try iommu and return if it works */
		tag = iommu_get_dma_tag(bus, dev);
	} else
		tag = NULL;
	if (tag == NULL) {
		sc = device_get_softc(bus);
		tag = sc->sc_dma_tag;
	}
	return (tag);
}
#else
/*
 * Bus method: without IOMMU support every child simply shares the
 * bus's DMA tag.
 */
bus_dma_tag_t
pci_get_dma_tag(device_t bus, device_t dev)
{
	struct pci_softc *sc = device_get_softc(bus);

	return (sc->sc_dma_tag);
}
#endif
 5863 
/*
 * Bus method: read a child's config register, normally by forwarding
 * the access to the parent bridge.  For SR-IOV VFs the vendor/device
 * ID registers are synthesized from cached values, since VF hardware
 * does not implement them.
 */
uint32_t
pci_read_config_method(device_t dev, device_t child, int reg, int width)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;

#ifdef PCI_IOV
	/*
	 * SR-IOV VFs don't implement the VID or DID registers, so we have to
	 * emulate them here.
	 */
	if (cfg->flags & PCICFG_VF) {
		if (reg == PCIR_VENDOR) {
			switch (width) {
			case 4:
				/* Device ID in the upper half. */
				return (cfg->device << 16 | cfg->vendor);
			case 2:
				return (cfg->vendor);
			case 1:
				return (cfg->vendor & 0xff);
			default:
				return (0xffffffff);
			}
		} else if (reg == PCIR_DEVICE) {
			switch (width) {
			/* Note that an unaligned 4-byte read is an error. */
			case 2:
				return (cfg->device);
			case 1:
				return (cfg->device & 0xff);
			default:
				return (0xffffffff);
			}
		}
	}
#endif

	return (PCIB_READ_CONFIG(device_get_parent(dev),
	    cfg->bus, cfg->slot, cfg->func, reg, width));
}
 5904 
 5905 void
 5906 pci_write_config_method(device_t dev, device_t child, int reg,
 5907     uint32_t val, int width)
 5908 {
 5909         struct pci_devinfo *dinfo = device_get_ivars(child);
 5910         pcicfgregs *cfg = &dinfo->cfg;
 5911 
 5912         PCIB_WRITE_CONFIG(device_get_parent(dev),
 5913             cfg->bus, cfg->slot, cfg->func, reg, val, width);
 5914 }
 5915 
 5916 int
 5917 pci_child_location_method(device_t dev, device_t child, struct sbuf *sb)
 5918 {
 5919 
 5920         sbuf_printf(sb, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
 5921             pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
 5922             pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
 5923         return (0);
 5924 }
 5925 
/*
 * Bus method: append the child's plug-and-play identification (vendor,
 * device, subsystem IDs and the combined class/subclass/progif code)
 * to the supplied sbuf for use by devmatch and friends.
 */
int
pci_child_pnpinfo_method(device_t dev, device_t child, struct sbuf *sb)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	sbuf_printf(sb, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
	    cfg->progif);
	return (0);
}
 5940 
/*
 * Bus method: build a device path for the child.  For the UEFI locator
 * the parent's path is extended with a UEFI Pci() node; all other
 * locators are handled generically.
 */
int
pci_get_device_path_method(device_t bus, device_t child, const char *locator,
    struct sbuf *sb)
{
	device_t parent = device_get_parent(bus);
	int rv;

	if (strcmp(locator, BUS_LOCATOR_UEFI) == 0) {
		rv = bus_generic_get_device_path(parent, bus, locator, sb);
		if (rv == 0) {
			sbuf_printf(sb, "/Pci(0x%x,0x%x)", pci_get_slot(child),
			    pci_get_function(child));
		}
		/*
		 * NOTE(review): 'rv' from the parent lookup is ignored and
		 * 0 is returned unconditionally here — presumably so a
		 * partial path is still usable; confirm this is intended.
		 */
		return (0);
	}
	return (bus_generic_get_device_path(bus, child, locator, sb));
}
 5958 
 5959 int
 5960 pci_assign_interrupt_method(device_t dev, device_t child)
 5961 {
 5962         struct pci_devinfo *dinfo = device_get_ivars(child);
 5963         pcicfgregs *cfg = &dinfo->cfg;
 5964 
 5965         return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
 5966             cfg->intpin));
 5967 }
 5968 
/*
 * dev_lookup event handler: translate a pciconf-style device name into
 * a device_t.  Does nothing if another handler already resolved the
 * name (*dev != NULL) or if the name does not parse as a PCI selector.
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		/* Four components: pciD:B:S:F. */
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/* Three components: shift everything down, domain 0. */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/*
	 * Range-check each component; functions above PCI_FUNCMAX are
	 * only valid for slot 0 (ARI).
	 */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
 6017 
/*
 * Module load/unload hook: set up (and tear down) the global PCI device
 * list, the /dev/pci control node, and the dev_lookup name handler.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
        /* Static so MOD_UNLOAD can undo what MOD_LOAD created. */
        static struct cdev *pci_cdev;
        static eventhandler_tag tag;

        switch (what) {
        case MOD_LOAD:
                STAILQ_INIT(&pci_devq);
                pci_generation = 0;
                pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
                    "pci");
                pci_load_vendor_data();
                /* Lets "pciD:B:S:F" strings resolve via pci_lookup(). */
                tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
                    1000);
                break;

        case MOD_UNLOAD:
                if (tag != NULL)
                        EVENTHANDLER_DEREGISTER(dev_lookup, tag);
                destroy_dev(pci_cdev);
                break;
        }

        return (0);
}
 6044 
/*
 * Rewrite the PCI-express capability control registers of "dev" from the
 * copies cached in "dinfo" by pci_cfg_save_pcie().
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define WREG(n, v)      pci_write_config(dev, pos + (n), (v), 2)
        struct pcicfg_pcie *cfg;
        int version, pos;

        cfg = &dinfo->cfg.pcie;
        pos = cfg->pcie_location;

        /* The capability version decides which registers exist below. */
        version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

        WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

        /* Link Control: v2+ always; v1 only on port types that have a link. */
        if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
            cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
            cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
                WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

        /* Slot Control: root ports, or downstream ports with a slot. */
        if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
            (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
             (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
                WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

        /* Root Control: root ports and root complex event collectors. */
        if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
            cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
                WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

        /* The "2" register block only exists in v2+ capabilities. */
        if (version > 1) {
                WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
                WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
                WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
        }
#undef WREG
}
 6080 
 6081 static void
 6082 pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
 6083 {
 6084         pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
 6085             dinfo->cfg.pcix.pcix_command,  2);
 6086 }
 6087 
/*
 * Restore the configuration registers of "dev" from the copy cached in
 * "dinfo" (see pci_cfg_save()), e.g. after a reset or resume.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

        /*
         * Restore the device to full power mode.  We must do this
         * before we restore the registers because moving from D3 to
         * D0 will cause the chip's BARs and some other registers to
         * be reset to some unknown power on reset values.  Cut down
         * the noise on boot by doing nothing if we are already in
         * state D0.
         */
        if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
                pci_set_powerstate(dev, PCI_POWERSTATE_D0);
        /* Common header bytes present in every header type. */
        pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
        pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
        pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
        pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
        pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
        pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
        /* Registers whose location depends on the header type (0/1/2). */
        switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
        case PCIM_HDRTYPE_NORMAL:
                pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
                pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
                break;
        case PCIM_HDRTYPE_BRIDGE:
                pci_write_config(dev, PCIR_SECLAT_1,
                    dinfo->cfg.bridge.br_seclat, 1);
                pci_write_config(dev, PCIR_SUBBUS_1,
                    dinfo->cfg.bridge.br_subbus, 1);
                pci_write_config(dev, PCIR_SECBUS_1,
                    dinfo->cfg.bridge.br_secbus, 1);
                pci_write_config(dev, PCIR_PRIBUS_1,
                    dinfo->cfg.bridge.br_pribus, 1);
                pci_write_config(dev, PCIR_BRIDGECTL_1,
                    dinfo->cfg.bridge.br_control, 2);
                break;
        case PCIM_HDRTYPE_CARDBUS:
                pci_write_config(dev, PCIR_SECLAT_2,
                    dinfo->cfg.bridge.br_seclat, 1);
                pci_write_config(dev, PCIR_SUBBUS_2,
                    dinfo->cfg.bridge.br_subbus, 1);
                pci_write_config(dev, PCIR_SECBUS_2,
                    dinfo->cfg.bridge.br_secbus, 1);
                pci_write_config(dev, PCIR_PRIBUS_2,
                    dinfo->cfg.bridge.br_pribus, 1);
                pci_write_config(dev, PCIR_BRIDGECTL_2,
                    dinfo->cfg.bridge.br_control, 2);
                break;
        }
        pci_restore_bars(dev);

        /*
         * NOTE(review): bridges skip the COMMAND restore here --
         * presumably handled by the bridge driver itself; confirm.
         */
        if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_BRIDGE)
                pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);

        /*
         * Restore extended capabilities for PCI-Express and PCI-X
         */
        if (dinfo->cfg.pcie.pcie_location != 0)
                pci_cfg_restore_pcie(dev, dinfo);
        if (dinfo->cfg.pcix.pcix_location != 0)
                pci_cfg_restore_pcix(dev, dinfo);

        /* Restore MSI and MSI-X configurations if they are present. */
        if (dinfo->cfg.msi.msi_location != 0)
                pci_resume_msi(dev);
        if (dinfo->cfg.msix.msix_location != 0)
                pci_resume_msix(dev);

#ifdef PCI_IOV
        if (dinfo->cfg.iov != NULL)
                pci_iov_cfg_restore(dev, dinfo);
#endif
}
 6162 
/*
 * Cache the PCI-express capability control registers of "dev" in "dinfo"
 * so pci_cfg_restore_pcie() can rewrite them later.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define RREG(n) pci_read_config(dev, pos + (n), 2)
        struct pcicfg_pcie *cfg;
        int version, pos;

        cfg = &dinfo->cfg.pcie;
        pos = cfg->pcie_location;

        cfg->pcie_flags = RREG(PCIER_FLAGS);

        /* The capability version decides which registers exist below. */
        version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

        cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

        /* Link Control: v2+ always; v1 only on port types that have a link. */
        if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
            cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
            cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
                cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

        /* Slot Control: root ports, or downstream ports with a slot. */
        if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
            (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
             (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
                cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

        /* Root Control: root ports and root complex event collectors. */
        if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
            cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
                cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

        /* The "2" register block only exists in v2+ capabilities. */
        if (version > 1) {
                cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
                cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
                cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
        }
#undef RREG
}
 6200 
 6201 static void
 6202 pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
 6203 {
 6204         dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
 6205             dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
 6206 }
 6207 
/*
 * Snapshot the writable configuration registers of "dev" into "dinfo".
 * If "setstate" is non-zero and the device has no driver, it may also be
 * powered down to D3 according to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
        uint32_t cls;
        int ps;

        /*
         * Some drivers apparently write to these registers w/o updating our
         * cached copy.  No harm happens if we update the copy, so do so here
         * so we can restore them.  The COMMAND register is modified by the
         * bus w/o updating the cache.  This should represent the normally
         * writable portion of the 'defined' part of type 0/1/2 headers.
         */
        dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
        dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
        dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
        dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
        dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
        dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
        dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
        dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
        dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
        dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
        dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
        /* Registers whose location depends on the header type (0/1/2). */
        switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
        case PCIM_HDRTYPE_NORMAL:
                dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
                dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
                dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
                dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
                break;
        case PCIM_HDRTYPE_BRIDGE:
                dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
                    PCIR_SECLAT_1, 1);
                dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
                    PCIR_SUBBUS_1, 1);
                dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
                    PCIR_SECBUS_1, 1);
                dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
                    PCIR_PRIBUS_1, 1);
                dinfo->cfg.bridge.br_control = pci_read_config(dev,
                    PCIR_BRIDGECTL_1, 2);
                break;
        case PCIM_HDRTYPE_CARDBUS:
                dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
                    PCIR_SECLAT_2, 1);
                dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
                    PCIR_SUBBUS_2, 1);
                dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
                    PCIR_SECBUS_2, 1);
                dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
                    PCIR_PRIBUS_2, 1);
                dinfo->cfg.bridge.br_control = pci_read_config(dev,
                    PCIR_BRIDGECTL_2, 2);
                dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
                dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
                break;
        }

        /* Capability-specific state saved by dedicated helpers. */
        if (dinfo->cfg.pcie.pcie_location != 0)
                pci_cfg_save_pcie(dev, dinfo);

        if (dinfo->cfg.pcix.pcix_location != 0)
                pci_cfg_save_pcix(dev, dinfo);

#ifdef PCI_IOV
        if (dinfo->cfg.iov != NULL)
                pci_iov_cfg_save(dev, dinfo);
#endif

        /*
         * don't set the state for display devices, base peripherals and
         * memory devices since bad things happen when they are powered down.
         * We should (a) have drivers that can easily detach and (b) use
         * generic drivers for these devices so that some device actually
         * attaches.  We need to make sure that when we implement (a) we don't
         * power the device down on a reattach.
         */
        cls = pci_get_class(dev);
        if (!setstate)
                return;
        /* Each case falls through to the more aggressive ones below it. */
        switch (pci_do_power_nodriver)
        {
                case 0:         /* NO powerdown at all */
                        return;
                case 1:         /* Conservative about what to power down */
                        if (cls == PCIC_STORAGE)
                                return;
                        /*FALLTHROUGH*/
                case 2:         /* Aggressive about what to power down */
                        if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
                            cls == PCIC_BASEPERIPH)
                                return;
                        /*FALLTHROUGH*/
                case 3:         /* Power down everything */
                        break;
        }
        /*
         * PCI spec says we can only go into D3 state from D0 state.
         * Transition from D[12] into D0 before going to D3 state.
         */
        ps = pci_get_powerstate(dev);
        if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
                pci_set_powerstate(dev, PCI_POWERSTATE_D0);
        if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
                pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
 6315 
 6316 /* Wrapper APIs suitable for device driver use. */
 6317 void
 6318 pci_save_state(device_t dev)
 6319 {
 6320         struct pci_devinfo *dinfo;
 6321 
 6322         dinfo = device_get_ivars(dev);
 6323         pci_cfg_save(dev, dinfo, 0);
 6324 }
 6325 
 6326 void
 6327 pci_restore_state(device_t dev)
 6328 {
 6329         struct pci_devinfo *dinfo;
 6330 
 6331         dinfo = device_get_ivars(dev);
 6332         pci_cfg_restore(dev, dinfo);
 6333 }
 6334 
 6335 static int
 6336 pci_get_id_method(device_t dev, device_t child, enum pci_id_type type,
 6337     uintptr_t *id)
 6338 {
 6339 
 6340         return (PCIB_GET_ID(device_get_parent(dev), child, type, id));
 6341 }
 6342 
 6343 /* Find the upstream port of a given PCI device in a root complex. */
device_t
pci_find_pcie_root_port(device_t dev)
{
        struct pci_devinfo *dinfo;
        devclass_t pci_class;
        device_t pcib, bus;

        /* The caller must hand us a device that sits on a PCI bus. */
        pci_class = devclass_find("pci");
        KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
            ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));

        /*
         * Walk the bridge hierarchy until we find a PCI-e root
         * port or a non-PCI device.
         */
        for (;;) {
                /* dev -> its bus -> the bridge providing that bus. */
                bus = device_get_parent(dev);
                KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
                    device_get_nameunit(dev)));

                pcib = device_get_parent(bus);
                KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
                    device_get_nameunit(bus)));

                /*
                 * pcib's parent must be a PCI bus for this to be a
                 * PCI-PCI bridge.
                 */
                if (device_get_devclass(device_get_parent(pcib)) != pci_class)
                        return (NULL);

                dinfo = device_get_ivars(pcib);
                if (dinfo->cfg.pcie.pcie_location != 0 &&
                    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
                        return (pcib);

                /* Not a root port yet: climb one level and try again. */
                dev = pcib;
        }
}
 6383 
 6384 /*
 6385  * Wait for pending transactions to complete on a PCI-express function.
 6386  *
 6387  * The maximum delay is specified in milliseconds in max_delay.  Note
 6388  * that this function may sleep.
 6389  *
 6390  * Returns true if the function is idle and false if the timeout is
 6391  * exceeded.  If dev is not a PCI-express function, this returns true.
 6392  */
 6393 bool
 6394 pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
 6395 {
 6396         struct pci_devinfo *dinfo = device_get_ivars(dev);
 6397         uint16_t sta;
 6398         int cap;
 6399 
 6400         cap = dinfo->cfg.pcie.pcie_location;
 6401         if (cap == 0)
 6402                 return (true);
 6403 
 6404         sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
 6405         while (sta & PCIEM_STA_TRANSACTION_PND) {
 6406                 if (max_delay == 0)
 6407                         return (false);
 6408 
 6409                 /* Poll once every 100 milliseconds up to the timeout. */
 6410                 if (max_delay > 100) {
 6411                         pause_sbt("pcietp", 100 * SBT_1MS, 0, C_HARDCLOCK);
 6412                         max_delay -= 100;
 6413                 } else {
 6414                         pause_sbt("pcietp", max_delay * SBT_1MS, 0,
 6415                             C_HARDCLOCK);
 6416                         max_delay = 0;
 6417                 }
 6418                 sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
 6419         }
 6420 
 6421         return (true);
 6422 }
 6423 
 6424 /*
 6425  * Determine the maximum Completion Timeout in microseconds.
 6426  *
 6427  * For non-PCI-express functions this returns 0.
 6428  */
 6429 int
 6430 pcie_get_max_completion_timeout(device_t dev)
 6431 {
 6432         struct pci_devinfo *dinfo = device_get_ivars(dev);
 6433         int cap;
 6434 
 6435         cap = dinfo->cfg.pcie.pcie_location;
 6436         if (cap == 0)
 6437                 return (0);
 6438 
 6439         /*
 6440          * Functions using the 1.x spec use the default timeout range of
 6441          * 50 microseconds to 50 milliseconds.  Functions that do not
 6442          * support programmable timeouts also use this range.
 6443          */
 6444         if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2 ||
 6445             (pci_read_config(dev, cap + PCIER_DEVICE_CAP2, 4) &
 6446             PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
 6447                 return (50 * 1000);
 6448 
 6449         switch (pci_read_config(dev, cap + PCIER_DEVICE_CTL2, 2) &
 6450             PCIEM_CTL2_COMP_TIMO_VAL) {
 6451         case PCIEM_CTL2_COMP_TIMO_100US:
 6452                 return (100);
 6453         case PCIEM_CTL2_COMP_TIMO_10MS:
 6454                 return (10 * 1000);
 6455         case PCIEM_CTL2_COMP_TIMO_55MS:
 6456                 return (55 * 1000);
 6457         case PCIEM_CTL2_COMP_TIMO_210MS:
 6458                 return (210 * 1000);
 6459         case PCIEM_CTL2_COMP_TIMO_900MS:
 6460                 return (900 * 1000);
 6461         case PCIEM_CTL2_COMP_TIMO_3500MS:
 6462                 return (3500 * 1000);
 6463         case PCIEM_CTL2_COMP_TIMO_13S:
 6464                 return (13 * 1000 * 1000);
 6465         case PCIEM_CTL2_COMP_TIMO_64S:
 6466                 return (64 * 1000 * 1000);
 6467         default:
 6468                 return (50 * 1000);
 6469         }
 6470 }
 6471 
/*
 * Report a PCIe error delivered via ACPI APEI, dumping the AER registers
 * from the APEI record (if supplied) and then clearing the device's own
 * AER and PCIe status registers as a minimal recovery step.
 */
void
pcie_apei_error(device_t dev, int sev, uint8_t *aerp)
{
        struct pci_devinfo *dinfo = device_get_ivars(dev);
        const char *s;
        int aer;
        uint32_t r, r1;
        uint16_t rs;

        /* Translate the severity code into a human-readable label. */
        if (sev == PCIEM_STA_CORRECTABLE_ERROR)
                s = "Correctable";
        else if (sev == PCIEM_STA_NON_FATAL_ERROR)
                s = "Uncorrectable (Non-Fatal)";
        else
                s = "Uncorrectable (Fatal)";
        device_printf(dev, "%s PCIe error reported by APEI\n", s);
        if (aerp) {
                /* Pick the COR or UC register pair matching the severity. */
                if (sev == PCIEM_STA_CORRECTABLE_ERROR) {
                        r = le32dec(aerp + PCIR_AER_COR_STATUS);
                        r1 = le32dec(aerp + PCIR_AER_COR_MASK);
                } else {
                        r = le32dec(aerp + PCIR_AER_UC_STATUS);
                        r1 = le32dec(aerp + PCIR_AER_UC_MASK);
                }
                device_printf(dev, "status 0x%08x mask 0x%08x", r, r1);
                if (sev != PCIEM_STA_CORRECTABLE_ERROR) {
                        /* Uncorrectable errors also carry severity and
                         * first-error-pointer information. */
                        r = le32dec(aerp + PCIR_AER_UC_SEVERITY);
                        rs = le16dec(aerp + PCIR_AER_CAP_CONTROL);
                        printf(" severity 0x%08x first %d\n",
                            r, rs & 0x1f);
                } else
                        printf("\n");
        }

        /* As kind of recovery just report and clear the error statuses. */
        if (pci_find_extcap(dev, PCIZ_AER, &aer) == 0) {
                /* AER status bits are write-1-to-clear. */
                r = pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
                if (r != 0) {
                        pci_write_config(dev, aer + PCIR_AER_UC_STATUS, r, 4);
                        device_printf(dev, "Clearing UC AER errors 0x%08x\n", r);
                }

                r = pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
                if (r != 0) {
                        pci_write_config(dev, aer + PCIR_AER_COR_STATUS, r, 4);
                        device_printf(dev, "Clearing COR AER errors 0x%08x\n", r);
                }
        }
        if (dinfo->cfg.pcie.pcie_location != 0) {
                /* Also clear the error bits in the PCIe device status. */
                rs = pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
                    PCIER_DEVICE_STA, 2);
                if ((rs & (PCIEM_STA_CORRECTABLE_ERROR |
                    PCIEM_STA_NON_FATAL_ERROR | PCIEM_STA_FATAL_ERROR |
                    PCIEM_STA_UNSUPPORTED_REQ)) != 0) {
                        pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
                            PCIER_DEVICE_STA, rs, 2);
                        device_printf(dev, "Clearing PCIe errors 0x%04x\n", rs);
                }
        }
}
 6532 
 6533 /*
 6534  * Perform a Function Level Reset (FLR) on a device.
 6535  *
 6536  * This function first waits for any pending transactions to complete
 6537  * within the timeout specified by max_delay.  If transactions are
 6538  * still pending, the function will return false without attempting a
 6539  * reset.
 6540  *
 6541  * If dev is not a PCI-express function or does not support FLR, this
 6542  * function returns false.
 6543  *
 6544  * Note that no registers are saved or restored.  The caller is
 6545  * responsible for saving and restoring any registers including
 6546  * PCI-standard registers via pci_save_state() and
 6547  * pci_restore_state().
 6548  */
bool
pcie_flr(device_t dev, u_int max_delay, bool force)
{
        struct pci_devinfo *dinfo = device_get_ivars(dev);
        uint16_t cmd, ctl;
        int compl_delay;
        int cap;

        /* Must be a PCI-express function... */
        cap = dinfo->cfg.pcie.pcie_location;
        if (cap == 0)
                return (false);

        /* ...that advertises FLR support in its device capabilities. */
        if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
                return (false);

        /*
         * Disable busmastering to prevent generation of new
         * transactions while waiting for the device to go idle.  If
         * the idle timeout fails, the command register is restored
         * which will re-enable busmastering.
         */
        cmd = pci_read_config(dev, PCIR_COMMAND, 2);
        pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
        if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
                if (!force) {
                        /* Abort: undo the busmaster disable and bail. */
                        pci_write_config(dev, PCIR_COMMAND, cmd, 2);
                        return (false);
                }
                pci_printf(&dinfo->cfg,
                    "Resetting with transactions pending after %d ms\n",
                    max_delay);

                /*
                 * Extend the post-FLR delay to cover the maximum
                 * Completion Timeout delay of anything in flight
                 * during the FLR delay.  Enforce a minimum delay of
                 * at least 10ms.
                 */
                compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
                if (compl_delay < 10)
                        compl_delay = 10;
        } else
                compl_delay = 0;

        /* Initiate the reset. */
        ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
        pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
            PCIEM_CTL_INITIATE_FLR, 2);

        /* Wait for 100ms. */
        pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);

        /* Transactions still pending after the reset is unexpected. */
        if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
            PCIEM_STA_TRANSACTION_PND)
                pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
        return (true);
}
 6606 
 6607 /*
 6608  * Attempt a power-management reset by cycling the device in/out of D3
 6609  * state.  PCI spec says we can only go into D3 state from D0 state.
 6610  * Transition from D[12] into D0 before going to D3 state.
 6611  */
 6612 int
 6613 pci_power_reset(device_t dev)
 6614 {
 6615         int ps;
 6616 
 6617         ps = pci_get_powerstate(dev);
 6618         if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
 6619                 pci_set_powerstate(dev, PCI_POWERSTATE_D0);
 6620         pci_set_powerstate(dev, PCI_POWERSTATE_D3);
 6621         pci_set_powerstate(dev, ps);
 6622         return (0);
 6623 }
 6624 
 6625 /*
 6626  * Try link drop and retrain of the downstream port of upstream
 6627  * switch, for PCIe.  According to the PCIe 3.0 spec 6.6.1, this must
 6628  * cause Conventional Hot reset of the device in the slot.
 6629  * Alternative, for PCIe, could be the secondary bus reset initiatied
 6630  * on the upstream switch PCIR_BRIDGECTL_1, bit 6.
 6631  */
 6632 int
 6633 pcie_link_reset(device_t port, int pcie_location)
 6634 {
 6635         uint16_t v;
 6636 
 6637         v = pci_read_config(port, pcie_location + PCIER_LINK_CTL, 2);
 6638         v |= PCIEM_LINK_CTL_LINK_DIS;
 6639         pci_write_config(port, pcie_location + PCIER_LINK_CTL, v, 2);
 6640         pause_sbt("pcier1", mstosbt(20), 0, 0);
 6641         v &= ~PCIEM_LINK_CTL_LINK_DIS;
 6642         v |= PCIEM_LINK_CTL_RETRAIN_LINK;
 6643         pci_write_config(port, pcie_location + PCIER_LINK_CTL, v, 2);
 6644         pause_sbt("pcier2", mstosbt(100), 0, 0); /* 100 ms */
 6645         v = pci_read_config(port, pcie_location + PCIER_LINK_STA, 2);
 6646         return ((v & PCIEM_LINK_STA_TRAINING) != 0 ? ETIMEDOUT : 0);
 6647 }
 6648 
 6649 static int
 6650 pci_reset_post(device_t dev, device_t child)
 6651 {
 6652 
 6653         if (dev == device_get_parent(child))
 6654                 pci_restore_state(child);
 6655         return (0);
 6656 }
 6657 
 6658 static int
 6659 pci_reset_prepare(device_t dev, device_t child)
 6660 {
 6661 
 6662         if (dev == device_get_parent(child))
 6663                 pci_save_state(child);
 6664         return (0);
 6665 }
 6666 
 6667 static int
 6668 pci_reset_child(device_t dev, device_t child, int flags)
 6669 {
 6670         int error;
 6671 
 6672         if (dev == NULL || device_get_parent(child) != dev)
 6673                 return (0);
 6674         if ((flags & DEVF_RESET_DETACH) != 0) {
 6675                 error = device_get_state(child) == DS_ATTACHED ?
 6676                     device_detach(child) : 0;
 6677         } else {
 6678                 error = BUS_SUSPEND_CHILD(dev, child);
 6679         }
 6680         if (error == 0) {
 6681                 if (!pcie_flr(child, 1000, false)) {
 6682                         error = BUS_RESET_PREPARE(dev, child);
 6683                         if (error == 0)
 6684                                 pci_power_reset(child);
 6685                         BUS_RESET_POST(dev, child);
 6686                 }
 6687                 if ((flags & DEVF_RESET_DETACH) != 0)
 6688                         device_probe_and_attach(child);
 6689                 else
 6690                         BUS_RESUME_CHILD(dev, child);
 6691         }
 6692         return (error);
 6693 }
 6694 
 6695 const struct pci_device_table *
 6696 pci_match_device(device_t child, const struct pci_device_table *id, size_t nelt)
 6697 {
 6698         bool match;
 6699         uint16_t vendor, device, subvendor, subdevice, class, subclass, revid;
 6700 
 6701         vendor = pci_get_vendor(child);
 6702         device = pci_get_device(child);
 6703         subvendor = pci_get_subvendor(child);
 6704         subdevice = pci_get_subdevice(child);
 6705         class = pci_get_class(child);
 6706         subclass = pci_get_subclass(child);
 6707         revid = pci_get_revid(child);
 6708         while (nelt-- > 0) {
 6709                 match = true;
 6710                 if (id->match_flag_vendor)
 6711                         match &= vendor == id->vendor;
 6712                 if (id->match_flag_device)
 6713                         match &= device == id->device;
 6714                 if (id->match_flag_subvendor)
 6715                         match &= subvendor == id->subvendor;
 6716                 if (id->match_flag_subdevice)
 6717                         match &= subdevice == id->subdevice;
 6718                 if (id->match_flag_class)
 6719                         match &= class == id->class_id;
 6720                 if (id->match_flag_subclass)
 6721                         match &= subclass == id->subclass;
 6722                 if (id->match_flag_revid)
 6723                         match &= revid == id->revid;
 6724                 if (match)
 6725                         return (id);
 6726                 id++;
 6727         }
 6728         return (NULL);
 6729 }
 6730 
 6731 static void
 6732 pci_print_faulted_dev_name(const struct pci_devinfo *dinfo)
 6733 {
 6734         const char *dev_name;
 6735         device_t dev;
 6736 
 6737         dev = dinfo->cfg.dev;
 6738         printf("pci%d:%d:%d:%d", dinfo->cfg.domain, dinfo->cfg.bus,
 6739             dinfo->cfg.slot, dinfo->cfg.func);
 6740         dev_name = device_get_name(dev);
 6741         if (dev_name != NULL)
 6742                 printf(" (%s%d)", dev_name, device_get_unit(dev));
 6743 }
 6744 
/*
 * Walk every known PCI device (the global pci_devq list) and report any
 * latched error state: legacy PCI status errors, PCI Express device
 * status errors, and Advanced Error Reporting (AER) status including
 * the header log.  Purely diagnostic; nothing is cleared here (see the
 * DDB "pci_clearerr" command below for that).
 */
void
pci_print_faulted_dev(void)
{
	struct pci_devinfo *dinfo;
	device_t dev;
	int aer, i;
	uint32_t r1, r2;
	uint16_t status;

	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
		dev = dinfo->cfg.dev;
		/* Legacy PCI status register: report any latched error bits. */
		status = pci_read_config(dev, PCIR_STATUS, 2);
		status &= PCIM_STATUS_MDPERR | PCIM_STATUS_STABORT |
		    PCIM_STATUS_RTABORT | PCIM_STATUS_RMABORT |
		    PCIM_STATUS_SERR | PCIM_STATUS_PERR;
		if (status != 0) {
			pci_print_faulted_dev_name(dinfo);
			printf(" error 0x%04x\n", status);
		}
		/* PCIe capability present: check the device status register. */
		if (dinfo->cfg.pcie.pcie_location != 0) {
			status = pci_read_config(dev,
			    dinfo->cfg.pcie.pcie_location +
			    PCIER_DEVICE_STA, 2);
			if ((status & (PCIEM_STA_CORRECTABLE_ERROR |
			    PCIEM_STA_NON_FATAL_ERROR | PCIEM_STA_FATAL_ERROR |
			    PCIEM_STA_UNSUPPORTED_REQ)) != 0) {
				pci_print_faulted_dev_name(dinfo);
				printf(" PCIe DEVCTL 0x%04x DEVSTA 0x%04x\n",
				    pci_read_config(dev,
				    dinfo->cfg.pcie.pcie_location +
				    PCIER_DEVICE_CTL, 2),
				    status);
			}
		}
		/*
		 * AER extended capability: dump uncorrectable/correctable
		 * status with the associated mask/severity/control registers,
		 * plus the four-dword TLP header log.
		 */
		if (pci_find_extcap(dev, PCIZ_AER, &aer) == 0) {
			r1 = pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
			r2 = pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
			if (r1 != 0 || r2 != 0) {
				pci_print_faulted_dev_name(dinfo);
				printf(" AER UC 0x%08x Mask 0x%08x Svr 0x%08x\n"
				    "  COR 0x%08x Mask 0x%08x Ctl 0x%08x\n",
				    r1, pci_read_config(dev, aer +
				    PCIR_AER_UC_MASK, 4),
				    pci_read_config(dev, aer +
				    PCIR_AER_UC_SEVERITY, 4),
				    r2, pci_read_config(dev, aer +
				    PCIR_AER_COR_MASK, 4),
				    pci_read_config(dev, aer +
				    PCIR_AER_CAP_CONTROL, 4));
				for (i = 0; i < 4; i++) {
					r1 = pci_read_config(dev, aer +
					    PCIR_AER_HEADER_LOG + i * 4, 4);
					printf("    HL%d: 0x%08x\n", i, r1);
				}
			}
		}
	}
}
 6803 
 6804 #ifdef DDB
/*
 * DDB "show pcierr" command: dump latched error state for all PCI
 * devices.  Thin wrapper around pci_print_faulted_dev().
 */
DB_SHOW_COMMAND_FLAGS(pcierr, pci_print_faulted_dev_db, DB_CMD_MEMSAFE)
{

	pci_print_faulted_dev();
}
 6810 
 6811 static void
 6812 db_clear_pcie_errors(const struct pci_devinfo *dinfo)
 6813 {
 6814         device_t dev;
 6815         int aer;
 6816         uint32_t r;
 6817 
 6818         dev = dinfo->cfg.dev;
 6819         r = pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
 6820             PCIER_DEVICE_STA, 2);
 6821         pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
 6822             PCIER_DEVICE_STA, r, 2);
 6823 
 6824         if (pci_find_extcap(dev, PCIZ_AER, &aer) != 0)
 6825                 return;
 6826         r = pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
 6827         if (r != 0)
 6828                 pci_write_config(dev, aer + PCIR_AER_UC_STATUS, r, 4);
 6829         r = pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
 6830         if (r != 0)
 6831                 pci_write_config(dev, aer + PCIR_AER_COR_STATUS, r, 4);
 6832 }
 6833 
 6834 DB_COMMAND_FLAGS(pci_clearerr, db_pci_clearerr, DB_CMD_MEMSAFE)
 6835 {
 6836         struct pci_devinfo *dinfo;
 6837         device_t dev;
 6838         uint16_t status, status1;
 6839 
 6840         STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
 6841                 dev = dinfo->cfg.dev;
 6842                 status1 = status = pci_read_config(dev, PCIR_STATUS, 2);
 6843                 status1 &= PCIM_STATUS_MDPERR | PCIM_STATUS_STABORT |
 6844                     PCIM_STATUS_RTABORT | PCIM_STATUS_RMABORT |
 6845                     PCIM_STATUS_SERR | PCIM_STATUS_PERR;
 6846                 if (status1 != 0) {
 6847                         status &= ~status1;
 6848                         pci_write_config(dev, PCIR_STATUS, status, 2);
 6849                 }
 6850                 if (dinfo->cfg.pcie.pcie_location != 0)
 6851                         db_clear_pcie_errors(dinfo);
 6852         }
 6853 }
 6854 #endif

Cache object: 380bc6e751626dbaf39e5c133e598d4f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.