FreeBSD/Linux Kernel Cross Reference
sys/net/iflib.c


    1 /*-
    2  * Copyright (c) 2014-2016, Matthew Macy <mmacy@nextbsd.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions are met:
    7  *
    8  *  1. Redistributions of source code must retain the above copyright notice,
    9  *     this list of conditions and the following disclaimer.
   10  *
   11  *  2. Neither the name of Matthew Macy nor the names of its
   12  *     contributors may be used to endorse or promote products derived from
   13  *     this software without specific prior written permission.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   19  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   22  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   24  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   25  * POSSIBILITY OF SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD: releng/11.1/sys/net/iflib.c 318237 2017-05-12 11:40:58Z bz $");
   30 
   31 #include "opt_inet.h"
   32 #include "opt_inet6.h"
   33 #include "opt_acpi.h"
   34 
   35 #include <sys/param.h>
   36 #include <sys/types.h>
   37 #include <sys/bus.h>
   38 #include <sys/eventhandler.h>
   39 #include <sys/sockio.h>
   40 #include <sys/kernel.h>
   41 #include <sys/lock.h>
   42 #include <sys/mutex.h>
   43 #include <sys/module.h>
   44 #include <sys/kobj.h>
   45 #include <sys/rman.h>
   46 #include <sys/sbuf.h>
   47 #include <sys/smp.h>
   48 #include <sys/socket.h>
   49 #include <sys/sysctl.h>
   50 #include <sys/syslog.h>
   51 #include <sys/taskqueue.h>
   52 #include <sys/limits.h>
   53 
   54 
   55 #include <net/if.h>
   56 #include <net/if_var.h>
   57 #include <net/if_types.h>
   58 #include <net/if_media.h>
   59 #include <net/bpf.h>
   60 #include <net/ethernet.h>
   61 #include <net/mp_ring.h>
   62 
   63 #include <netinet/in.h>
   64 #include <netinet/in_pcb.h>
   65 #include <netinet/tcp_lro.h>
   66 #include <netinet/in_systm.h>
   67 #include <netinet/if_ether.h>
   68 #include <netinet/ip.h>
   69 #include <netinet/ip6.h>
   70 #include <netinet/tcp.h>
   71 
   72 #include <machine/bus.h>
   73 #include <machine/in_cksum.h>
   74 
   75 #include <vm/vm.h>
   76 #include <vm/pmap.h>
   77 
   78 #include <dev/led/led.h>
   79 #include <dev/pci/pcireg.h>
   80 #include <dev/pci/pcivar.h>
   81 #include <dev/pci/pci_private.h>
   82 
   83 #include <net/iflib.h>
   84 
   85 #include "ifdi_if.h"
   86 
   87 #if defined(__i386__) || defined(__amd64__)
   88 #include <sys/memdesc.h>
   89 #include <machine/bus.h>
   90 #include <machine/md_var.h>
   91 #include <machine/specialreg.h>
   92 #include <x86/include/busdma_impl.h>
   93 #include <x86/iommu/busdma_dmar.h>
   94 #endif
   95 
   96 
   97 /*
   98  * Enable accounting of every mbuf as it comes into and goes out of iflib's software descriptor references.
   99  */
  100 #define MEMORY_LOGGING 0
  101 /*
  102  * Enable mbuf vectors for compressing long mbuf chains
  103  */
  104 
  105 /*
  106  * NB:
  107  * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead
  108  *   we prefetch needs to be determined by the time spent in m_free vis a vis
  109  *   the cost of a prefetch. This will of course vary based on the workload:
  110  *      - NFLX's m_free path is dominated by vm-based M_EXT manipulation which
  111  *        is quite expensive, thus suggesting very little prefetch.
  112  *      - small packet forwarding which is just returning a single mbuf to
  113  *        UMA will typically be very fast vis a vis the cost of a memory
  114  *        access.
  115  */
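/*
 * Illustrative sketch only (not part of iflib): one shape a tunable
 * tx-clean prefetch distance could take, per the note above.  The
 * "example_prefetch_distance" tunable and the loop are hypothetical,
 * and a power-of-two ring size is assumed.
 */
#if 0
static int example_prefetch_distance = 4;	/* hypothetical tunable */

static void
example_tx_clean(struct mbuf **ifsd_m, int cidx, int count, int size)
{
	int i;

	for (i = 0; i < count; i++) {
		/* pull in the entry we will free a few iterations from now */
		prefetch(&ifsd_m[(cidx + example_prefetch_distance) & (size - 1)]);
		m_free(ifsd_m[cidx]);
		ifsd_m[cidx] = NULL;
		cidx = (cidx + 1) & (size - 1);
	}
}
#endif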
  116 
  117 
  118 /*
  119  * File organization:
  120  *  - private structures
  121  *  - iflib private utility functions
  122  *  - ifnet functions
  123  *  - vlan registry and other exported functions
  124  *  - iflib public core functions
  125  *
  126  *
  127  */
  128 static MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library");
  129 
  130 struct iflib_txq;
  131 typedef struct iflib_txq *iflib_txq_t;
  132 struct iflib_rxq;
  133 typedef struct iflib_rxq *iflib_rxq_t;
  134 struct iflib_fl;
  135 typedef struct iflib_fl *iflib_fl_t;
  136 
  137 typedef struct iflib_filter_info {
  138         driver_filter_t *ifi_filter;
  139         void *ifi_filter_arg;
  140         struct grouptask *ifi_task;
  141 } *iflib_filter_info_t;
  142 
  143 struct iflib_ctx {
  144         KOBJ_FIELDS;
  145         /*
  146          * Pointer to hardware driver's softc
  147          */
  148         void *ifc_softc;
  149         device_t ifc_dev;
  150         if_t ifc_ifp;
  151 
  152         cpuset_t ifc_cpus;
  153         if_shared_ctx_t ifc_sctx;
  154         struct if_softc_ctx ifc_softc_ctx;
  155 
  156         struct mtx ifc_mtx;
  157 
  158         uint16_t ifc_nhwtxqs;
  159         uint16_t ifc_nhwrxqs;
  160 
  161         iflib_txq_t ifc_txqs;
  162         iflib_rxq_t ifc_rxqs;
  163         uint32_t ifc_if_flags;
  164         uint32_t ifc_flags;
  165         uint32_t ifc_max_fl_buf_size;
  166         int ifc_in_detach;
  167 
  168         int ifc_link_state;
  169         int ifc_link_irq;
  170         int ifc_pause_frames;
  171         int ifc_watchdog_events;
  172         struct cdev *ifc_led_dev;
  173         struct resource *ifc_msix_mem;
  174 
  175         struct if_irq ifc_legacy_irq;
  176         struct grouptask ifc_admin_task;
  177         struct grouptask ifc_vflr_task;
  178         struct iflib_filter_info ifc_filter_info;
  179         struct ifmedia  ifc_media;
  180 
  181         struct sysctl_oid *ifc_sysctl_node;
  182         uint16_t ifc_sysctl_ntxqs;
  183         uint16_t ifc_sysctl_nrxqs;
  184         uint16_t ifc_sysctl_qs_eq_override;
  185 
  186         uint16_t ifc_sysctl_ntxds[8];
  187         uint16_t ifc_sysctl_nrxds[8];
  188         struct if_txrx ifc_txrx;
  189 #define isc_txd_encap  ifc_txrx.ift_txd_encap
  190 #define isc_txd_flush  ifc_txrx.ift_txd_flush
  191 #define isc_txd_credits_update  ifc_txrx.ift_txd_credits_update
  192 #define isc_rxd_available ifc_txrx.ift_rxd_available
  193 #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get
  194 #define isc_rxd_refill ifc_txrx.ift_rxd_refill
  195 #define isc_rxd_flush ifc_txrx.ift_rxd_flush
  198 #define isc_legacy_intr ifc_txrx.ift_legacy_intr
  199         eventhandler_tag ifc_vlan_attach_event;
  200         eventhandler_tag ifc_vlan_detach_event;
  201         uint8_t ifc_mac[ETHER_ADDR_LEN];
  202         char ifc_mtx_name[16];
  203 };
  204 
  205 
  206 void *
  207 iflib_get_softc(if_ctx_t ctx)
  208 {
  209 
  210         return (ctx->ifc_softc);
  211 }
  212 
  213 device_t
  214 iflib_get_dev(if_ctx_t ctx)
  215 {
  216 
  217         return (ctx->ifc_dev);
  218 }
  219 
  220 if_t
  221 iflib_get_ifp(if_ctx_t ctx)
  222 {
  223 
  224         return (ctx->ifc_ifp);
  225 }
  226 
  227 struct ifmedia *
  228 iflib_get_media(if_ctx_t ctx)
  229 {
  230 
  231         return (&ctx->ifc_media);
  232 }
  233 
  234 void
  235 iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN])
  236 {
  237 
  238         bcopy(mac, ctx->ifc_mac, ETHER_ADDR_LEN);
  239 }
  240 
  241 if_softc_ctx_t
  242 iflib_get_softc_ctx(if_ctx_t ctx)
  243 {
  244 
  245         return (&ctx->ifc_softc_ctx);
  246 }
  247 
  248 if_shared_ctx_t
  249 iflib_get_sctx(if_ctx_t ctx)
  250 {
  251 
  252         return (ctx->ifc_sctx);
  253 }
  254 
  255 #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*))
  256 
  257 #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP)
  258 #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF)
  259 
  260 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
  261 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
  262 #define RX_SW_DESC_INUSE        (1 << 3)
  263 #define TX_SW_DESC_MAPPED       (1 << 4)
  264 
  265 typedef struct iflib_sw_rx_desc {
  266         bus_dmamap_t    ifsd_map;         /* bus_dma map for packet */
  267         struct mbuf    *ifsd_m;           /* rx: uninitialized mbuf */
  268         caddr_t         ifsd_cl;          /* direct cluster pointer for rx */
  269         uint16_t        ifsd_flags;
  270 } *iflib_rxsd_t;
  271 
  272 typedef struct iflib_sw_tx_desc_val {
  273         bus_dmamap_t    ifsd_map;         /* bus_dma map for packet */
  274         struct mbuf    *ifsd_m;           /* pkthdr mbuf */
  275         uint8_t         ifsd_flags;
  276 } *iflib_txsd_val_t;
  277 
  278 typedef struct iflib_sw_tx_desc_array {
  279         bus_dmamap_t    *ifsd_map;         /* bus_dma maps for packet */
  280         struct mbuf    **ifsd_m;           /* pkthdr mbufs */
  281         uint8_t         *ifsd_flags;
  282 } iflib_txsd_array_t;
  283 
  284 
  285 /* magic number that should be high enough for any hardware */
  286 #define IFLIB_MAX_TX_SEGS               128
  287 #define IFLIB_MAX_RX_SEGS               32
  288 #define IFLIB_RX_COPY_THRESH            128
  289 #define IFLIB_MAX_RX_REFRESH            32
  290 #define IFLIB_QUEUE_IDLE                0
  291 #define IFLIB_QUEUE_HUNG                1
  292 #define IFLIB_QUEUE_WORKING             2
  293 
  294 /* this should really scale with ring size - 16 is a fairly arbitrary value for this */
  295 #define TX_BATCH_SIZE                   16
  296 
  297 #define IFLIB_RESTART_BUDGET            8
  298 
  299 #define IFC_LEGACY              0x01
  300 #define IFC_QFLUSH              0x02
  301 #define IFC_MULTISEG            0x04
  302 #define IFC_DMAR                0x08
  303 #define IFC_SC_ALLOCATED        0x10
  304 
  305 #define CSUM_OFFLOAD            (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \
  306                                  CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \
  307                                  CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP)
  308 struct iflib_txq {
  309         uint16_t        ift_in_use;
  310         uint16_t        ift_cidx;
  311         uint16_t        ift_cidx_processed;
  312         uint16_t        ift_pidx;
  313         uint8_t         ift_gen;
  314         uint8_t         ift_db_pending;
  315         uint8_t         ift_db_pending_queued;
  316         uint8_t         ift_npending;
  317         uint8_t         ift_br_offset;
  318         /* implicit pad */
  319         uint64_t        ift_processed;
  320         uint64_t        ift_cleaned;
  321 #if MEMORY_LOGGING
  322         uint64_t        ift_enqueued;
  323         uint64_t        ift_dequeued;
  324 #endif
  325         uint64_t        ift_no_tx_dma_setup;
  326         uint64_t        ift_no_desc_avail;
  327         uint64_t        ift_mbuf_defrag_failed;
  328         uint64_t        ift_mbuf_defrag;
  329         uint64_t        ift_map_failed;
  330         uint64_t        ift_txd_encap_efbig;
  331         uint64_t        ift_pullups;
  332 
  333         struct mtx      ift_mtx;
  334         struct mtx      ift_db_mtx;
  335 
  336         /* constant values */
  337         if_ctx_t        ift_ctx;
  338         struct ifmp_ring        **ift_br;
  339         struct grouptask        ift_task;
  340         uint16_t        ift_size;
  341         uint16_t        ift_id;
  342         struct callout  ift_timer;
  343         struct callout  ift_db_check;
  344 
  345         iflib_txsd_array_t      ift_sds;
  346         uint8_t                 ift_nbr;
  347         uint8_t                 ift_qstatus;
  348         uint8_t                 ift_active;
  349         uint8_t                 ift_closed;
  350         int                     ift_watchdog_time;
  351         struct iflib_filter_info ift_filter_info;
  352         bus_dma_tag_t           ift_desc_tag;
  353         bus_dma_tag_t           ift_tso_desc_tag;
  354         iflib_dma_info_t        ift_ifdi;
  355 #define MTX_NAME_LEN 16
  356         char                    ift_mtx_name[MTX_NAME_LEN];
  357         char                    ift_db_mtx_name[MTX_NAME_LEN];
  358         bus_dma_segment_t       ift_segs[IFLIB_MAX_TX_SEGS]  __aligned(CACHE_LINE_SIZE);
  359 } __aligned(CACHE_LINE_SIZE);
  360 
  361 struct iflib_fl {
  362         uint16_t        ifl_cidx;
  363         uint16_t        ifl_pidx;
  364         uint16_t        ifl_credits;
  365         uint8_t         ifl_gen;
  366 #if MEMORY_LOGGING
  367         uint64_t        ifl_m_enqueued;
  368         uint64_t        ifl_m_dequeued;
  369         uint64_t        ifl_cl_enqueued;
  370         uint64_t        ifl_cl_dequeued;
  371 #endif
  372         /* implicit pad */
  373 
  374         /* constant */
  375         uint16_t        ifl_size;
  376         uint16_t        ifl_buf_size;
  377         uint16_t        ifl_cltype;
  378         uma_zone_t      ifl_zone;
  379         iflib_rxsd_t    ifl_sds;
  380         iflib_rxq_t     ifl_rxq;
  381         uint8_t         ifl_id;
  382         bus_dma_tag_t           ifl_desc_tag;
  383         iflib_dma_info_t        ifl_ifdi;
  384         uint64_t        ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE);
  385         caddr_t         ifl_vm_addrs[IFLIB_MAX_RX_REFRESH];
  386 }  __aligned(CACHE_LINE_SIZE);
  387 
  388 static inline int
  389 get_inuse(int size, int cidx, int pidx, int gen)
  390 {
  391         int used;
  392 
  393         if (pidx > cidx)
  394                 used = pidx - cidx;
  395         else if (pidx < cidx)
  396                 used = size - cidx + pidx;
  397         else if (gen == 0 && pidx == cidx)
  398                 used = 0;
  399         else if (gen == 1 && pidx == cidx)
  400                 used = size;
  401         else
  402                 panic("bad state");
  403 
  404         return (used);
  405 }
  406 
  407 #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen))
  408 
  409 #define IDXDIFF(head, tail, wrap) \
  410         ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head))
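/*
 * Worked example of the arithmetic above: with size = 1024, cidx = 1000
 * and pidx = 8, pidx < cidx, so get_inuse() returns 1024 - 1000 + 8 = 32
 * descriptors in use and TXQ_AVAIL() yields 1024 - 32 = 992.  The gen
 * bit only disambiguates pidx == cidx: gen == 0 means an empty ring,
 * gen == 1 a full one.  IDXDIFF() computes the same wrap-aware distance
 * between two plain indices.
 */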
  411 
  412 struct iflib_rxq {
  413         /* If there is a separate completion queue -
  414          * these are the cq cidx and pidx. Otherwise
  415          * these are unused.
  416          */
  417         uint16_t        ifr_size;
  418         uint16_t        ifr_cq_cidx;
  419         uint16_t        ifr_cq_pidx;
  420         uint8_t         ifr_cq_gen;
  421         uint8_t         ifr_fl_offset;
  422 
  423         if_ctx_t        ifr_ctx;
  424         iflib_fl_t      ifr_fl;
  425         uint64_t        ifr_rx_irq;
  426         uint16_t        ifr_id;
  427         uint8_t         ifr_lro_enabled;
  428         uint8_t         ifr_nfl;
  429         struct lro_ctrl                 ifr_lc;
  430         struct grouptask        ifr_task;
  431         struct iflib_filter_info ifr_filter_info;
  432         iflib_dma_info_t                ifr_ifdi;
  433         /* dynamically allocate if any drivers need a value substantially larger than this */
  434         struct if_rxd_frag      ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE);
  435 }  __aligned(CACHE_LINE_SIZE);
  436 
  437 /*
  438  * Only allow a single packet to take up at most 1/nth of the tx ring
  439  */
  440 #define MAX_SINGLE_PACKET_FRACTION 12
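/*
 * e.g. with a 1024-descriptor ring, MAX_SINGLE_PACKET_FRACTION caps a
 * single packet at 1024 / 12 = 85 descriptors.
 */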
  441 #define IF_BAD_DMA (bus_addr_t)-1
  442 
  443 static int enable_msix = 1;
  444 
  445 #define mtx_held(m)     (((m)->mtx_lock & ~MTX_FLAGMASK) != (uintptr_t)0)
  446 
  447 
  448 
  449 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING))
  450 
  451 #define CTX_LOCK_INIT(_sc, _name)  mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF)
  452 
  453 #define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx)
  454 #define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx)
  455 #define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx)
  456 
  457 
  458 #define TXDB_LOCK_INIT(txq)  mtx_init(&(txq)->ift_db_mtx, (txq)->ift_db_mtx_name, NULL, MTX_DEF)
  459 #define TXDB_TRYLOCK(txq) mtx_trylock(&(txq)->ift_db_mtx)
  460 #define TXDB_LOCK(txq) mtx_lock(&(txq)->ift_db_mtx)
  461 #define TXDB_UNLOCK(txq) mtx_unlock(&(txq)->ift_db_mtx)
  462 #define TXDB_LOCK_DESTROY(txq) mtx_destroy(&(txq)->ift_db_mtx)
  463 
  464 #define CALLOUT_LOCK(txq)       mtx_lock(&txq->ift_mtx)
  465 #define CALLOUT_UNLOCK(txq)     mtx_unlock(&txq->ift_mtx)
  466 
  467 
  468 /* Our boot-time initialization hook */
  469 static int      iflib_module_event_handler(module_t, int, void *);
  470 
  471 static moduledata_t iflib_moduledata = {
  472         "iflib",
  473         iflib_module_event_handler,
  474         NULL
  475 };
  476 
  477 DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY);
  478 MODULE_VERSION(iflib, 1);
  479 
  480 MODULE_DEPEND(iflib, pci, 1, 1, 1);
  481 MODULE_DEPEND(iflib, ether, 1, 1, 1);
  482 
  483 TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1);
  484 TASKQGROUP_DEFINE(if_config_tqg, 1, 1);
  485 
  486 #ifndef IFLIB_DEBUG_COUNTERS
  487 #ifdef INVARIANTS
  488 #define IFLIB_DEBUG_COUNTERS 1
  489 #else
  490 #define IFLIB_DEBUG_COUNTERS 0
  491 #endif /* !INVARIANTS */
  492 #endif
  493 
  494 static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0,
  495                    "iflib driver parameters");
  496 
  497 /*
  498  * XXX need to ensure that this can't accidentally cause the head to be moved backwards 
  499  */
  500 static int iflib_min_tx_latency = 0;
  501 
  502 SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW,
  503                    &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput");
  504 
  505 
  506 #if IFLIB_DEBUG_COUNTERS
  507 
  508 static int iflib_tx_seen;
  509 static int iflib_tx_sent;
  510 static int iflib_tx_encap;
  511 static int iflib_rx_allocs;
  512 static int iflib_fl_refills;
  513 static int iflib_fl_refills_large;
  514 static int iflib_tx_frees;
  515 
  516 SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD,
  517                    &iflib_tx_seen, 0, "# tx mbufs seen");
  518 SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD,
  519                    &iflib_tx_sent, 0, "# tx mbufs sent");
  520 SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD,
  521                    &iflib_tx_encap, 0, "# tx mbufs encapped");
  522 SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD,
  523                    &iflib_tx_frees, 0, "# tx frees");
  524 SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD,
  525                    &iflib_rx_allocs, 0, "# rx allocations");
  526 SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD,
  527                    &iflib_fl_refills, 0, "# refills");
  528 SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD,
  529                    &iflib_fl_refills_large, 0, "# large refills");
  530 
  531 
  532 static int iflib_txq_drain_flushing;
  533 static int iflib_txq_drain_oactive;
  534 static int iflib_txq_drain_notready;
  535 static int iflib_txq_drain_encapfail;
  536 
  537 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD,
  538                    &iflib_txq_drain_flushing, 0, "# drain flushes");
  539 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD,
  540                    &iflib_txq_drain_oactive, 0, "# drain oactives");
  541 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD,
  542                    &iflib_txq_drain_notready, 0, "# drain notready");
  543 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_encapfail, CTLFLAG_RD,
  544                    &iflib_txq_drain_encapfail, 0, "# drain encap fails");
  545 
  546 
  547 static int iflib_encap_load_mbuf_fail;
  548 static int iflib_encap_txq_avail_fail;
  549 static int iflib_encap_txd_encap_fail;
  550 
  551 SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD,
  552                    &iflib_encap_load_mbuf_fail, 0, "# busdma load failures");
  553 SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD,
  554                    &iflib_encap_txq_avail_fail, 0, "# txq avail failures");
  555 SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD,
  556                    &iflib_encap_txd_encap_fail, 0, "# driver encap failures");
  557 
  558 static int iflib_task_fn_rxs;
  559 static int iflib_rx_intr_enables;
  560 static int iflib_fast_intrs;
  561 static int iflib_intr_link;
  562 static int iflib_intr_msix; 
  563 static int iflib_rx_unavail;
  564 static int iflib_rx_ctx_inactive;
  565 static int iflib_rx_zero_len;
  566 static int iflib_rx_if_input;
  567 static int iflib_rx_mbuf_null;
  568 static int iflib_rxd_flush;
  569 
  570 static int iflib_verbose_debug;
  571 
  572 SYSCTL_INT(_net_iflib, OID_AUTO, intr_link, CTLFLAG_RD,
  573                    &iflib_intr_link, 0, "# intr link calls");
  574 SYSCTL_INT(_net_iflib, OID_AUTO, intr_msix, CTLFLAG_RD,
  575                    &iflib_intr_msix, 0, "# intr msix calls");
  576 SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD,
  577                    &iflib_task_fn_rxs, 0, "# task_fn_rx calls");
  578 SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD,
  579                    &iflib_rx_intr_enables, 0, "# rx intr enables");
  580 SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD,
  581                    &iflib_fast_intrs, 0, "# fast_intr calls");
  582 SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD,
  583                    &iflib_rx_unavail, 0, "# times rxeof called with no available data");
  584 SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD,
  585                    &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context");
  586 SYSCTL_INT(_net_iflib, OID_AUTO, rx_zero_len, CTLFLAG_RD,
  587                    &iflib_rx_zero_len, 0, "# times rxeof saw zero len mbuf");
  588 SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD,
  589                    &iflib_rx_if_input, 0, "# times rxeof called if_input");
  590 SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD,
  591                    &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf");
  592 SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD,
  593                  &iflib_rxd_flush, 0, "# times rxd_flush called");
  594 SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW,
  595                    &iflib_verbose_debug, 0, "enable verbose debugging");
  596 
  597 #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1)
  598 
  599 #else
  600 #define DBG_COUNTER_INC(name)
  601 
  602 #endif
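/*
 * Usage example: DBG_COUNTER_INC(tx_seen) atomically increments
 * iflib_tx_seen when IFLIB_DEBUG_COUNTERS is non-zero and compiles away
 * to nothing otherwise.
 */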
  603 
  604 
  605 
  606 #define IFLIB_DEBUG 0
  607 
  608 static void iflib_tx_structures_free(if_ctx_t ctx);
  609 static void iflib_rx_structures_free(if_ctx_t ctx);
  610 static int iflib_queues_alloc(if_ctx_t ctx);
  611 static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq);
  612 static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget);
  613 static int iflib_qset_structures_setup(if_ctx_t ctx);
  614 static int iflib_msix_init(if_ctx_t ctx);
  615 static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str);
  616 static void iflib_txq_check_drain(iflib_txq_t txq, int budget);
  617 static uint32_t iflib_txq_can_drain(struct ifmp_ring *);
  618 static int iflib_register(if_ctx_t);
  619 static void iflib_init_locked(if_ctx_t ctx);
  620 static void iflib_add_device_sysctl_pre(if_ctx_t ctx);
  621 static void iflib_add_device_sysctl_post(if_ctx_t ctx);
  622 
  623 
  624 #ifdef DEV_NETMAP
  625 #include <sys/selinfo.h>
  626 #include <net/netmap.h>
  627 #include <dev/netmap/netmap_kern.h>
  628 
  629 MODULE_DEPEND(iflib, netmap, 1, 1, 1);
  630 
  631 /*
  632  * device-specific sysctl variables:
  633  *
  634  * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
  635  *      During regular operations the CRC is stripped, but on some
  636  *      hardware reception of frames whose length is not a multiple of 64 is slower,
  637  *      so using crcstrip=0 helps in benchmarks.
  638  *
  639  * iflib_rx_miss, iflib_rx_miss_bufs:
  640  *      count packets that might be missed due to lost interrupts.
  641  */
  642 SYSCTL_DECL(_dev_netmap);
  643 /*
  644  * The xl driver by default strips CRCs and we do not override it.
  645  */
  646 
  647 int iflib_crcstrip = 1;
  648 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip,
  649     CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames");
  650 
  651 int iflib_rx_miss, iflib_rx_miss_bufs;
  652 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss,
  653     CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr");
  654 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs,
  655     CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs");
  656 
  657 /*
  658  * Register/unregister. We are already under netmap lock.
  659  * Only called on the first register or the last unregister.
  660  */
  661 static int
  662 iflib_netmap_register(struct netmap_adapter *na, int onoff)
  663 {
  664         struct ifnet *ifp = na->ifp;
  665         if_ctx_t ctx = ifp->if_softc;
  666 
  667         CTX_LOCK(ctx);
  668         IFDI_INTR_DISABLE(ctx);
  669 
  670         /* Tell the stack that the interface is no longer active */
  671         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
  672 
  673         if (!CTX_IS_VF(ctx))
  674                 IFDI_CRCSTRIP_SET(ctx, onoff);
  675 
  676         /* enable or disable flags and callbacks in na and ifp */
  677         if (onoff) {
  678                 nm_set_native_flags(na);
  679         } else {
  680                 nm_clear_native_flags(na);
  681         }
  682         IFDI_INIT(ctx);
  683         IFDI_CRCSTRIP_SET(ctx, onoff); // XXX why twice ?
  684         CTX_UNLOCK(ctx);
  685         return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
  686 }
  687 
  688 /*
  689  * Reconcile kernel and user view of the transmit ring.
  690  *
  691  * All information is in the kring.
  692  * Userspace wants to send packets up to the one before kring->rhead,
  693  * kernel knows kring->nr_hwcur is the first unsent packet.
  694  *
  695  * Here we push packets out (as many as possible), and possibly
  696  * reclaim buffers from previously completed transmission.
  697  *
  698  * The caller (netmap) guarantees that there is only one instance
  699  * running at any time. Any interference with other driver
  700  * methods should be handled by the individual drivers.
  701  */
  702 static int
  703 iflib_netmap_txsync(struct netmap_kring *kring, int flags)
  704 {
  705         struct netmap_adapter *na = kring->na;
  706         struct ifnet *ifp = na->ifp;
  707         struct netmap_ring *ring = kring->ring;
  708         u_int nm_i;     /* index into the netmap ring */
  709         u_int nic_i;    /* index into the NIC ring */
  710         u_int n;
  711         u_int const lim = kring->nkr_num_slots - 1;
  712         u_int const head = kring->rhead;
  713         struct if_pkt_info pi;
  714 
  715         /*
  716          * interrupts on every tx packet are expensive so request
  717          * them every half ring, or where NS_REPORT is set
  718          */
  719         u_int report_frequency = kring->nkr_num_slots >> 1;
  720         /* device-specific */
  721         if_ctx_t ctx = ifp->if_softc;
  722         iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id];
  723 
  724         pi.ipi_segs = txq->ift_segs;
  725         pi.ipi_qsidx = kring->ring_id;
  726         pi.ipi_ndescs = 0;
  727 
  728         bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
  729                                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
  730 
  731 
  732         /*
  733          * First part: process new packets to send.
  734          * nm_i is the current index in the netmap ring,
  735          * nic_i is the corresponding index in the NIC ring.
  736          *
  737          * If we have packets to send (nm_i != head)
  738          * iterate over the netmap ring, fetch length and update
  739          * the corresponding slot in the NIC ring. Some drivers also
  740          * need to update the buffer's physical address in the NIC slot
  741  *        even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
  742          *
  743  *        The netmap_reload_map() call is especially expensive,
  744  *        even when (as in this case) the tag is 0, so only do it
  745  *        when the buffer has actually changed.
  746          *
  747          * If possible do not set the report/intr bit on all slots,
  748          * but only a few times per ring or when NS_REPORT is set.
  749          *
  750          * Finally, on 10G and faster drivers, it might be useful
  751          * to prefetch the next slot and txr entry.
  752          */
  753 
  754         nm_i = kring->nr_hwcur;
  755         if (nm_i != head) {     /* we have new packets to send */
  756                 nic_i = netmap_idx_k2n(kring, nm_i);
  757 
  758                 __builtin_prefetch(&ring->slot[nm_i]);
  759                 __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]);
  760                 __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]);
  761 
  762                 for (n = 0; nm_i != head; n++) {
  763                         struct netmap_slot *slot = &ring->slot[nm_i];
  764                         u_int len = slot->len;
  765                         uint64_t paddr;
  766                         void *addr = PNMB(na, slot, &paddr);
  767                         int flags = (slot->flags & NS_REPORT ||
  768                                 nic_i == 0 || nic_i == report_frequency) ?
  769                                 IPI_TX_INTR : 0;
  770 
  771                         /* device-specific */
  772                         pi.ipi_pidx = nic_i;
  773                         pi.ipi_flags = flags;
  774 
  775                         /* Fill the slot in the NIC ring. */
  776                         ctx->isc_txd_encap(ctx->ifc_softc, &pi);
  777 
  778                         /* prefetch for next round */
  779                         __builtin_prefetch(&ring->slot[nm_i + 1]);
  780                         __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]);
  781                         __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]);
  782 
  783                         NM_CHECK_ADDR_LEN(na, addr, len);
  784 
  785                         if (slot->flags & NS_BUF_CHANGED) {
  786                                 /* buffer has changed, reload map */
  787                                 netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr);
  788                         }
  789                         slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
  790 
  791                         /* make sure changes to the buffer are synced */
  792                         bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i],
  793                                                         BUS_DMASYNC_PREWRITE);
  794 
  795                         nm_i = nm_next(nm_i, lim);
  796                         nic_i = nm_next(nic_i, lim);
  797                 }
  798                 kring->nr_hwcur = head;
  799 
  800                 /* synchronize the NIC ring */
  801                 bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
  802                                                 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
  803 
  804                 /* (re)start the tx unit up to slot nic_i (excluded) */
  805                 ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i);
  806         }
  807 
  808         /*
  809          * Second part: reclaim buffers for completed transmissions.
  810          */
  811         if (iflib_tx_credits_update(ctx, txq)) {
  812                 /* some tx completed, increment avail */
  813                 nic_i = txq->ift_cidx_processed;
  814                 kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
  815         }
  816         return (0);
  817 }
  818 
  819 /*
  820  * Reconcile kernel and user view of the receive ring.
  821  * Same as for the txsync, this routine must be efficient.
  822  * The caller guarantees a single invocation, but races against
  823  * the rest of the driver should be handled here.
  824  *
  825  * On call, kring->rhead is the first packet that userspace wants
  826  * to keep, and kring->rcur is the wakeup point.
  827  * The kernel has previously reported packets up to kring->rtail.
  828  *
  829  * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
  830  * of whether or not we received an interrupt.
  831  */
  832 static int
  833 iflib_netmap_rxsync(struct netmap_kring *kring, int flags)
  834 {
  835         struct netmap_adapter *na = kring->na;
  836         struct ifnet *ifp = na->ifp;
  837         struct netmap_ring *ring = kring->ring;
  838         u_int nm_i;     /* index into the netmap ring */
  839         u_int nic_i;    /* index into the NIC ring */
  840         u_int i, n;
  841         u_int const lim = kring->nkr_num_slots - 1;
  842         u_int const head = kring->rhead;
  843         int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
  844         struct if_rxd_info ri;
  845         /* device-specific */
  846         if_ctx_t ctx = ifp->if_softc;
  847         iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id];
  848         iflib_fl_t fl = rxq->ifr_fl;
  849         if (head > lim)
  850                 return netmap_ring_reinit(kring);
  851 
  852         bzero(&ri, sizeof(ri));
  853         ri.iri_qsidx = kring->ring_id;
  854         ri.iri_ifp = ctx->ifc_ifp;
  855         /* XXX check sync modes */
  856         for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++)
  857                 bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map,
  858                                 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
  859 
  860         /*
  861          * First part: import newly received packets.
  862          *
  863          * nm_i is the index of the next free slot in the netmap ring,
  864          * nic_i is the index of the next received packet in the NIC ring,
  865          * and they may differ in case if_init() has been called while
  866          * in netmap mode. For the receive ring we have
  867          *
  868          *      nic_i = rxr->next_check;
  869          *      nm_i = kring->nr_hwtail (previous)
  870          * and
  871          *      nm_i == (nic_i + kring->nkr_hwofs) % ring_size
  872          *
  873          * rxr->next_check is set to 0 on a ring reinit
  874          */
  875         if (netmap_no_pendintr || force_update) {
  876                 int crclen = iflib_crcstrip ? 0 : 4;
  877                 int error, avail;
  878                 uint16_t slot_flags = kring->nkr_slot_flags;
  879 
  880                 for (fl = rxq->ifr_fl, i = 0; i < rxq->ifr_nfl; i++, fl++) {
  881                         nic_i = fl->ifl_cidx;
  882                         nm_i = netmap_idx_n2k(kring, nic_i);
  883                         avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i, INT_MAX);
  884                         for (n = 0; avail > 0; n++, avail--) {
  885                                 error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
  886                                 if (error)
  887                                         ring->slot[nm_i].len = 0;
  888                                 else
  889                                         ring->slot[nm_i].len = ri.iri_len - crclen;
  890                                 ring->slot[nm_i].flags = slot_flags;
  891                                 bus_dmamap_sync(fl->ifl_ifdi->idi_tag,
  892                                                                 fl->ifl_sds[nic_i].ifsd_map, BUS_DMASYNC_POSTREAD);
  893                                 nm_i = nm_next(nm_i, lim);
  894                                 nic_i = nm_next(nic_i, lim);
  895                         }
  896                         if (n) { /* update the state variables */
  897                                 if (netmap_no_pendintr && !force_update) {
  898                                         /* diagnostics */
  899                                         iflib_rx_miss++;
  900                                         iflib_rx_miss_bufs += n;
  901                                 }
  902                                 fl->ifl_cidx = nic_i;
  903                                 kring->nr_hwtail = nm_i;
  904                         }
  905                         kring->nr_kflags &= ~NKR_PENDINTR;
  906                 }
  907         }
  908         /*
  909          * Second part: skip past packets that userspace has released.
  910          * (kring->nr_hwcur to head excluded),
  911          * and make the buffers available for reception.
  912          * As usual nm_i is the index in the netmap ring,
  913          * nic_i is the index in the NIC ring, and
  914          * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
  915          */
  916         /* XXX not sure how this will work with multiple free lists */
  917         nm_i = kring->nr_hwcur;
  918         if (nm_i != head) {
  919                 nic_i = netmap_idx_k2n(kring, nm_i);
  920                 for (n = 0; nm_i != head; n++) {
  921                         struct netmap_slot *slot = &ring->slot[nm_i];
  922                         uint64_t paddr;
  923                         caddr_t vaddr;
  924                         void *addr = PNMB(na, slot, &paddr);
  925 
  926                         if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
  927                                 goto ring_reset;
  928 
  929                         vaddr = addr;
  930                         if (slot->flags & NS_BUF_CHANGED) {
  931                                 /* buffer has changed, reload map */
  932                                 netmap_reload_map(na, fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map, addr);
  933                                 slot->flags &= ~NS_BUF_CHANGED;
  934                         }
  935                         /*
  936                          * XXX we should be batching this operation - TODO (an illustrative batched variant is sketched after this function)
  937                          */
  938                         ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1, fl->ifl_buf_size);
  939                         bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map,
  940                             BUS_DMASYNC_PREREAD);
  941                         nm_i = nm_next(nm_i, lim);
  942                         nic_i = nm_next(nic_i, lim);
  943                 }
  944                 kring->nr_hwcur = head;
  945 
  946                 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
  947                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
  948                 /*
  949                  * IMPORTANT: we must leave one free slot in the ring,
  950                  * so move nic_i back by one unit
  951                  */
  952                 nic_i = nm_prev(nic_i, lim);
  953                 ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i);
  954         }
  955 
  956         return 0;
  957 
  958 ring_reset:
  959         return netmap_ring_reinit(kring);
  960 }
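/*
 * Illustrative sketch only (not the committed code): one way the
 * per-slot isc_rxd_refill() calls in the second part of rxsync above
 * could be batched, as the XXX/TODO notes.  Up to IFLIB_MAX_RX_REFRESH
 * addresses are accumulated before a single refill call; the variable
 * names mirror the rxsync loop, and the NS_BUF_CHANGED handling and
 * dmamap syncs are omitted for brevity.
 */
#if 0
	uint64_t paddrs[IFLIB_MAX_RX_REFRESH];
	caddr_t vaddrs[IFLIB_MAX_RX_REFRESH];
	u_int batch = 0, first = nic_i;

	while (nm_i != head) {
		struct netmap_slot *slot = &ring->slot[nm_i];

		vaddrs[batch] = PNMB(na, slot, &paddrs[batch]);
		batch++;
		nm_i = nm_next(nm_i, lim);
		nic_i = nm_next(nic_i, lim);
		if (batch == IFLIB_MAX_RX_REFRESH || nm_i == head) {
			ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id,
			    fl->ifl_id, first, paddrs, vaddrs, batch,
			    fl->ifl_buf_size);
			first = nic_i;
			batch = 0;
		}
	}
#endif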
  961 
  962 static int
  963 iflib_netmap_attach(if_ctx_t ctx)
  964 {
  965         struct netmap_adapter na;
  966         if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  967 
  968         bzero(&na, sizeof(na));
  969 
  970         na.ifp = ctx->ifc_ifp;
  971         na.na_flags = NAF_BDG_MAYSLEEP;
  972         MPASS(ctx->ifc_softc_ctx.isc_ntxqsets);
  973         MPASS(ctx->ifc_softc_ctx.isc_nrxqsets);
  974 
  975         na.num_tx_desc = scctx->isc_ntxd[0];
  976         na.num_rx_desc = scctx->isc_nrxd[0];
  977         na.nm_txsync = iflib_netmap_txsync;
  978         na.nm_rxsync = iflib_netmap_rxsync;
  979         na.nm_register = iflib_netmap_register;
  980         na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets;
  981         na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets;
  982         return (netmap_attach(&na));
  983 }
  984 
  985 static void
  986 iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq)
  987 {
  988         struct netmap_adapter *na = NA(ctx->ifc_ifp);
  989         struct netmap_slot *slot;
  990 
  991         slot = netmap_reset(na, NR_TX, txq->ift_id, 0);
  992         if (slot == NULL)
  993                 return;
  994 
  995         for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) {
  996 
  997                 /*
  998                  * In netmap mode, set the map for the packet buffer.
  999                  * NOTE: Some drivers (not this one) also need to set
 1000                  * the physical buffer address in the NIC ring.
 1001                  * netmap_idx_n2k() maps a nic index, i, into the corresponding
 1002                  * netmap slot index, si
 1003                  */
 1004                 int si = netmap_idx_n2k(&na->tx_rings[txq->ift_id], i);
 1005                 netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si));
 1006         }
 1007 }
 1008 static void
 1009 iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq)
 1010 {
 1011         struct netmap_adapter *na = NA(ctx->ifc_ifp);
 1012         struct netmap_slot *slot;
 1013         iflib_rxsd_t sd;
 1014         int nrxd;
 1015 
 1016         slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0);
 1017         if (slot == NULL)
 1018                 return;
 1019         sd = rxq->ifr_fl[0].ifl_sds;
 1020         nrxd = ctx->ifc_softc_ctx.isc_nrxd[0];
 1021         for (int i = 0; i < nrxd; i++, sd++) {
 1022                         int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i);
 1023                         uint64_t paddr;
 1024                         void *addr;
 1025                         caddr_t vaddr;
 1026 
 1027                         vaddr = addr = PNMB(na, slot + sj, &paddr);
 1028                         netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, sd->ifsd_map, addr);
 1029                         /* Update descriptor and the cached value */
 1030                         ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1, rxq->ifr_fl[0].ifl_buf_size);
 1031         }
 1032         /* preserve queue */
 1033         if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) {
 1034                 struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id];
 1035                 int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
 1036                 ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, t);
 1037         } else
 1038                 ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, nrxd-1);
 1039 }
 1040 
 1041 #define iflib_netmap_detach(ifp) netmap_detach(ifp)
 1042 
 1043 #else
 1044 #define iflib_netmap_txq_init(ctx, txq)
 1045 #define iflib_netmap_rxq_init(ctx, rxq)
 1046 #define iflib_netmap_detach(ifp)
 1047 
 1048 #define iflib_netmap_attach(ctx) (0)
 1049 #define netmap_rx_irq(ifp, qid, budget) (0)
 1050 
 1051 #endif
 1052 
 1053 #if defined(__i386__) || defined(__amd64__)
 1054 static __inline void
 1055 prefetch(void *x)
 1056 {
 1057         __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
 1058 }
 1059 #else
 1060 #define prefetch(x)
 1061 #endif
 1062 
 1063 static void
 1064 _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
 1065 {
 1066         if (err)
 1067                 return;
 1068         *(bus_addr_t *) arg = segs[0].ds_addr;
 1069 }
 1070 
 1071 int
 1072 iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags)
 1073 {
 1074         int err;
 1075         if_shared_ctx_t sctx = ctx->ifc_sctx;
 1076         device_t dev = ctx->ifc_dev;
 1077 
 1078         KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized"));
 1079 
 1080         err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
 1081                                 sctx->isc_q_align, 0,   /* alignment, bounds */
 1082                                 BUS_SPACE_MAXADDR,      /* lowaddr */
 1083                                 BUS_SPACE_MAXADDR,      /* highaddr */
 1084                                 NULL, NULL,             /* filter, filterarg */
 1085                                 size,                   /* maxsize */
 1086                                 1,                      /* nsegments */
 1087                                 size,                   /* maxsegsize */
 1088                                 BUS_DMA_ALLOCNOW,       /* flags */
 1089                                 NULL,                   /* lockfunc */
 1090                                 NULL,                   /* lockarg */
 1091                                 &dma->idi_tag);
 1092         if (err) {
 1093                 device_printf(dev,
 1094                     "%s: bus_dma_tag_create failed: %d\n",
 1095                     __func__, err);
 1096                 goto fail_0;
 1097         }
 1098 
 1099         err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr,
 1100             BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map);
 1101         if (err) {
 1102                 device_printf(dev,
 1103                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
 1104                     __func__, (uintmax_t)size, err);
 1105                 goto fail_1;
 1106         }
 1107 
 1108         dma->idi_paddr = IF_BAD_DMA;
 1109         err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr,
 1110             size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT);
 1111         if (err || dma->idi_paddr == IF_BAD_DMA) {
 1112                 device_printf(dev,
 1113                     "%s: bus_dmamap_load failed: %d\n",
 1114                     __func__, err);
 1115                 goto fail_2;
 1116         }
 1117 
 1118         dma->idi_size = size;
 1119         return (0);
 1120 
 1121 fail_2:
 1122         bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map);
 1123 fail_1:
 1124         bus_dma_tag_destroy(dma->idi_tag);
 1125 fail_0:
 1126         dma->idi_tag = NULL;
 1127 
 1128         return (err);
 1129 }
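/*
 * Usage sketch (illustrative): allocating and releasing a DMA-coherent
 * region from driver code.  The 4096-byte size and the surrounding
 * context are assumptions for the example.
 */
#if 0
	struct iflib_dma_info di;

	if (iflib_dma_alloc(ctx, 4096, &di, BUS_DMA_NOWAIT) != 0)
		return (ENOMEM);
	/* di.idi_vaddr and di.idi_paddr now reference zeroed DMA memory */
	iflib_dma_free(&di);
#endif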
 1130 
 1131 int
 1132 iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count)
 1133 {
 1134         int i, err = 0;	/* stays 0 when count == 0 */
 1135         iflib_dma_info_t *dmaiter;
 1136 
 1137         dmaiter = dmalist;
 1138         for (i = 0; i < count; i++, dmaiter++) {
 1139                 if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0)
 1140                         break;
 1141         }
 1142         if (err)
 1143                 iflib_dma_free_multi(dmalist, i);
 1144         return (err);
 1145 }
 1146 
 1147 void
 1148 iflib_dma_free(iflib_dma_info_t dma)
 1149 {
 1150         if (dma->idi_tag == NULL)
 1151                 return;
 1152         if (dma->idi_paddr != IF_BAD_DMA) {
 1153                 bus_dmamap_sync(dma->idi_tag, dma->idi_map,
 1154                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 1155                 bus_dmamap_unload(dma->idi_tag, dma->idi_map);
 1156                 dma->idi_paddr = IF_BAD_DMA;
 1157         }
 1158         if (dma->idi_vaddr != NULL) {
 1159                 bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map);
 1160                 dma->idi_vaddr = NULL;
 1161         }
 1162         bus_dma_tag_destroy(dma->idi_tag);
 1163         dma->idi_tag = NULL;
 1164 }
 1165 
 1166 void
 1167 iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count)
 1168 {
 1169         int i;
 1170         iflib_dma_info_t *dmaiter = dmalist;
 1171 
 1172         for (i = 0; i < count; i++, dmaiter++)
 1173                 iflib_dma_free(*dmaiter);
 1174 }
 1175 
 1176 static int
 1177 iflib_fast_intr(void *arg)
 1178 {
 1179         iflib_filter_info_t info = arg;
 1180         struct grouptask *gtask = info->ifi_task;
 1181 
 1182         DBG_COUNTER_INC(fast_intrs);
 1183         if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED)
 1184                 return (FILTER_HANDLED);
 1185 
 1186         GROUPTASK_ENQUEUE(gtask);
 1187         return (FILTER_HANDLED);
 1188 }
 1189 
 1190 static int
 1191 _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
 1192         driver_filter_t filter, driver_intr_t handler, void *arg,
 1193                                  char *name)
 1194 {
 1195         int rc;
 1196         struct resource *res;
 1197         void *tag;
 1198         device_t dev = ctx->ifc_dev;
 1199 
 1200         MPASS(rid < 512);
 1201         irq->ii_rid = rid;
 1202         res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid,
 1203                                      RF_SHAREABLE | RF_ACTIVE);
 1204         if (res == NULL) {
 1205                 device_printf(dev,
 1206                     "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
 1207                 return (ENOMEM);
 1208         }
 1209         irq->ii_res = res;
 1210         KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL"));
 1211         rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET,
 1212                                                 filter, handler, arg, &tag);
 1213         if (rc != 0) {
 1214                 device_printf(dev,
 1215                     "failed to setup interrupt for rid %d, name %s: %d\n",
 1216                                           rid, name ? name : "unknown", rc);
 1217                 return (rc);
 1218         } else if (name)
 1219                 bus_describe_intr(dev, res, tag, "%s", name);
 1220 
 1221         irq->ii_tag = tag;
 1222         return (0);
 1223 }
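/*
 * Illustrative call only: wiring up a legacy (INTx) interrupt through
 * _iflib_irq_alloc() with a fast filter and no handler.  The rid value
 * and name string are assumptions for the example.
 */
#if 0
	struct if_irq irq;
	int rid = 0;	/* legacy INTx */

	if (_iflib_irq_alloc(ctx, &irq, rid, iflib_fast_intr, NULL,
	    &ctx->ifc_filter_info, "ctx:legacy") != 0)
		device_printf(ctx->ifc_dev, "legacy interrupt setup failed\n");
#endif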
 1224 
 1225 
 1226 /*********************************************************************
 1227  *
 1228  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 1229  *  the information needed to transmit a packet on the wire. This is
 1230  *  called only once at attach, setup is done every reset.
 1231  *
 1232  **********************************************************************/
 1233 
 1234 static int
 1235 iflib_txsd_alloc(iflib_txq_t txq)
 1236 {
 1237         if_ctx_t ctx = txq->ift_ctx;
 1238         if_shared_ctx_t sctx = ctx->ifc_sctx;
 1239         if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 1240         device_t dev = ctx->ifc_dev;
 1241         int err, nsegments, ntsosegments;
 1242 
 1243         nsegments = scctx->isc_tx_nsegments;
 1244         ntsosegments = scctx->isc_tx_tso_segments_max;
 1245         MPASS(scctx->isc_ntxd[0] > 0);
 1246         MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0);
 1247         MPASS(nsegments > 0);
 1248         MPASS(ntsosegments > 0);
 1249         /*
 1250          * Set up DMA descriptor areas.
 1251          */
 1252         if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
 1253                                1, 0,                    /* alignment, bounds */
 1254                                BUS_SPACE_MAXADDR,       /* lowaddr */
 1255                                BUS_SPACE_MAXADDR,       /* highaddr */
 1256                                NULL, NULL,              /* filter, filterarg */
 1257                                sctx->isc_tx_maxsize,            /* maxsize */
 1258                                nsegments,       /* nsegments */
 1259                                sctx->isc_tx_maxsegsize, /* maxsegsize */
 1260                                0,                       /* flags */
 1261                                NULL,                    /* lockfunc */
 1262                                NULL,                    /* lockfuncarg */
 1263                                &txq->ift_desc_tag))) {
 1264                 device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err);
 1265                 device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n",
 1266                                           sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize);
 1267                 goto fail;
 1268         }
 1269 #ifdef IFLIB_DIAGNOSTICS
 1270         device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n",
 1271                       sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize);
 1272 
 1273 #endif
 1274         if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
 1275                                1, 0,                    /* alignment, bounds */
 1276                                BUS_SPACE_MAXADDR,       /* lowaddr */
 1277                                BUS_SPACE_MAXADDR,       /* highaddr */
 1278                                NULL, NULL,              /* filter, filterarg */
 1279                                scctx->isc_tx_tso_size_max,              /* maxsize */
 1280                                ntsosegments,    /* nsegments */
 1281                                scctx->isc_tx_tso_segsize_max,   /* maxsegsize */
 1282                                0,                       /* flags */
 1283                                NULL,                    /* lockfunc */
 1284                                NULL,                    /* lockfuncarg */
 1285                                &txq->ift_tso_desc_tag))) {
 1286                 device_printf(dev,"Unable to allocate TX TSO DMA tag: %d\n", err);
 1287 
 1288                 goto fail;
 1289         }
 1290 #ifdef IFLIB_DIAGNOSTICS
 1291         device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n",
 1292                       scctx->isc_tx_tso_size_max, ntsosegments,
 1293                       scctx->isc_tx_tso_segsize_max);
 1294 #endif
 1295         if (!(txq->ift_sds.ifsd_flags =
 1296             (uint8_t *) malloc(sizeof(uint8_t) *
 1297             scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
 1298                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
 1299                 err = ENOMEM;
 1300                 goto fail;
 1301         }
 1302         if (!(txq->ift_sds.ifsd_m =
 1303             (struct mbuf **) malloc(sizeof(struct mbuf *) *
 1304             scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
 1305                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
 1306                 err = ENOMEM;
 1307                 goto fail;
 1308         }
 1309 
 1310         /* Create the descriptor buffer dma maps */
 1311 #if defined(ACPI_DMAR) || (!(defined(__i386__) && !defined(__amd64__)))
 1312         if ((ctx->ifc_flags & IFC_DMAR) == 0)
 1313                 return (0);
 1314 
 1315         if (!(txq->ift_sds.ifsd_map =
 1316             (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
 1317                 device_printf(dev, "Unable to allocate tx_buffer map memory\n");
 1318                 err = ENOMEM;
 1319                 goto fail;
 1320         }
 1321 
 1322         for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) {
 1323                 err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]);
 1324                 if (err != 0) {
 1325                         device_printf(dev, "Unable to create TX DMA map\n");
 1326                         goto fail;
 1327                 }
 1328         }
 1329 #endif
 1330         return (0);
 1331 fail:
  1332         /* We free all; this handles the case where we fail partway through */
 1333         iflib_tx_structures_free(ctx);
 1334         return (err);
 1335 }
 1336 
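       /*
        * Illustrative sketch, not part of iflib: the bus_dma_tag_create(9)
        * pattern used by the allocation path above, reduced to its essentials.
        * The maxsize/nsegments/maxsegsize values here are placeholder
        * assumptions; a real driver derives them from its shared context, as
        * the code above does.  Compiled out; for illustration only.
        */
       #if 0
       static int
       example_create_tx_tag(device_t dev, bus_dma_tag_t *tagp)
       {

               /* One tag captures the mapping constraints for every TX buffer. */
               return (bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
                   1, 0,                    /* alignment, bounds */
                   BUS_SPACE_MAXADDR,       /* lowaddr */
                   BUS_SPACE_MAXADDR,       /* highaddr */
                   NULL, NULL,              /* filter, filterarg */
                   65536,                   /* maxsize (assumed) */
                   32,                      /* nsegments (assumed) */
                   PAGE_SIZE,               /* maxsegsize (assumed) */
                   0,                       /* flags */
                   NULL, NULL,              /* lockfunc, lockfuncarg */
                   tagp));
       }
       #endif
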
 1337 static void
 1338 iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i)
 1339 {
 1340         bus_dmamap_t map;
 1341 
 1342         map = NULL;
 1343         if (txq->ift_sds.ifsd_map != NULL)
 1344                 map = txq->ift_sds.ifsd_map[i];
 1345         if (map != NULL) {
 1346                 bus_dmamap_unload(txq->ift_desc_tag, map);
 1347                 bus_dmamap_destroy(txq->ift_desc_tag, map);
 1348                 txq->ift_sds.ifsd_map[i] = NULL;
 1349         }
 1350 }
 1351 
 1352 static void
 1353 iflib_txq_destroy(iflib_txq_t txq)
 1354 {
 1355         if_ctx_t ctx = txq->ift_ctx;
 1356 
 1357         for (int i = 0; i < txq->ift_size; i++)
 1358                 iflib_txsd_destroy(ctx, txq, i);
 1359         if (txq->ift_sds.ifsd_map != NULL) {
 1360                 free(txq->ift_sds.ifsd_map, M_IFLIB);
 1361                 txq->ift_sds.ifsd_map = NULL;
 1362         }
 1363         if (txq->ift_sds.ifsd_m != NULL) {
 1364                 free(txq->ift_sds.ifsd_m, M_IFLIB);
 1365                 txq->ift_sds.ifsd_m = NULL;
 1366         }
 1367         if (txq->ift_sds.ifsd_flags != NULL) {
 1368                 free(txq->ift_sds.ifsd_flags, M_IFLIB);
 1369                 txq->ift_sds.ifsd_flags = NULL;
 1370         }
 1371         if (txq->ift_desc_tag != NULL) {
 1372                 bus_dma_tag_destroy(txq->ift_desc_tag);
 1373                 txq->ift_desc_tag = NULL;
 1374         }
 1375         if (txq->ift_tso_desc_tag != NULL) {
 1376                 bus_dma_tag_destroy(txq->ift_tso_desc_tag);
 1377                 txq->ift_tso_desc_tag = NULL;
 1378         }
 1379 }
 1380 
 1381 static void
 1382 iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i)
 1383 {
 1384         struct mbuf **mp;
 1385 
 1386         mp = &txq->ift_sds.ifsd_m[i];
 1387         if (*mp == NULL)
 1388                 return;
 1389 
 1390         if (txq->ift_sds.ifsd_map != NULL) {
 1391                 bus_dmamap_sync(txq->ift_desc_tag,
 1392                                 txq->ift_sds.ifsd_map[i],
 1393                                 BUS_DMASYNC_POSTWRITE);
 1394                 bus_dmamap_unload(txq->ift_desc_tag,
 1395                                   txq->ift_sds.ifsd_map[i]);
 1396         }
 1397         m_free(*mp);
 1398         DBG_COUNTER_INC(tx_frees);
 1399         *mp = NULL;
 1400 }
 1401 
 1402 static int
 1403 iflib_txq_setup(iflib_txq_t txq)
 1404 {
 1405         if_ctx_t ctx = txq->ift_ctx;
 1406         if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 1407         iflib_dma_info_t di;
 1408         int i;
 1409 
  1410         /* Set number of descriptors available */
 1411         txq->ift_qstatus = IFLIB_QUEUE_IDLE;
 1412 
 1413         /* Reset indices */
 1414         txq->ift_cidx_processed = txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0;
 1415         txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset];
 1416 
 1417         for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++)
 1418                 bzero((void *)di->idi_vaddr, di->idi_size);
 1419 
 1420         IFDI_TXQ_SETUP(ctx, txq->ift_id);
 1421         for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++)
 1422                 bus_dmamap_sync(di->idi_tag, di->idi_map,
 1423                                                 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 1424         return (0);
 1425 }
 1426 
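       /*
        * Illustrative sketch, not part of iflib: the busdma sync discipline
        * used by iflib_txq_setup() above.  CPU stores to a descriptor ring
        * must be followed by a PREREAD|PREWRITE sync before the device may
        * observe them; the matching POSTREAD|POSTWRITE sync precedes CPU
        * reads.  example_ring_publish() is a hypothetical helper; compiled
        * out, for illustration only.
        */
       #if 0
       static void
       example_ring_publish(bus_dma_tag_t tag, bus_dmamap_t map, void *vaddr,
           bus_size_t size)
       {

               bzero(vaddr, size);             /* CPU-side initialization */
               /* make the zeroed ring visible to the device */
               bus_dmamap_sync(tag, map,
                   BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
       }
       #endif
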
 1427 /*********************************************************************
 1428  *
 1429  *  Allocate memory for rx_buffer structures. Since we use one
 1430  *  rx_buffer per received packet, the maximum number of rx_buffer's
 1431  *  that we'll need is equal to the number of receive descriptors
 1432  *  that we've allocated.
 1433  *
 1434  **********************************************************************/
 1435 static int
 1436 iflib_rxsd_alloc(iflib_rxq_t rxq)
 1437 {
 1438         if_ctx_t ctx = rxq->ifr_ctx;
 1439         if_shared_ctx_t sctx = ctx->ifc_sctx;
 1440         if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 1441         device_t dev = ctx->ifc_dev;
 1442         iflib_fl_t fl;
 1443         iflib_rxsd_t    rxsd;
 1444         int                     err;
 1445 
 1446         MPASS(scctx->isc_nrxd[0] > 0);
 1447         MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0);
 1448 
 1449         fl = rxq->ifr_fl;
 1450         for (int i = 0; i <  rxq->ifr_nfl; i++, fl++) {
 1451                 fl->ifl_sds = malloc(sizeof(struct iflib_sw_rx_desc) *
 1452                     scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB,
 1453                     M_WAITOK | M_ZERO);
 1454                 if (fl->ifl_sds == NULL) {
 1455                         device_printf(dev, "Unable to allocate rx sw desc memory\n");
 1456                         return (ENOMEM);
 1457                 }
 1458                 fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */
 1459                 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
 1460                                          1, 0,                  /* alignment, bounds */
 1461                                          BUS_SPACE_MAXADDR,     /* lowaddr */
 1462                                          BUS_SPACE_MAXADDR,     /* highaddr */
 1463                                          NULL, NULL,            /* filter, filterarg */
 1464                                          sctx->isc_rx_maxsize,  /* maxsize */
 1465                                          sctx->isc_rx_nsegments,        /* nsegments */
 1466                                          sctx->isc_rx_maxsegsize,       /* maxsegsize */
 1467                                          0,                     /* flags */
 1468                                          NULL,                  /* lockfunc */
 1469                                          NULL,                  /* lockarg */
 1470                                          &fl->ifl_desc_tag);
 1471                 if (err) {
 1472                         device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
 1473                                 __func__, err);
 1474                         goto fail;
 1475                 }
 1476 
 1477                 rxsd = fl->ifl_sds;
 1478                 for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++, rxsd++) {
 1479                         err = bus_dmamap_create(fl->ifl_desc_tag, 0, &rxsd->ifsd_map);
 1480                         if (err) {
 1481                                 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
 1482                                         __func__, err);
 1483                                 goto fail;
 1484                         }
 1485                 }
 1486         }
 1487         return (0);
 1488 
 1489 fail:
 1490         iflib_rx_structures_free(ctx);
 1491         return (err);
 1492 }
 1493 
 1494 
 1495 /*
 1496  * Internal service routines
 1497  */
 1498 
 1499 struct rxq_refill_cb_arg {
 1500         int               error;
 1501         bus_dma_segment_t seg;
 1502         int               nseg;
 1503 };
 1504 
 1505 static void
 1506 _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 1507 {
 1508         struct rxq_refill_cb_arg *cb_arg = arg;
 1509 
 1510         cb_arg->error = error;
 1511         cb_arg->seg = segs[0];
 1512         cb_arg->nseg = nseg;
 1513 }
 1514 
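       /*
        * Illustrative sketch, not part of iflib: how a callback of this shape
        * is consumed.  With BUS_DMA_NOWAIT (and no bounce-buffer deferral),
        * bus_dmamap_load(9) invokes the callback synchronously, so the single
        * segment can be read back out of the stack-allocated argument right
        * after the call, as _iflib_fl_refill() does below.  Compiled out;
        * for illustration only.
        */
       #if 0
       static int
       example_load_cluster(bus_dma_tag_t tag, bus_dmamap_t map, caddr_t cl,
           bus_size_t len, bus_addr_t *busaddrp)
       {
               struct rxq_refill_cb_arg cb_arg;
               int err;

               cb_arg.error = 0;
               err = bus_dmamap_load(tag, map, cl, len, _rxq_refill_cb,
                   &cb_arg, 0);
               if (err != 0 || cb_arg.error)
                       return (err != 0 ? err : cb_arg.error);
               *busaddrp = cb_arg.seg.ds_addr; /* single physical segment */
               return (0);
       }
       #endif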
 1515 
 1516 #ifdef ACPI_DMAR
 1517 #define IS_DMAR(ctx) (ctx->ifc_flags & IFC_DMAR)
 1518 #else
 1519 #define IS_DMAR(ctx) (0)
 1520 #endif
 1521 
  1522 /**
  1523  *      _iflib_fl_refill - refill an rxq free-buffer list
  1524  *      @ctx: the iflib context
  1525  *      @fl: the free list to refill
  1526  *      @count: the number of new buffers to allocate
  1527  *
  1528  *      (Re)populate an rxq free-buffer list with up to @count new packet buffers.
  1529  *      The caller must assure that @count does not exceed the free list's capacity.
  1530  */
 1531 static void
 1532 _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
 1533 {
 1534         struct mbuf *m;
 1535         int pidx = fl->ifl_pidx;
 1536         iflib_rxsd_t rxsd = &fl->ifl_sds[pidx];
 1537         caddr_t cl;
 1538         int n, i = 0;
 1539         uint64_t bus_addr;
 1540         int err;
 1541 
 1542         n  = count;
 1543         MPASS(n > 0);
 1544         MPASS(fl->ifl_credits + n <= fl->ifl_size);
 1545 
 1546         if (pidx < fl->ifl_cidx)
 1547                 MPASS(pidx + n <= fl->ifl_cidx);
 1548         if (pidx == fl->ifl_cidx && (fl->ifl_credits < fl->ifl_size))
 1549                 MPASS(fl->ifl_gen == 0);
 1550         if (pidx > fl->ifl_cidx)
 1551                 MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx);
 1552 
 1553         DBG_COUNTER_INC(fl_refills);
 1554         if (n > 8)
 1555                 DBG_COUNTER_INC(fl_refills_large);
 1556 
 1557         while (n--) {
  1558                 /*
  1559                  * We allocate an uninitialized mbuf + cluster; the mbuf is
  1560                  * initialized after rx.
  1561                  *
  1562                  * If the cluster is still set, we know a minimum-sized packet was received.
  1563                  */
 1564                 if ((cl = rxsd->ifsd_cl) == NULL) {
 1565                         if ((cl = rxsd->ifsd_cl = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL)
 1566                                 break;
 1567 #if MEMORY_LOGGING
 1568                         fl->ifl_cl_enqueued++;
 1569 #endif
 1570                 }
 1571                 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
 1572                         break;
 1573                 }
 1574 #if MEMORY_LOGGING
 1575                 fl->ifl_m_enqueued++;
 1576 #endif
 1577 
 1578                 DBG_COUNTER_INC(rx_allocs);
 1579 #ifdef notyet
 1580                 if ((rxsd->ifsd_flags & RX_SW_DESC_MAP_CREATED) == 0) {
 1581                         int err;
 1582 
 1583                         if ((err = bus_dmamap_create(fl->ifl_ifdi->idi_tag, 0, &rxsd->ifsd_map))) {
 1584                                 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
 1585                                 uma_zfree(fl->ifl_zone, cl);
 1586                                 n = 0;
 1587                                 goto done;
 1588                         }
 1589                         rxsd->ifsd_flags |= RX_SW_DESC_MAP_CREATED;
 1590                 }
 1591 #endif
 1592 #if defined(__i386__) || defined(__amd64__)
 1593                 if (!IS_DMAR(ctx)) {
 1594                         bus_addr = pmap_kextract((vm_offset_t)cl);
 1595                 } else
 1596 #endif
 1597                 {
 1598                         struct rxq_refill_cb_arg cb_arg;
 1599                         iflib_rxq_t q;
 1600 
 1601                         cb_arg.error = 0;
 1602                         q = fl->ifl_rxq;
 1603                         err = bus_dmamap_load(fl->ifl_desc_tag, rxsd->ifsd_map,
 1604                          cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0);
 1605 
 1606                         if (err != 0 || cb_arg.error) {
 1607                                 /*
 1608                                  * !zone_pack ?
 1609                                  */
 1610                                 if (fl->ifl_zone == zone_pack)
 1611                                         uma_zfree(fl->ifl_zone, cl);
 1612                                 m_free(m);
 1613                                 n = 0;
 1614                                 goto done;
 1615                         }
 1616                         bus_addr = cb_arg.seg.ds_addr;
 1617                 }
 1618                 rxsd->ifsd_flags |= RX_SW_DESC_INUSE;
 1619 
 1620                 MPASS(rxsd->ifsd_m == NULL);
 1621                 rxsd->ifsd_cl = cl;
 1622                 rxsd->ifsd_m = m;
 1623                 fl->ifl_bus_addrs[i] = bus_addr;
 1624                 fl->ifl_vm_addrs[i] = cl;
 1625                 rxsd++;
 1626                 fl->ifl_credits++;
 1627                 i++;
 1628                 MPASS(fl->ifl_credits <= fl->ifl_size);
 1629                 if (++fl->ifl_pidx == fl->ifl_size) {
 1630                         fl->ifl_pidx = 0;
 1631                         fl->ifl_gen = 1;
 1632                         rxsd = fl->ifl_sds;
 1633                 }
 1634                 if (n == 0 || i == IFLIB_MAX_RX_REFRESH) {
 1635                         ctx->isc_rxd_refill(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx,
 1636                                                                  fl->ifl_bus_addrs, fl->ifl_vm_addrs, i, fl->ifl_buf_size);
 1637                         i = 0;
 1638                         pidx = fl->ifl_pidx;
 1639                 }
 1640         }
 1641 done:
 1642         DBG_COUNTER_INC(rxd_flush);
 1643         if (fl->ifl_pidx == 0)
 1644                 pidx = fl->ifl_size - 1;
 1645         else
 1646                 pidx = fl->ifl_pidx - 1;
 1647         ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx);
 1648 }
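
       /*
        * Worked example for the wrap logic above (assumed values, not taken
        * from a real run): with ifl_size = 1024, ifl_pidx = 1022 and a refill
        * of four buffers, ifl_pidx advances 1023 -> 0 (setting ifl_gen = 1)
        * -> 1 -> 2, and the final isc_rxd_flush() is handed pidx - 1 = 1, the
        * last slot actually filled, so the hardware never sees the producer
        * index catch up to the consumer.
        */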
 1649 
 1650 static __inline void
 1651 __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max)
 1652 {
 1653         /* we avoid allowing pidx to catch up with cidx as it confuses ixl */
 1654         int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1;
 1655 #ifdef INVARIANTS
 1656         int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1;
 1657 #endif
 1658 
 1659         MPASS(fl->ifl_credits <= fl->ifl_size);
 1660         MPASS(reclaimable == delta);
 1661 
 1662         if (reclaimable > 0)
 1663                 _iflib_fl_refill(ctx, fl, min(max, reclaimable));
 1664 }
 1665 
 1666 static void
 1667 iflib_fl_bufs_free(iflib_fl_t fl)
 1668 {
 1669         iflib_dma_info_t idi = fl->ifl_ifdi;
 1670         uint32_t i;
 1671 
 1672         for (i = 0; i < fl->ifl_size; i++) {
 1673                 iflib_rxsd_t d = &fl->ifl_sds[i];
 1674 
 1675                 if (d->ifsd_flags & RX_SW_DESC_INUSE) {
 1676                         bus_dmamap_unload(fl->ifl_desc_tag, d->ifsd_map);
 1677                         bus_dmamap_destroy(fl->ifl_desc_tag, d->ifsd_map);
 1678                         if (d->ifsd_m != NULL) {
 1679                                 m_init(d->ifsd_m, M_NOWAIT, MT_DATA, 0);
 1680                                 uma_zfree(zone_mbuf, d->ifsd_m);
 1681                         }
 1682                         if (d->ifsd_cl != NULL)
 1683                                 uma_zfree(fl->ifl_zone, d->ifsd_cl);
 1684                         d->ifsd_flags = 0;
 1685                 } else {
 1686                         MPASS(d->ifsd_cl == NULL);
 1687                         MPASS(d->ifsd_m == NULL);
 1688                 }
 1689 #if MEMORY_LOGGING
 1690                 fl->ifl_m_dequeued++;
 1691                 fl->ifl_cl_dequeued++;
 1692 #endif
 1693                 d->ifsd_cl = NULL;
 1694                 d->ifsd_m = NULL;
 1695         }
 1696         /*
 1697          * Reset free list values
 1698          */
  1699         fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = 0;
 1700         bzero(idi->idi_vaddr, idi->idi_size);
 1701 }
 1702 
 1703 /*********************************************************************
 1704  *
 1705  *  Initialize a receive ring and its buffers.
 1706  *
 1707  **********************************************************************/
 1708 static int
 1709 iflib_fl_setup(iflib_fl_t fl)
 1710 {
 1711         iflib_rxq_t rxq = fl->ifl_rxq;
 1712         if_ctx_t ctx = rxq->ifr_ctx;
 1713         if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
 1714 
 1715         /*
 1716         ** Free current RX buffer structs and their mbufs
 1717         */
 1718         iflib_fl_bufs_free(fl);
 1719         /* Now replenish the mbufs */
 1720         MPASS(fl->ifl_credits == 0);
 1721         /*
 1722          * XXX don't set the max_frame_size to larger
 1723          * than the hardware can handle
 1724          */
 1725         if (sctx->isc_max_frame_size <= 2048)
 1726                 fl->ifl_buf_size = MCLBYTES;
 1727         else if (sctx->isc_max_frame_size <= 4096)
 1728                 fl->ifl_buf_size = MJUMPAGESIZE;
 1729         else if (sctx->isc_max_frame_size <= 9216)
 1730                 fl->ifl_buf_size = MJUM9BYTES;
 1731         else
 1732                 fl->ifl_buf_size = MJUM16BYTES;
 1733         if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size)
 1734                 ctx->ifc_max_fl_buf_size = fl->ifl_buf_size;
 1735         fl->ifl_cltype = m_gettype(fl->ifl_buf_size);
 1736         fl->ifl_zone = m_getzone(fl->ifl_buf_size);
 1737 
 1738 
 1739         /* avoid pre-allocating zillions of clusters to an idle card
 1740          * potentially speeding up attach
 1741          */
 1742         _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size));
 1743         MPASS(min(128, fl->ifl_size) == fl->ifl_credits);
 1744         if (min(128, fl->ifl_size) != fl->ifl_credits)
 1745                 return (ENOBUFS);
 1746         /*
 1747          * handle failure
 1748          */
 1749         MPASS(rxq != NULL);
 1750         MPASS(fl->ifl_ifdi != NULL);
 1751         bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
 1752             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 1753         return (0);
 1754 }
 1755 
 1756 /*********************************************************************
 1757  *
 1758  *  Free receive ring data structures
 1759  *
 1760  **********************************************************************/
 1761 static void
 1762 iflib_rx_sds_free(iflib_rxq_t rxq)
 1763 {
 1764         iflib_fl_t fl;
 1765         int i;
 1766 
 1767         if (rxq->ifr_fl != NULL) {
  1768                 for (i = 0; i < rxq->ifr_nfl; i++) {
  1769                         fl = &rxq->ifr_fl[i];
  1770                         if (fl->ifl_desc_tag != NULL) {
  1771                                 bus_dma_tag_destroy(fl->ifl_desc_tag);
  1772                                 fl->ifl_desc_tag = NULL;
  1773                         }
  1774                         /* free each free list's sw descriptors, not just fl[0]'s */
  1775                         if (fl->ifl_sds != NULL)
  1776                                 free(fl->ifl_sds, M_IFLIB);
  1777                 }
 1778                 free(rxq->ifr_fl, M_IFLIB);
 1779                 rxq->ifr_fl = NULL;
 1780                 rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0;
 1781         }
 1782 }
 1783 
  1784 /*
  1785  * MI (machine-independent) logic
  1786  *
  1787  */
 1788 static void
 1789 iflib_timer(void *arg)
 1790 {
 1791         iflib_txq_t txq = arg;
 1792         if_ctx_t ctx = txq->ift_ctx;
 1793         if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 1794 
 1795         if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
 1796                 return;
  1797         /*
  1798         ** Check on the state of the TX queue(s); this
  1799         ** can be done without the lock because it's RO
  1800         ** and the HUNG state will be static if set.
  1801         */
 1802         IFDI_TIMER(ctx, txq->ift_id);
 1803         if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) &&
 1804                 (ctx->ifc_pause_frames == 0))
 1805                 goto hung;
 1806 
 1807         if (TXQ_AVAIL(txq) <= 2*scctx->isc_tx_nsegments ||
 1808             ifmp_ring_is_stalled(txq->ift_br[0]))
 1809                 GROUPTASK_ENQUEUE(&txq->ift_task);
 1810 
 1811         ctx->ifc_pause_frames = 0;
 1812         if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) 
 1813                 callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu);
 1814         return;
 1815 hung:
 1816         CTX_LOCK(ctx);
 1817         if_setdrvflagbits(ctx->ifc_ifp, 0, IFF_DRV_RUNNING);
 1818         device_printf(ctx->ifc_dev,  "TX(%d) desc avail = %d, pidx = %d\n",
 1819                                   txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx);
 1820 
 1821         IFDI_WATCHDOG_RESET(ctx);
 1822         ctx->ifc_watchdog_events++;
 1823         ctx->ifc_pause_frames = 0;
 1824 
 1825         iflib_init_locked(ctx);
 1826         CTX_UNLOCK(ctx);
 1827 }
 1828 
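       /*
        * Illustrative sketch, not part of iflib: the self-rearming callout(9)
        * pattern that iflib_timer() above relies on.  The handler reschedules
        * itself on the same CPU every hz/2 ticks for as long as the interface
        * stays running.  struct example_softc is hypothetical; compiled out,
        * for illustration only.
        */
       #if 0
       struct example_softc {
               struct callout timer;
               bool running;
       };

       static void
       example_watchdog(void *arg)
       {
               struct example_softc *sc = arg;

               if (!sc->running)
                       return;
               /* ... check queue health, kick a drain task if stalled ... */
               callout_reset_on(&sc->timer, hz / 2, example_watchdog, sc,
                   sc->timer.c_cpu);
       }
       #endif
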
 1829 static void
 1830 iflib_init_locked(if_ctx_t ctx)
 1831 {
 1832         if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
 1833         if_t ifp = ctx->ifc_ifp;
 1834         iflib_fl_t fl;
 1835         iflib_txq_t txq;
 1836         iflib_rxq_t rxq;
 1837         int i, j;
 1838 
 1839 
 1840         if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
 1841         IFDI_INTR_DISABLE(ctx);
 1842 
 1843         /* Set hardware offload abilities */
 1844         if_clearhwassist(ifp);
 1845         if (if_getcapenable(ifp) & IFCAP_TXCSUM)
 1846                 if_sethwassistbits(ifp, CSUM_IP | CSUM_TCP | CSUM_UDP, 0);
 1847         if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
 1848                 if_sethwassistbits(ifp,  (CSUM_TCP_IPV6 | CSUM_UDP_IPV6), 0);
 1849         if (if_getcapenable(ifp) & IFCAP_TSO4)
 1850                 if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
 1851         if (if_getcapenable(ifp) & IFCAP_TSO6)
 1852                 if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
 1853 
 1854         for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) {
 1855                 CALLOUT_LOCK(txq);
 1856                 callout_stop(&txq->ift_timer);
 1857                 callout_stop(&txq->ift_db_check);
 1858                 CALLOUT_UNLOCK(txq);
 1859                 iflib_netmap_txq_init(ctx, txq);
 1860         }
 1861         for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
 1862                 iflib_netmap_rxq_init(ctx, rxq);
 1863         }
 1864 #ifdef INVARIANTS
 1865         i = if_getdrvflags(ifp);
 1866 #endif
 1867         IFDI_INIT(ctx);
 1868         MPASS(if_getdrvflags(ifp) == i);
 1869         for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
 1870                 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
 1871                         if (iflib_fl_setup(fl)) {
 1872                                 device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n");
 1873                                 goto done;
 1874                         }
 1875                 }
 1876         }
 1877         done:
 1878         if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
 1879         IFDI_INTR_ENABLE(ctx);
 1880         txq = ctx->ifc_txqs;
 1881         for (i = 0; i < sctx->isc_ntxqsets; i++, txq++)
 1882                 callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq,
 1883                         txq->ift_timer.c_cpu);
 1884 }
 1885 
 1886 static int
 1887 iflib_media_change(if_t ifp)
 1888 {
 1889         if_ctx_t ctx = if_getsoftc(ifp);
 1890         int err;
 1891 
 1892         CTX_LOCK(ctx);
 1893         if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0)
 1894                 iflib_init_locked(ctx);
 1895         CTX_UNLOCK(ctx);
 1896         return (err);
 1897 }
 1898 
 1899 static void
 1900 iflib_media_status(if_t ifp, struct ifmediareq *ifmr)
 1901 {
 1902         if_ctx_t ctx = if_getsoftc(ifp);
 1903 
 1904         CTX_LOCK(ctx);
 1905         IFDI_UPDATE_ADMIN_STATUS(ctx);
 1906         IFDI_MEDIA_STATUS(ctx, ifmr);
 1907         CTX_UNLOCK(ctx);
 1908 }
 1909 
 1910 static void
 1911 iflib_stop(if_ctx_t ctx)
 1912 {
 1913         iflib_txq_t txq = ctx->ifc_txqs;
 1914         iflib_rxq_t rxq = ctx->ifc_rxqs;
 1915         if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 1916         iflib_dma_info_t di;
 1917         iflib_fl_t fl;
 1918         int i, j;
 1919 
 1920         /* Tell the stack that the interface is no longer active */
 1921         if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
 1922 
 1923         IFDI_INTR_DISABLE(ctx);
 1924         msleep(ctx, &ctx->ifc_mtx, PUSER, "iflib_init", hz);
 1925 
 1926         /* Wait for current tx queue users to exit to disarm watchdog timer. */
 1927         for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) {
 1928                 /* make sure all transmitters have completed before proceeding XXX */
 1929 
 1930                 /* clean any enqueued buffers */
 1931                 iflib_txq_check_drain(txq, 0);
 1932                 /* Free any existing tx buffers. */
 1933                 for (j = 0; j < txq->ift_size; j++) {
 1934                         iflib_txsd_free(ctx, txq, j);
 1935                 }
 1936                 txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0;
 1937                 txq->ift_in_use = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0;
 1938                 txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0;
 1939                 txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0;
 1940                 txq->ift_pullups = 0;
 1941                 ifmp_ring_reset_stats(txq->ift_br[0]);
 1942                 for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++)
 1943                         bzero((void *)di->idi_vaddr, di->idi_size);
 1944         }
 1945         for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) {
  1946                 /* make sure all receivers have completed before proceeding XXX */
  1947 
  1948                 for (j = 0, di = rxq->ifr_ifdi; j < ctx->ifc_nhwrxqs; j++, di++)
 1949                         bzero((void *)di->idi_vaddr, di->idi_size);
 1950                 /* also resets the free lists pidx/cidx */
 1951                 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
 1952                         iflib_fl_bufs_free(fl);
 1953         }
 1954         IFDI_STOP(ctx);
 1955 }
 1956 
 1957 static iflib_rxsd_t
 1958 rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int *cltype, int unload)
 1959 {
 1960         int flid, cidx;
 1961         iflib_rxsd_t sd;
 1962         iflib_fl_t fl;
 1963         iflib_dma_info_t di;
 1964 
 1965         flid = irf->irf_flid;
 1966         cidx = irf->irf_idx;
 1967         fl = &rxq->ifr_fl[flid];
 1968         fl->ifl_credits--;
 1969 #if MEMORY_LOGGING
 1970         fl->ifl_m_dequeued++;
 1971         if (cltype)
 1972                 fl->ifl_cl_dequeued++;
 1973 #endif
 1974         sd = &fl->ifl_sds[cidx];
 1975         di = fl->ifl_ifdi;
 1976         bus_dmamap_sync(di->idi_tag, di->idi_map,
 1977                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 1978 
 1979         /* not valid assert if bxe really does SGE from non-contiguous elements */
 1980         MPASS(fl->ifl_cidx == cidx);
 1981         if (unload)
 1982                 bus_dmamap_unload(fl->ifl_desc_tag, sd->ifsd_map);
 1983 
 1984         if (__predict_false(++fl->ifl_cidx == fl->ifl_size)) {
 1985                 fl->ifl_cidx = 0;
 1986                 fl->ifl_gen = 0;
 1987         }
 1988         /* YES ick */
 1989         if (cltype)
 1990                 *cltype = fl->ifl_cltype;
 1991         return (sd);
 1992 }
 1993 
 1994 static struct mbuf *
 1995 assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri)
 1996 {
  1997         int i, padlen, flags, cltype;
 1998         struct mbuf *m, *mh, *mt;
 1999         iflib_rxsd_t sd;
 2000         caddr_t cl;
 2001 
 2002         i = 0;
 2003         mh = NULL;
 2004         do {
 2005                 sd = rxd_frag_to_sd(rxq, &ri->iri_frags[i], &cltype, TRUE);
 2006 
 2007                 MPASS(sd->ifsd_cl != NULL);
 2008                 MPASS(sd->ifsd_m != NULL);
 2009 
 2010                 /* Don't include zero-length frags */
 2011                 if (ri->iri_frags[i].irf_len == 0) {
 2012                         /* XXX we can save the cluster here, but not the mbuf */
 2013                         m_init(sd->ifsd_m, M_NOWAIT, MT_DATA, 0);
 2014                         m_free(sd->ifsd_m);
 2015                         sd->ifsd_m = NULL;
 2016                         continue;
 2017                 }
 2018 
 2019                 m = sd->ifsd_m;
 2020                 if (mh == NULL) {
 2021                         flags = M_PKTHDR|M_EXT;
 2022                         mh = mt = m;
 2023                         padlen = ri->iri_pad;
 2024                 } else {
 2025                         flags = M_EXT;
 2026                         mt->m_next = m;
 2027                         mt = m;
 2028                         /* assuming padding is only on the first fragment */
 2029                         padlen = 0;
 2030                 }
 2031                 sd->ifsd_m = NULL;
 2032                 cl = sd->ifsd_cl;
 2033                 sd->ifsd_cl = NULL;
 2034 
  2035                 /* Can these two be made one? */
 2036                 m_init(m, M_NOWAIT, MT_DATA, flags);
 2037                 m_cljset(m, cl, cltype);
 2038                 /*
 2039                  * These must follow m_init and m_cljset
 2040                  */
 2041                 m->m_data += padlen;
 2042                 ri->iri_len -= padlen;
 2043                 m->m_len = ri->iri_frags[i].irf_len;
 2044         } while (++i < ri->iri_nfrags);
 2045 
 2046         return (mh);
 2047 }
 2048 
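       /*
        * Illustrative sketch, not part of iflib: the head/tail chain-building
        * idiom used by assemble_segments() above.  Only the head mbuf carries
        * M_PKTHDR; continuations are linked through m_next and the packet
        * header length is the sum of the fragment lengths.  example_chain()
        * is hypothetical and assumes frags[0] already has a packet header.
        * Compiled out; for illustration only.
        */
       #if 0
       static struct mbuf *
       example_chain(struct mbuf **frags, int *lens, int nfrags)
       {
               struct mbuf *mh, *mt;
               int i;

               mh = mt = frags[0];
               mh->m_len = mh->m_pkthdr.len = lens[0];
               for (i = 1; i < nfrags; i++) {
                       mt->m_next = frags[i];  /* append at the tail */
                       mt = frags[i];
                       mt->m_len = lens[i];
                       mh->m_pkthdr.len += lens[i];
               }
               return (mh);
       }
       #endif
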
 2049 /*
 2050  * Process one software descriptor
 2051  */
 2052 static struct mbuf *
 2053 iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri)
 2054 {
 2055         struct mbuf *m;
 2056         iflib_rxsd_t sd;
 2057 
 2058         /* should I merge this back in now that the two paths are basically duplicated? */
 2059         if (ri->iri_nfrags == 1 &&
 2060             ri->iri_frags[0].irf_len <= IFLIB_RX_COPY_THRESH) {
 2061                 sd = rxd_frag_to_sd(rxq, &ri->iri_frags[0], NULL, FALSE);
 2062                 m = sd->ifsd_m;
 2063                 sd->ifsd_m = NULL;
 2064                 m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR);
 2065                 memcpy(m->m_data, sd->ifsd_cl, ri->iri_len);
 2066                 m->m_len = ri->iri_frags[0].irf_len;
  2067         } else {
 2068                 m = assemble_segments(rxq, ri);
 2069         }
 2070         m->m_pkthdr.len = ri->iri_len;
 2071         m->m_pkthdr.rcvif = ri->iri_ifp;
 2072         m->m_flags |= ri->iri_flags;
 2073         m->m_pkthdr.ether_vtag = ri->iri_vtag;
 2074         m->m_pkthdr.flowid = ri->iri_flowid;
 2075         M_HASHTYPE_SET(m, ri->iri_rsstype);
 2076         m->m_pkthdr.csum_flags = ri->iri_csum_flags;
 2077         m->m_pkthdr.csum_data = ri->iri_csum_data;
 2078         return (m);
 2079 }
 2080 
 2081 static bool
 2082 iflib_rxeof(iflib_rxq_t rxq, int budget)
 2083 {
 2084         if_ctx_t ctx = rxq->ifr_ctx;
 2085         if_shared_ctx_t sctx = ctx->ifc_sctx;
 2086         if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 2087         int avail, i;
 2088         uint16_t *cidxp;
 2089         struct if_rxd_info ri;
 2090         int err, budget_left, rx_bytes, rx_pkts;
 2091         iflib_fl_t fl;
 2092         struct ifnet *ifp;
 2093         int lro_enabled;
 2094         /*
 2095          * XXX early demux data packets so that if_input processing only handles
 2096          * acks in interrupt context
 2097          */
 2098         struct mbuf *m, *mh, *mt;
 2099 
 2100         if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &budget)) {
 2101                 return (FALSE);
 2102         }
 2103 
 2104         mh = mt = NULL;
 2105         MPASS(budget > 0);
 2106         rx_pkts = rx_bytes = 0;
 2107         if (sctx->isc_flags & IFLIB_HAS_RXCQ)
 2108                 cidxp = &rxq->ifr_cq_cidx;
 2109         else
 2110                 cidxp = &rxq->ifr_fl[0].ifl_cidx;
 2111         if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) {
 2112                 for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
 2113                         __iflib_fl_refill_lt(ctx, fl, budget + 8);
 2114                 DBG_COUNTER_INC(rx_unavail);
 2115                 return (false);
 2116         }
 2117 
 2118         for (budget_left = budget; (budget_left > 0) && (avail > 0); budget_left--, avail--) {
 2119                 if (__predict_false(!CTX_ACTIVE(ctx))) {
 2120                         DBG_COUNTER_INC(rx_ctx_inactive);
 2121                         break;
 2122                 }
 2123                 /*
 2124                  * Reset client set fields to their default values
 2125                  */
 2126                 bzero(&ri, sizeof(ri));
 2127                 ri.iri_qsidx = rxq->ifr_id;
 2128                 ri.iri_cidx = *cidxp;
 2129                 ri.iri_ifp = ctx->ifc_ifp;
 2130                 ri.iri_frags = rxq->ifr_frags;
 2131                 err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
 2132 
  2133                 /* in lieu of handling correctly, assert that errors are not silently ignored */
 2134                 MPASS(err == 0);
 2135                 if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
 2136                         *cidxp = ri.iri_cidx;
 2137                         /* Update our consumer index */
 2138                         while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) {
 2139                                 rxq->ifr_cq_cidx -= scctx->isc_nrxd[0];
 2140                                 rxq->ifr_cq_gen = 0;
 2141                         }
 2142                         /* was this only a completion queue message? */
 2143                         if (__predict_false(ri.iri_nfrags == 0))
 2144                                 continue;
 2145                 }
 2146                 MPASS(ri.iri_nfrags != 0);
 2147                 MPASS(ri.iri_len != 0);
 2148 
 2149                 /* will advance the cidx on the corresponding free lists */
 2150                 m = iflib_rxd_pkt_get(rxq, &ri);
 2151                 if (avail == 0 && budget_left)
 2152                         avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left);
 2153 
 2154                 if (__predict_false(m == NULL)) {
 2155                         DBG_COUNTER_INC(rx_mbuf_null);
 2156                         continue;
 2157                 }
 2158                 /* imm_pkt: -- cxgb */
 2159                 if (mh == NULL)
 2160                         mh = mt = m;
 2161                 else {
 2162                         mt->m_nextpkt = m;
 2163                         mt = m;
 2164                 }
 2165         }
 2166         /* make sure that we can refill faster than drain */
 2167         for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
 2168                 __iflib_fl_refill_lt(ctx, fl, budget + 8);
 2169 
 2170         ifp = ctx->ifc_ifp;
 2171         lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO);
 2172         while (mh != NULL) {
 2173                 m = mh;
 2174                 mh = mh->m_nextpkt;
 2175                 m->m_nextpkt = NULL;
 2176                 rx_bytes += m->m_pkthdr.len;
 2177                 rx_pkts++;
 2178 #if defined(INET6) || defined(INET)
 2179                 if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0)
 2180                         continue;
 2181 #endif
 2182                 DBG_COUNTER_INC(rx_if_input);
 2183                 ifp->if_input(ifp, m);
 2184         }
 2185 
 2186         if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes);
 2187         if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts);
 2188 
 2189         /*
 2190          * Flush any outstanding LRO work
 2191          */
 2192 #if defined(INET6) || defined(INET)
 2193         tcp_lro_flush_all(&rxq->ifr_lc);
 2194 #endif
 2195         if (avail)
  2196                 return (true);
 2197         return (iflib_rxd_avail(ctx, rxq, *cidxp, 1));
 2198 }
 2199 
 2200 #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags)
  2201 #define M_HAS_VLANTAG(m) ((m)->m_flags & M_VLANTAG)
  2202 #define TXQ_MAX_DB_DEFERRED(size) ((size) >> 5)
  2203 #define TXQ_MAX_DB_CONSUMED(size) ((size) >> 4)
 2204 
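       /*
        * Worked example for the thresholds above (assumed ring size): with a
        * 1024-descriptor ring, TXQ_MAX_DB_DEFERRED(1024) = 1024 >> 5 = 32 and
        * TXQ_MAX_DB_CONSUMED(1024) = 1024 >> 4 = 64, i.e. a doorbell write is
        * forced once 1/32 of the ring is pending.
        */
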
 2205 static __inline void
 2206 iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring)
 2207 {
 2208         uint32_t dbval;
 2209 
 2210         if (ring || txq->ift_db_pending >=
 2211             TXQ_MAX_DB_DEFERRED(txq->ift_size)) {
 2212 
 2213                 /* the lock will only ever be contended in the !min_latency case */
 2214                 if (!TXDB_TRYLOCK(txq))
 2215                         return;
 2216                 dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx;
 2217                 ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval);
 2218                 txq->ift_db_pending = txq->ift_npending = 0;
 2219                 TXDB_UNLOCK(txq);
 2220         }
 2221 }
 2222 
 2223 static void
  2224 iflib_txd_deferred_db_check(void *arg)
 2225 {
 2226         iflib_txq_t txq = arg;
 2227 
 2228         /* simple non-zero boolean so use bitwise OR */
 2229         if ((txq->ift_db_pending | txq->ift_npending) &&
 2230             txq->ift_db_pending >= txq->ift_db_pending_queued)
 2231                 iflib_txd_db_check(txq->ift_ctx, txq, TRUE);
 2232         txq->ift_db_pending_queued = 0;
 2233         if (ifmp_ring_is_stalled(txq->ift_br[0]))
 2234                 iflib_txq_check_drain(txq, 4);
 2235 }
 2236 
 2237 #ifdef PKT_DEBUG
 2238 static void
 2239 print_pkt(if_pkt_info_t pi)
 2240 {
 2241         printf("pi len:  %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n",
 2242                pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx);
 2243         printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n",
 2244                pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag);
 2245         printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n",
 2246                pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto);
 2247 }
 2248 #endif
 2249 
 2250 #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO)
 2251 #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO)
 2252 
 2253 static int
 2254 iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
 2255 {
 2256         struct ether_vlan_header *eh;
 2257         struct mbuf *m, *n;
 2258 
 2259         n = m = *mp;
 2260         /*
 2261          * Determine where frame payload starts.
 2262          * Jump over vlan headers if already present,
 2263          * helpful for QinQ too.
 2264          */
 2265         if (__predict_false(m->m_len < sizeof(*eh))) {
 2266                 txq->ift_pullups++;
 2267                 if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL))
 2268                         return (ENOMEM);
 2269         }
 2270         eh = mtod(m, struct ether_vlan_header *);
 2271         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 2272                 pi->ipi_etype = ntohs(eh->evl_proto);
 2273                 pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 2274         } else {
 2275                 pi->ipi_etype = ntohs(eh->evl_encap_proto);
 2276                 pi->ipi_ehdrlen = ETHER_HDR_LEN;
 2277         }
 2278 
 2279         switch (pi->ipi_etype) {
 2280 #ifdef INET
 2281         case ETHERTYPE_IP:
 2282         {
 2283                 struct ip *ip = NULL;
 2284                 struct tcphdr *th = NULL;
 2285                 int minthlen;
 2286 
 2287                 minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th));
 2288                 if (__predict_false(m->m_len < minthlen)) {
 2289                         /*
 2290                          * if this code bloat is causing too much of a hit
 2291                          * move it to a separate function and mark it noinline
 2292                          */
 2293                         if (m->m_len == pi->ipi_ehdrlen) {
 2294                                 n = m->m_next;
 2295                                 MPASS(n);
 2296                                 if (n->m_len >= sizeof(*ip))  {
 2297                                         ip = (struct ip *)n->m_data;
 2298                                         if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th))
 2299                                                 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 2300                                 } else {
 2301                                         txq->ift_pullups++;
 2302                                         if (__predict_false((m = m_pullup(m, minthlen)) == NULL))
 2303                                                 return (ENOMEM);
 2304                                         ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
 2305                                 }
 2306                         } else {
 2307                                 txq->ift_pullups++;
 2308                                 if (__predict_false((m = m_pullup(m, minthlen)) == NULL))
 2309                                         return (ENOMEM);
 2310                                 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
 2311                                 if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th))
 2312                                         th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 2313                         }
 2314                 } else {
 2315                         ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
 2316                         if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th))
 2317                                 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 2318                 }
 2319                 pi->ipi_ip_hlen = ip->ip_hl << 2;
 2320                 pi->ipi_ipproto = ip->ip_p;
 2321                 pi->ipi_flags |= IPI_TX_IPV4;
 2322 
 2323                 if (pi->ipi_csum_flags & CSUM_IP)
 2324                        ip->ip_sum = 0;
 2325 
 2326                 if (pi->ipi_ipproto == IPPROTO_TCP) {
 2327                         if (__predict_false(th == NULL)) {
 2328                                 txq->ift_pullups++;
 2329                                 if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL))
 2330                                         return (ENOMEM);
 2331                                 th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen);
 2332                         }
 2333                         pi->ipi_tcp_hflags = th->th_flags;
 2334                         pi->ipi_tcp_hlen = th->th_off << 2;
 2335                         pi->ipi_tcp_seq = th->th_seq;
 2336                 }
 2337                 if (IS_TSO4(pi)) {
 2338                         if (__predict_false(ip->ip_p != IPPROTO_TCP))
 2339                                 return (ENXIO);
 2340                         th->th_sum = in_pseudo(ip->ip_src.s_addr,
 2341                                                ip->ip_dst.s_addr, htons(IPPROTO_TCP));
 2342                         pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
 2343                 }
 2344                 break;
 2345         }
 2346 #endif
 2347 #ifdef INET6
 2348         case ETHERTYPE_IPV6:
 2349         {
 2350                 struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen);
 2351                 struct tcphdr *th;
 2352                 pi->ipi_ip_hlen = sizeof(struct ip6_hdr);
 2353 
 2354                 if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) {
 2355                         if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL))
 2356                                 return (ENOMEM);
 2357                 }
 2358                 th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen);
 2359 
 2360                 /* XXX-BZ this will go badly in case of ext hdrs. */
 2361                 pi->ipi_ipproto = ip6->ip6_nxt;
 2362                 pi->ipi_flags |= IPI_TX_IPV6;
 2363 
 2364                 if (pi->ipi_ipproto == IPPROTO_TCP) {
 2365                         if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) {
 2366                                 if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL))
 2367                                         return (ENOMEM);
 2368                         }
 2369                         pi->ipi_tcp_hflags = th->th_flags;
 2370                         pi->ipi_tcp_hlen = th->th_off << 2;
 2371                 }
 2372                 if (IS_TSO6(pi)) {
 2373 
 2374                         if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP))
 2375                                 return (ENXIO);
 2376                         /*
 2377                          * The corresponding flag is set by the stack in the IPv4
 2378                          * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
 2379                          * So, set it here because the rest of the flow requires it.
 2380                          */
 2381                         pi->ipi_csum_flags |= CSUM_TCP_IPV6;
 2382                         th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
 2383                         pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
 2384                 }
 2385                 break;
 2386         }
 2387 #endif
 2388         default:
 2389                 pi->ipi_csum_flags &= ~CSUM_OFFLOAD;
 2390                 pi->ipi_ip_hlen = 0;
 2391                 break;
 2392         }
 2393         *mp = m;
 2394         return (0);
 2395 }
 2396 
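       /*
        * Illustrative sketch, not part of iflib: the VLAN-aware ethertype
        * parse performed at the top of iflib_parse_header() above.  It
        * assumes, as the caller there guarantees via m_pullup(), that the
        * first mbuf holds at least sizeof(struct ether_vlan_header) bytes.
        * Compiled out; for illustration only.
        */
       #if 0
       static uint16_t
       example_etype(struct mbuf *m, int *hdrlenp)
       {
               struct ether_vlan_header *eh;

               eh = mtod(m, struct ether_vlan_header *);
               if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
                       *hdrlenp = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
                       return (ntohs(eh->evl_proto));
               }
               *hdrlenp = ETHER_HDR_LEN;
               return (ntohs(eh->evl_encap_proto));
       }
       #endif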
 2397 
  2398 static __noinline struct mbuf *
 2399 collapse_pkthdr(struct mbuf *m0)
 2400 {
 2401         struct mbuf *m, *m_next, *tmp;
 2402 
 2403         m = m0;
 2404         m_next = m->m_next;
 2405         while (m_next != NULL && m_next->m_len == 0) {
  2406                 m = m_next;
  2407                 m_next = m_next->m_next;        /* advance before freeing m */
  2408                 m->m_next = NULL;
  2409                 m_free(m);
 2410         }
 2411         m = m0;
 2412         m->m_next = m_next;
 2413         if ((m_next->m_flags & M_EXT) == 0) {
 2414                 m = m_defrag(m, M_NOWAIT);
 2415         } else {
 2416                 tmp = m_next->m_next;
 2417                 memcpy(m_next, m, MPKTHSIZE);
 2418                 m = m_next;
 2419                 m->m_next = tmp;
 2420         }
 2421         return (m);
 2422 }
 2423 
  2424 /*
  2425  * If dodgy hardware rejects the scatter-gather chain we've handed it,
  2426  * we'll need to remove the mbuf chain from ifsg_m[] before we can add
  2427  * the m_defrag'd mbufs.
  2428  */
 2429 static __noinline struct mbuf *
 2430 iflib_remove_mbuf(iflib_txq_t txq)
 2431 {
 2432         int ntxd, i, pidx;
 2433         struct mbuf *m, *mh, **ifsd_m;
 2434 
 2435         pidx = txq->ift_pidx;
 2436         ifsd_m = txq->ift_sds.ifsd_m;
 2437         ntxd = txq->ift_size;
 2438         mh = m = ifsd_m[pidx];
 2439         ifsd_m[pidx] = NULL;
 2440 #if MEMORY_LOGGING
 2441         txq->ift_dequeued++;
 2442 #endif
 2443         i = 1;
 2444 
 2445         while (m) {
 2446                 ifsd_m[(pidx + i) & (ntxd -1)] = NULL;
 2447 #if MEMORY_LOGGING
 2448                 txq->ift_dequeued++;
 2449 #endif
 2450                 m = m->m_next;
 2451                 i++;
 2452         }
 2453         return (mh);
 2454 }
 2455 
 2456 static int
 2457 iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map,
 2458                           struct mbuf **m0, bus_dma_segment_t *segs, int *nsegs,
 2459                           int max_segs, int flags)
 2460 {
 2461         if_ctx_t ctx;
 2462         if_shared_ctx_t         sctx;
 2463         if_softc_ctx_t          scctx;
 2464         int i, next, pidx, mask, err, maxsegsz, ntxd, count;
 2465         struct mbuf *m, *tmp, **ifsd_m, **mp;
 2466 
 2467         m = *m0;
 2468 
 2469         /*
 2470          * Please don't ever do this
 2471          */
 2472         if (__predict_false(m->m_len == 0))
 2473                 *m0 = m = collapse_pkthdr(m);
 2474 
 2475         ctx = txq->ift_ctx;
 2476         sctx = ctx->ifc_sctx;
 2477         scctx = &ctx->ifc_softc_ctx;
 2478         ifsd_m = txq->ift_sds.ifsd_m;
 2479         ntxd = txq->ift_size;
 2480         pidx = txq->ift_pidx;
 2481         if (map != NULL) {
 2482                 uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags;
 2483 
 2484                 err = bus_dmamap_load_mbuf_sg(tag, map,
 2485                                               *m0, segs, nsegs, BUS_DMA_NOWAIT);
 2486                 if (err)
 2487                         return (err);
 2488                 ifsd_flags[pidx] |= TX_SW_DESC_MAPPED;
 2489                 i = 0;
 2490                 next = pidx;
 2491                 mask = (txq->ift_size-1);
 2492                 m = *m0;
 2493                 do {
 2494                         mp = &ifsd_m[next];
 2495                         *mp = m;
 2496                         m = m->m_next;
 2497                         if (__predict_false((*mp)->m_len == 0)) {
 2498                                 m_free(*mp);
 2499                                 *mp = NULL;
 2500                         } else
  2501                                 next = (pidx + ++i) & (ntxd-1);
 2502                 } while (m != NULL);
 2503         } else {
 2504                 int buflen, sgsize, max_sgsize;
 2505                 vm_offset_t vaddr;
 2506                 vm_paddr_t curaddr;
 2507 
 2508                 count = i = 0;
 2509                 maxsegsz = sctx->isc_tx_maxsize;
 2510                 m = *m0;
 2511                 do {
 2512                         if (__predict_false(m->m_len <= 0)) {
 2513                                 tmp = m;
 2514                                 m = m->m_next;
 2515                                 tmp->m_next = NULL;
 2516                                 m_free(tmp);
 2517                                 continue;
 2518                         }
 2519                         buflen = m->m_len;
 2520                         vaddr = (vm_offset_t)m->m_data;
 2521                         /*
 2522                          * see if we can't be smarter about physically
 2523                          * contiguous mappings
 2524                          */
 2525                         next = (pidx + count) & (ntxd-1);
 2526                         MPASS(ifsd_m[next] == NULL);
 2527 #if MEMORY_LOGGING
 2528                         txq->ift_enqueued++;
 2529 #endif
 2530                         ifsd_m[next] = m;
 2531                         while (buflen > 0) {
 2532                                 max_sgsize = MIN(buflen, maxsegsz);
 2533                                 curaddr = pmap_kextract(vaddr);
 2534                                 sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
 2535                                 sgsize = MIN(sgsize, max_sgsize);
 2536                                 segs[i].ds_addr = curaddr;
 2537                                 segs[i].ds_len = sgsize;
 2538                                 vaddr += sgsize;
 2539                                 buflen -= sgsize;
 2540                                 i++;
 2541                                 if (i >= max_segs)
 2542                                         goto err;
 2543                         }
 2544                         count++;
 2545                         tmp = m;
 2546                         m = m->m_next;
 2547                 } while (m != NULL);
 2548                 *nsegs = i;
 2549         }
 2550         return (0);
 2551 err:
 2552         *m0 = iflib_remove_mbuf(txq);
 2553         return (EFBIG);
 2554 }
 2555 
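       /*
        * Illustrative sketch, not part of iflib: the mapped busdma path of
        * the function above.  bus_dmamap_load_mbuf_sg(9) fills the supplied
        * segment array, bounded by the tag's nsegments, and returns EFBIG
        * when the chain needs more segments than that, which is what drives
        * the m_collapse()/m_defrag() recovery in iflib_encap() below.
        * Compiled out; for illustration only.
        */
       #if 0
       static int
       example_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf *m,
           bus_dma_segment_t *segs, int *nsegsp)
       {

               /* on success *nsegsp holds the number of segments used */
               return (bus_dmamap_load_mbuf_sg(tag, map, m, segs, nsegsp,
                   BUS_DMA_NOWAIT));
       }
       #endif
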
 2556 static int
 2557 iflib_encap(iflib_txq_t txq, struct mbuf **m_headp)
 2558 {
 2559         if_ctx_t                ctx;
 2560         if_shared_ctx_t         sctx;
 2561         if_softc_ctx_t          scctx;
 2562         bus_dma_segment_t       *segs;
 2563         struct mbuf             *m_head;
 2564         bus_dmamap_t            map;
 2565         struct if_pkt_info      pi;
 2566         int remap = 0;
 2567         int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd;
 2568         bus_dma_tag_t desc_tag;
 2569 
 2570         segs = txq->ift_segs;
 2571         ctx = txq->ift_ctx;
 2572         sctx = ctx->ifc_sctx;
 2573         scctx = &ctx->ifc_softc_ctx;
 2575         ntxd = txq->ift_size;
 2576         m_head = *m_headp;
 2577         map = NULL;
 2578 
 2579         /*
 2580          * If we're doing TSO the next descriptor to clean may be quite far ahead
 2581          */
 2582         cidx = txq->ift_cidx;
 2583         pidx = txq->ift_pidx;
 2584         next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1);
 2585 
 2586         /* prefetch the next cache line of mbuf pointers and flags */
 2587         prefetch(&txq->ift_sds.ifsd_m[next]);
 2588         if (txq->ift_sds.ifsd_map != NULL) {
 2589                 prefetch(&txq->ift_sds.ifsd_map[next]);
 2590                 map = txq->ift_sds.ifsd_map[pidx];
 2591                 next = (cidx + CACHE_LINE_SIZE) & (ntxd-1);
 2592                 prefetch(&txq->ift_sds.ifsd_flags[next]);
 2593         }
 2594 
 2595 
 2596         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
 2597                 desc_tag = txq->ift_tso_desc_tag;
 2598                 max_segs = scctx->isc_tx_tso_segments_max;
 2599         } else {
 2600                 desc_tag = txq->ift_desc_tag;
 2601                 max_segs = scctx->isc_tx_nsegments;
 2602         }
 2603         m_head = *m_headp;
 2604         bzero(&pi, sizeof(pi));
 2605         pi.ipi_len = m_head->m_pkthdr.len;
 2606         pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST));
 2607         pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags;
 2608         pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0;
 2609         pi.ipi_pidx = pidx;
 2610         pi.ipi_qsidx = txq->ift_id;
 2611 
 2612         /* deliberate bitwise OR to make one condition */
 2613         if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) {
 2614                 if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0))
 2615                         return (err);
 2616                 m_head = *m_headp;
 2617         }
 2618 
 2619 retry:
 2620         err = iflib_busdma_load_mbuf_sg(txq, desc_tag, map, m_headp, segs, &nsegs, max_segs, BUS_DMA_NOWAIT);
 2621 defrag:
 2622         if (__predict_false(err)) {
 2623                 switch (err) {
 2624                 case EFBIG:
 2625                         /* try collapse once and defrag once */
 2626                         if (remap == 0)
 2627                                 m_head = m_collapse(*m_headp, M_NOWAIT, max_segs);
 2628                         if (remap == 1)
 2629                                 m_head = m_defrag(*m_headp, M_NOWAIT);
 2630                         remap++;
 2631                         if (__predict_false(m_head == NULL))
 2632                                 goto defrag_failed;
 2633                         txq->ift_mbuf_defrag++;
 2634                         *m_headp = m_head;
 2635                         goto retry;
 2636                         break;
 2637                 case ENOMEM:
 2638                         txq->ift_no_tx_dma_setup++;
 2639                         break;
 2640                 default:
 2641                         txq->ift_no_tx_dma_setup++;
 2642                         m_freem(*m_headp);
 2643                         DBG_COUNTER_INC(tx_frees);
 2644                         *m_headp = NULL;
 2645                         break;
 2646                 }
 2647                 txq->ift_map_failed++;
 2648                 DBG_COUNTER_INC(encap_load_mbuf_fail);
 2649                 return (err);
 2650         }
 2651 
 2652         /*
 2653          * XXX assumes a 1 to 1 relationship between segments and
 2654          *        descriptors - this does not hold true on all drivers, e.g.
 2655          *        cxgb
 2656          */
 2657         if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) {
 2658                 txq->ift_no_desc_avail++;
 2659                 if (map != NULL)
 2660                         bus_dmamap_unload(desc_tag, map);
 2661                 DBG_COUNTER_INC(encap_txq_avail_fail);
 2662                 if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0)
 2663                         GROUPTASK_ENQUEUE(&txq->ift_task);
 2664                 return (ENOBUFS);
 2665         }
 2666         pi.ipi_segs = segs;
 2667         pi.ipi_nsegs = nsegs;
 2668 
 2669         MPASS(pidx >= 0 && pidx < txq->ift_size);
 2670 #ifdef PKT_DEBUG
 2671         print_pkt(&pi);
 2672 #endif
 2673         if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) {
 2674                 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
 2675                                                 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 2676 
 2677                 DBG_COUNTER_INC(tx_encap);
 2678                 MPASS(pi.ipi_new_pidx >= 0 &&
 2679                     pi.ipi_new_pidx < txq->ift_size);
 2680 
 2681                 ndesc = pi.ipi_new_pidx - pi.ipi_pidx;
 2682                 if (pi.ipi_new_pidx < pi.ipi_pidx) {
 2683                         ndesc += txq->ift_size;
 2684                         txq->ift_gen = 1;
 2685                 }
 2686                 MPASS(pi.ipi_new_pidx != pidx);
 2687                 MPASS(ndesc > 0);
 2688                 txq->ift_in_use += ndesc;
 2689                 /*
 2690                  * We update the last software descriptor again here because there may
 2691                  * be a sentinel and/or there may be more mbufs than segments
 2692                  */
 2693                 txq->ift_pidx = pi.ipi_new_pidx;
 2694                 txq->ift_npending += pi.ipi_ndescs;
 2695         } else if (__predict_false(err == EFBIG && remap < 2)) {
 2696                 *m_headp = m_head = iflib_remove_mbuf(txq);
 2697                 remap = 1;
 2698                 txq->ift_txd_encap_efbig++;
 2699                 goto defrag;
 2700         } else
 2701                 DBG_COUNTER_INC(encap_txd_encap_fail);
 2702         return (err);
 2703 
 2704 defrag_failed:
 2705         txq->ift_mbuf_defrag_failed++;
 2706         txq->ift_map_failed++;
 2707         m_freem(*m_headp);
 2708         DBG_COUNTER_INC(tx_frees);
 2709         *m_headp = NULL;
 2710         return (ENOMEM);
 2711 }
 2712 
 2713 /* forward compatibility for cxgb */
 2714 #define FIRST_QSET(ctx) 0
 2715 
 2716 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets)
 2717 #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets)
 2718 #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx))
 2719 #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments))
 2720 #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh)
 2721 #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max)
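      
      /*
       * A worked example of the reclaim arithmetic above, with illustrative
       * values (not taken from any particular driver): if ift_processed is
       * 1000, ift_cleaned is 900 and isc_tx_nsegments is 8, then
       *
       *	DESC_RECLAIMABLE(q) == 1000 - 900 - 8 == 92
       *
       * so roughly one maximum-sized packet's worth of segments is held
       * back as a margin rather than being reported as reclaimable.
       */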
 2722 
 2723 
 2724 
 2725 /* if there are more than TXQ_MIN_OCCUPANCY packets pending we consider deferring
 2726  * doorbell writes
 2727  *
 2728  * ORing with 2 ensures that the minimum occupancy is never less than 2 without any conditional logic
 2729  */
 2730 #define TXQ_MIN_OCCUPANCY(size) (((size) >> 6) | 0x2)
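      /*
       * For example, with a 1024-descriptor ring:
       *
       *	TXQ_MIN_OCCUPANCY(1024) == ((1024 >> 6) | 0x2) == 16 | 2 == 18
       *
       * and for very small rings the OR with 0x2 keeps the floor at 2.
       */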
 2731 
 2732 static inline int
 2733 iflib_txq_min_occupancy(iflib_txq_t txq)
 2734 {
 2735         if_ctx_t ctx;
 2736 
 2737         ctx = txq->ift_ctx;
 2738         return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx,
 2739             txq->ift_gen) < TXQ_MIN_OCCUPANCY(txq->ift_size) +
 2740             MAX_TX_DESC(ctx));
 2741 }
 2742 
 2743 static void
 2744 iflib_tx_desc_free(iflib_txq_t txq, int n)
 2745 {
 2746         int hasmap;
 2747         uint32_t qsize, cidx, mask, gen;
 2748         struct mbuf *m, **ifsd_m;
 2749         uint8_t *ifsd_flags;
 2750         bus_dmamap_t *ifsd_map;
 2751 
 2752         cidx = txq->ift_cidx;
 2753         gen = txq->ift_gen;
 2754         qsize = txq->ift_size;
 2755         mask = qsize-1;
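              /*
               * The ring size is a power of 2 (enforced at attach time), so
               * AND-ing an index with qsize - 1 wraps it without a divide.
               */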
 2756         hasmap = txq->ift_sds.ifsd_map != NULL;
 2757         ifsd_flags = txq->ift_sds.ifsd_flags;
 2758         ifsd_m = txq->ift_sds.ifsd_m;
 2759         ifsd_map = txq->ift_sds.ifsd_map;
 2760 
 2761         while (n--) {
 2762                 prefetch(ifsd_m[(cidx + 3) & mask]);
 2763                 prefetch(ifsd_m[(cidx + 4) & mask]);
 2764 
 2765                 if (ifsd_m[cidx] != NULL) {
 2766                         prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]);
 2767                         prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]);
 2768                         if (hasmap && (ifsd_flags[cidx] & TX_SW_DESC_MAPPED)) {
 2769                                 /*
 2770                                  * does it matter if it's not the TSO tag? If so we'll
 2771                                  * have to add the type to flags
 2772                                  */
 2773                                 bus_dmamap_unload(txq->ift_desc_tag, ifsd_map[cidx]);
 2774                                 ifsd_flags[cidx] &= ~TX_SW_DESC_MAPPED;
 2775                         }
 2776                         if ((m = ifsd_m[cidx]) != NULL) {
 2777                                 /* XXX we don't support any drivers that batch packets yet */
 2778                                 MPASS(m->m_nextpkt == NULL);
 2779 
 2780                                 m_free(m);
 2781                                 ifsd_m[cidx] = NULL;
 2782 #if MEMORY_LOGGING
 2783                                 txq->ift_dequeued++;
 2784 #endif
 2785                                 DBG_COUNTER_INC(tx_frees);
 2786                         }
 2787                 }
 2788                 if (__predict_false(++cidx == qsize)) {
 2789                         cidx = 0;
 2790                         gen = 0;
 2791                 }
 2792         }
 2793         txq->ift_cidx = cidx;
 2794         txq->ift_gen = gen;
 2795 }
 2796 
 2797 static __inline int
 2798 iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh)
 2799 {
 2800         int reclaim;
 2801         if_ctx_t ctx = txq->ift_ctx;
 2802 
 2803         KASSERT(thresh >= 0, ("invalid threshold to reclaim"));
 2804         MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size);
 2805 
 2806         /*
 2807          * XXX need a rate-limiting check so that this isn't called every time
 2808          */
 2809         iflib_tx_credits_update(ctx, txq);
 2810         reclaim = DESC_RECLAIMABLE(txq);
 2811 
 2812         if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) {
 2813 #ifdef INVARIANTS
 2814                 if (iflib_verbose_debug) {
 2815                         printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__,
 2816                                (uintmax_t)txq->ift_processed, (uintmax_t)txq->ift_cleaned,
 2817                                txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh);
 2818 
 2819                 }
 2820 #endif
 2821                 return (0);
 2822         }
 2823         iflib_tx_desc_free(txq, reclaim);
 2824         txq->ift_cleaned += reclaim;
 2825         txq->ift_in_use -= reclaim;
 2826 
 2827         if (txq->ift_active == FALSE)
 2828                 txq->ift_active = TRUE;
 2829 
 2830         return (reclaim);
 2831 }
 2832 
 2833 static struct mbuf **
 2834 _ring_peek_one(struct ifmp_ring *r, int cidx, int offset)
 2835 {
 2836 
 2837         return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (r->size-1)]));
 2838 }
 2839 
 2840 static void
 2841 iflib_txq_check_drain(iflib_txq_t txq, int budget)
 2842 {
 2843 
 2844         ifmp_ring_check_drainage(txq->ift_br[0], budget);
 2845 }
 2846 
 2847 static uint32_t
 2848 iflib_txq_can_drain(struct ifmp_ring *r)
 2849 {
 2850         iflib_txq_t txq = r->cookie;
 2851         if_ctx_t ctx = txq->ift_ctx;
 2852 
 2853         return ((TXQ_AVAIL(txq) >= MAX_TX_DESC(ctx)) ||
 2854                 ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, txq->ift_cidx_processed, false));
 2855 }
 2856 
 2857 static uint32_t
 2858 iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
 2859 {
 2860         iflib_txq_t txq = r->cookie;
 2861         if_ctx_t ctx = txq->ift_ctx;
 2862         if_t ifp = ctx->ifc_ifp;
 2863         struct mbuf **mp, *m;
 2864         int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail, err, in_use_prev, desc_used;
 2865 
 2866         if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) ||
 2867                             !LINK_ACTIVE(ctx))) {
 2868                 DBG_COUNTER_INC(txq_drain_notready);
 2869                 return (0);
 2870         }
 2871 
 2872         avail = IDXDIFF(pidx, cidx, r->size);
 2873         if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) {
 2874                 DBG_COUNTER_INC(txq_drain_flushing);
 2875                 for (i = 0; i < avail; i++) {
 2876                         m_free(r->items[(cidx + i) & (r->size-1)]);
 2877                         r->items[(cidx + i) & (r->size-1)] = NULL;
 2878                 }
 2879                 return (avail);
 2880         }
 2881         iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));
 2882         if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) {
 2883                 txq->ift_qstatus = IFLIB_QUEUE_IDLE;
 2884                 CALLOUT_LOCK(txq);
 2885                 callout_stop(&txq->ift_timer);
 2886                 callout_stop(&txq->ift_db_check);
 2887                 CALLOUT_UNLOCK(txq);
 2888                 DBG_COUNTER_INC(txq_drain_oactive);
 2889                 return (0);
 2890         }
 2891         consumed = mcast_sent = bytes_sent = pkt_sent = 0;
 2892         count = MIN(avail, TX_BATCH_SIZE);
 2893 
 2894         for (desc_used = i = 0; i < count && TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2; i++) {
 2895                 mp = _ring_peek_one(r, cidx, i);
 2896                 in_use_prev = txq->ift_in_use;
 2897                 err = iflib_encap(txq, mp);
 2898                 /*
 2899                  * What other errors should we bail out for?
 2900                  */
 2901                 if (err == ENOBUFS) {
 2902                         DBG_COUNTER_INC(txq_drain_encapfail);
 2903                         break;
 2904                 }
 2905                 consumed++;
 2906                 if (err)
 2907                         continue;
 2908 
 2909                 pkt_sent++;
 2910                 m = *mp;
 2911                 DBG_COUNTER_INC(tx_sent);
 2912                 bytes_sent += m->m_pkthdr.len;
 2913                 if (m->m_flags & M_MCAST)
 2914                         mcast_sent++;
 2915 
 2916                 txq->ift_db_pending += (txq->ift_in_use - in_use_prev);
 2917                 desc_used += (txq->ift_in_use - in_use_prev);
 2918                 iflib_txd_db_check(ctx, txq, FALSE);
 2919                 ETHER_BPF_MTAP(ifp, m);
 2920                 if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
 2921                         break;
 2922 
 2923                 if (desc_used > TXQ_MAX_DB_CONSUMED(txq->ift_size))
 2924                         break;
 2925         }
 2926 
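              /*
               * Doorbell policy: write immediately when iflib_min_tx_latency
               * is set or queue occupancy is below the minimum; otherwise
               * defer through a one-tick callout so that several packets can
               * share a single doorbell update.
               */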
 2927         if ((iflib_min_tx_latency || iflib_txq_min_occupancy(txq)) && txq->ift_db_pending)
 2928                 iflib_txd_db_check(ctx, txq, TRUE);
 2929         else if ((txq->ift_db_pending || TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)) &&
 2930                  (callout_pending(&txq->ift_db_check) == 0)) {
 2931                 txq->ift_db_pending_queued = txq->ift_db_pending;
 2932                 callout_reset_on(&txq->ift_db_check, 1, iflib_txd_deferred_db_check,
 2933                                  txq, txq->ift_db_check.c_cpu);
 2934         }
 2935         if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent);
 2936         if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent);
 2937         if (mcast_sent)
 2938                 if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent);
 2939 
 2940         return (consumed);
 2941 }
 2942 
 2943 static void
 2944 _task_fn_tx(void *context)
 2945 {
 2946         iflib_txq_t txq = context;
 2947         if_ctx_t ctx = txq->ift_ctx;
 2948 
 2949         if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
 2950                 return;
 2951         ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE);
 2952 }
 2953 
 2954 static void
 2955 _task_fn_rx(void *context)
 2956 {
 2957         iflib_rxq_t rxq = context;
 2958         if_ctx_t ctx = rxq->ifr_ctx;
 2959         bool more;
 2960         int rc;
 2961 
 2962         DBG_COUNTER_INC(task_fn_rxs);
 2963         if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
 2964                 return;
 2965 
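              /*
               * Process up to a small budget of frames; if the budget was
               * not exhausted, re-enable the interrupt (per-queue for MSI-X,
               * the shared interrupt for legacy), otherwise requeue this
               * task to keep draining.
               */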
 2966         if ((more = iflib_rxeof(rxq, 16 /* XXX */)) == false) {
 2967                 if (ctx->ifc_flags & IFC_LEGACY)
 2968                         IFDI_INTR_ENABLE(ctx);
 2969                 else {
 2970                         DBG_COUNTER_INC(rx_intr_enables);
 2971                         rc = IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
 2972                         KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but it is not implemented in the driver"));
 2973                 }
 2974         }
 2975         if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
 2976                 return;
 2977         if (more)
 2978                 GROUPTASK_ENQUEUE(&rxq->ifr_task);
 2979 }
 2980 
 2981 static void
 2982 _task_fn_admin(void *context)
 2983 {
 2984         if_ctx_t ctx = context;
 2985         if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
 2986         iflib_txq_t txq;
 2987         int i;
 2988 
 2989         if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
 2990                 return;
 2991 
 2992         CTX_LOCK(ctx);
 2993         for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) {
 2994                 CALLOUT_LOCK(txq);
 2995                 callout_stop(&txq->ift_timer);
 2996                 CALLOUT_UNLOCK(txq);
 2997         }
 2998         IFDI_UPDATE_ADMIN_STATUS(ctx);
 2999         for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++)
 3000                 callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu);
 3001         IFDI_LINK_INTR_ENABLE(ctx);
 3002         CTX_UNLOCK(ctx);
 3003 
 3004         if (LINK_ACTIVE(ctx) == 0)
 3005                 return;
 3006         for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++)
 3007                 iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET);
 3008 }
 3009 
 3010 
 3011 static void
 3012 _task_fn_iov(void *context)
 3013 {
 3014         if_ctx_t ctx = context;
 3015 
 3016         if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
 3017                 return;
 3018 
 3019         CTX_LOCK(ctx);
 3020         IFDI_VFLR_HANDLE(ctx);
 3021         CTX_UNLOCK(ctx);
 3022 }
 3023 
 3024 static int
 3025 iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
 3026 {
 3027         int err;
 3028         if_int_delay_info_t info;
 3029         if_ctx_t ctx;
 3030 
 3031         info = (if_int_delay_info_t)arg1;
 3032         ctx = info->iidi_ctx;
 3033         info->iidi_req = req;
 3034         info->iidi_oidp = oidp;
 3035         CTX_LOCK(ctx);
 3036         err = IFDI_SYSCTL_INT_DELAY(ctx, info);
 3037         CTX_UNLOCK(ctx);
 3038         return (err);
 3039 }
 3040 
 3041 /*********************************************************************
 3042  *
 3043  *  IFNET FUNCTIONS
 3044  *
 3045  **********************************************************************/
 3046 
 3047 static void
 3048 iflib_if_init_locked(if_ctx_t ctx)
 3049 {
 3050         iflib_stop(ctx);
 3051         iflib_init_locked(ctx);
 3052 }
 3053 
 3054 
 3055 static void
 3056 iflib_if_init(void *arg)
 3057 {
 3058         if_ctx_t ctx = arg;
 3059 
 3060         CTX_LOCK(ctx);
 3061         iflib_if_init_locked(ctx);
 3062         CTX_UNLOCK(ctx);
 3063 }
 3064 
 3065 static int
 3066 iflib_if_transmit(if_t ifp, struct mbuf *m)
 3067 {
 3068         if_ctx_t        ctx = if_getsoftc(ifp);
 3069 
 3070         iflib_txq_t txq;
 3071         int err, qidx;
 3072 
 3073         if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
 3074                 DBG_COUNTER_INC(tx_frees);
 3075                 m_freem(m);
 3076                 return (0);
 3077         }
 3078 
 3079         MPASS(m->m_nextpkt == NULL);
 3080         qidx = 0;
 3081         if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m))
 3082                 qidx = QIDX(ctx, m);
 3083         /*
 3084          * XXX calculate buf_ring based on flowid (divvy up bits?)
 3085          */
 3086         txq = &ctx->ifc_txqs[qidx];
 3087 
 3088 #ifdef DRIVER_BACKPRESSURE
 3089         if (txq->ift_closed) {
                      struct mbuf *next;

 3090                 while (m != NULL) {
 3091                         next = m->m_nextpkt;
 3092                         m->m_nextpkt = NULL;
 3093                         m_freem(m);
 3094                         m = next;
 3095                 }
 3096                 return (ENOBUFS);
 3097         }
 3098 #endif
 3099 #ifdef notyet
 3100         qidx = count = 0;
 3101         mp = marr;
 3102         next = m;
 3103         do {
 3104                 count++;
 3105                 next = next->m_nextpkt;
 3106         } while (next != NULL);
 3107 
 3108         if (count > nitems(marr))
 3109                 if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) {
 3110                         /* XXX check nextpkt */
 3111                         m_freem(m);
 3112                         /* XXX simplify for now */
 3113                         DBG_COUNTER_INC(tx_frees);
 3114                         return (ENOBUFS);
 3115                 }
 3116         for (next = m, i = 0; next != NULL; i++) {
 3117                 mp[i] = next;
 3118                 next = next->m_nextpkt;
 3119                 mp[i]->m_nextpkt = NULL;
 3120         }
 3121 #endif
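              /*
               * Hand the packet to this queue's lock-free mp_ring; the
               * consumer side (iflib_txq_drain) may run inline in the
               * enqueueing thread or from the queue's gtask, draining up
               * to TX_BATCH_SIZE packets at a time.
               */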
 3122         DBG_COUNTER_INC(tx_seen);
 3123         err = ifmp_ring_enqueue(txq->ift_br[0], (void **)&m, 1, TX_BATCH_SIZE);
 3124 
 3125         if (err) {
 3126                 GROUPTASK_ENQUEUE(&txq->ift_task);
 3127                 /* backpressure support forthcoming */
 3128 #ifdef DRIVER_BACKPRESSURE
 3129                 txq->ift_closed = TRUE;
 3130 #endif
 3131                 ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE);
 3132                 m_freem(m);
 3133         } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) {
 3134                 GROUPTASK_ENQUEUE(&txq->ift_task);
 3135         }
 3136 
 3137         return (err);
 3138 }
 3139 
 3140 static void
 3141 iflib_if_qflush(if_t ifp)
 3142 {
 3143         if_ctx_t ctx = if_getsoftc(ifp);
 3144         iflib_txq_t txq = ctx->ifc_txqs;
 3145         int i;
 3146 
 3147         CTX_LOCK(ctx);
 3148         ctx->ifc_flags |= IFC_QFLUSH;
 3149         CTX_UNLOCK(ctx);
 3150         for (i = 0; i < NTXQSETS(ctx); i++, txq++)
 3151                 while (!(ifmp_ring_is_idle(txq->ift_br[0]) || ifmp_ring_is_stalled(txq->ift_br[0])))
 3152                         iflib_txq_check_drain(txq, 0);
 3153         CTX_LOCK(ctx);
 3154         ctx->ifc_flags &= ~IFC_QFLUSH;
 3155         CTX_UNLOCK(ctx);
 3156 
 3157         if_qflush(ifp);
 3158 }
 3159 
 3160 
 3161 #define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
 3162                      IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING |   \
 3163                      IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO)
 3164 
 3165 #define IFCAP_REINIT IFCAP_FLAGS
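      
      /*
       * Sketch of the SIOCSIFCAP mask arithmetic used below, with made-up
       * values: if IFCAP_TXCSUM|IFCAP_RXCSUM is currently enabled and
       * userland requests only IFCAP_RXCSUM, then
       *
       *	mask = ifr_reqcap ^ capenable == IFCAP_TXCSUM
       *
       * Only the bits of mask that fall within IFCAP_FLAGS are toggled, and
       * since IFCAP_REINIT covers all of them the change is bracketed by an
       * iflib_stop()/iflib_init_locked() cycle.
       */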
 3166 
 3167 static int
 3168 iflib_if_ioctl(if_t ifp, u_long command, caddr_t data)
 3169 {
 3170         if_ctx_t ctx = if_getsoftc(ifp);
 3171         struct ifreq    *ifr = (struct ifreq *)data;
 3172 #if defined(INET) || defined(INET6)
 3173         struct ifaddr   *ifa = (struct ifaddr *)data;
 3174 #endif
 3175         bool            avoid_reset = FALSE;
 3176         int             err = 0, reinit = 0, bits;
 3177 
 3178         switch (command) {
 3179         case SIOCSIFADDR:
 3180 #ifdef INET
 3181                 if (ifa->ifa_addr->sa_family == AF_INET)
 3182                         avoid_reset = TRUE;
 3183 #endif
 3184 #ifdef INET6
 3185                 if (ifa->ifa_addr->sa_family == AF_INET6)
 3186                         avoid_reset = TRUE;
 3187 #endif
 3188                 /*
 3189                 ** Calling init results in link renegotiation,
 3190                 ** so we avoid doing it when possible.
 3191                 */
 3192                 if (avoid_reset) {
 3193                         if_setflagbits(ifp, IFF_UP, 0);
 3194                         if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
 3195                                 reinit = 1;
 3196 #ifdef INET
 3197                         if (!(if_getflags(ifp) & IFF_NOARP))
 3198                                 arp_ifinit(ifp, ifa);
 3199 #endif
 3200                 } else
 3201                         err = ether_ioctl(ifp, command, data);
 3202                 break;
 3203         case SIOCSIFMTU:
 3204                 CTX_LOCK(ctx);
 3205                 if (ifr->ifr_mtu == if_getmtu(ifp)) {
 3206                         CTX_UNLOCK(ctx);
 3207                         break;
 3208                 }
 3209                 bits = if_getdrvflags(ifp);
 3210                 /* stop the driver and free any clusters before proceeding */
 3211                 iflib_stop(ctx);
 3212 
 3213                 if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) {
 3214                         if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size)
 3215                                 ctx->ifc_flags |= IFC_MULTISEG;
 3216                         else
 3217                                 ctx->ifc_flags &= ~IFC_MULTISEG;
 3218                         err = if_setmtu(ifp, ifr->ifr_mtu);
 3219                 }
 3220                 iflib_init_locked(ctx);
 3221                 if_setdrvflags(ifp, bits);
 3222                 CTX_UNLOCK(ctx);
 3223                 break;
 3224         case SIOCSIFFLAGS:
 3225                 CTX_LOCK(ctx);
 3226                 if (if_getflags(ifp) & IFF_UP) {
 3227                         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 3228                                 if ((if_getflags(ifp) ^ ctx->ifc_if_flags) &
 3229                                     (IFF_PROMISC | IFF_ALLMULTI)) {
 3230                                         err = IFDI_PROMISC_SET(ctx, if_getflags(ifp));
 3231                                 }
 3232                         } else
 3233                                 reinit = 1;
 3234                 } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 3235                         iflib_stop(ctx);
 3236                 }
 3237                 ctx->ifc_if_flags = if_getflags(ifp);
 3238                 CTX_UNLOCK(ctx);
 3239                 break;
 3240 
 3242         case SIOCADDMULTI:
 3243         case SIOCDELMULTI:
 3244                 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 3245                         CTX_LOCK(ctx);
 3246                         IFDI_INTR_DISABLE(ctx);
 3247                         IFDI_MULTI_SET(ctx);
 3248                         IFDI_INTR_ENABLE(ctx);
 3249                         CTX_UNLOCK(ctx);
 3250                 }
 3251                 break;
 3252         case SIOCSIFMEDIA:
 3253                 CTX_LOCK(ctx);
 3254                 IFDI_MEDIA_SET(ctx);
 3255                 CTX_UNLOCK(ctx);
 3256                 /* falls thru */
 3257         case SIOCGIFMEDIA:
 3258                 err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command);
 3259                 break;
 3260         case SIOCGI2C:
 3261         {
 3262                 struct ifi2creq i2c;
 3263 
 3264                 err = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
 3265                 if (err != 0)
 3266                         break;
 3267                 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
 3268                         err = EINVAL;
 3269                         break;
 3270                 }
 3271                 if (i2c.len > sizeof(i2c.data)) {
 3272                         err = EINVAL;
 3273                         break;
 3274                 }
 3275 
 3276                 if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0)
 3277                         err = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
 3278                 break;
 3279         }
 3280         case SIOCSIFCAP:
 3281         {
 3282                 int mask, setmask;
 3283 
 3284                 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
 3285                 setmask = 0;
 3286 #ifdef TCP_OFFLOAD
 3287                 setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6);
 3288 #endif
 3289                 setmask |= (mask & IFCAP_FLAGS);
 3290 
 3291                 if ((mask & IFCAP_WOL) &&
 3292                     (if_getcapabilities(ifp) & IFCAP_WOL) != 0)
 3293                         setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC));
 3294                 if_vlancap(ifp);
 3295                 /*
 3296                  * We want to ensure that traffic has stopped before we change any of the flags.
 3297                  */
 3298                 if (setmask) {
 3299                         CTX_LOCK(ctx);
 3300                         bits = if_getdrvflags(ifp);
 3301                         if (setmask & IFCAP_REINIT)
 3302                                 iflib_stop(ctx);
 3303                         if_togglecapenable(ifp, setmask);
 3304                         if (setmask & IFCAP_REINIT)
 3305                                 iflib_init_locked(ctx);
 3306                         if_setdrvflags(ifp, bits);
 3307                         CTX_UNLOCK(ctx);
 3308                 }
 3309                 break;
 3310         }
 3311         case SIOCGPRIVATE_0:
 3312         case SIOCSDRVSPEC:
 3313         case SIOCGDRVSPEC:
 3314                 CTX_LOCK(ctx);
 3315                 err = IFDI_PRIV_IOCTL(ctx, command, data);
 3316                 CTX_UNLOCK(ctx);
 3317                 break;
 3318         default:
 3319                 err = ether_ioctl(ifp, command, data);
 3320                 break;
 3321         }
 3322         if (reinit)
 3323                 iflib_if_init(ctx);
 3324         return (err);
 3325 }
 3326 
 3327 static uint64_t
 3328 iflib_if_get_counter(if_t ifp, ift_counter cnt)
 3329 {
 3330         if_ctx_t ctx = if_getsoftc(ifp);
 3331 
 3332         return (IFDI_GET_COUNTER(ctx, cnt));
 3333 }
 3334 
 3335 /*********************************************************************
 3336  *
 3337  *  OTHER FUNCTIONS EXPORTED TO THE STACK
 3338  *
 3339  **********************************************************************/
 3340 
 3341 static void
 3342 iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag)
 3343 {
 3344         if_ctx_t ctx = if_getsoftc(ifp);
 3345 
 3346         if ((void *)ctx != arg)
 3347                 return;
 3348 
 3349         if ((vtag == 0) || (vtag > 4095))
 3350                 return;
 3351 
 3352         CTX_LOCK(ctx);
 3353         IFDI_VLAN_REGISTER(ctx, vtag);
 3354         /* Re-init to load the changes */
 3355         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
 3356                 iflib_init_locked(ctx);
 3357         CTX_UNLOCK(ctx);
 3358 }
 3359 
 3360 static void
 3361 iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag)
 3362 {
 3363         if_ctx_t ctx = if_getsoftc(ifp);
 3364 
 3365         if ((void *)ctx != arg)
 3366                 return;
 3367 
 3368         if ((vtag == 0) || (vtag > 4095))
 3369                 return;
 3370 
 3371         CTX_LOCK(ctx);
 3372         IFDI_VLAN_UNREGISTER(ctx, vtag);
 3373         /* Re-init to load the changes */
 3374         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
 3375                 iflib_init_locked(ctx);
 3376         CTX_UNLOCK(ctx);
 3377 }
 3378 
 3379 static void
 3380 iflib_led_func(void *arg, int onoff)
 3381 {
 3382         if_ctx_t ctx = arg;
 3383 
 3384         CTX_LOCK(ctx);
 3385         IFDI_LED_FUNC(ctx, onoff);
 3386         CTX_UNLOCK(ctx);
 3387 }
 3388 
 3389 /*********************************************************************
 3390  *
 3391  *  BUS FUNCTION DEFINITIONS
 3392  *
 3393  **********************************************************************/
 3394 
 3395 int
 3396 iflib_device_probe(device_t dev)
 3397 {
 3398         pci_vendor_info_t *ent;
 3399 
 3400         uint16_t        pci_vendor_id, pci_device_id;
 3401         uint16_t        pci_subvendor_id, pci_subdevice_id;
 3402         uint16_t        pci_rev_id;
 3403         if_shared_ctx_t sctx;
 3404 
 3405         if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC)
 3406                 return (ENOTSUP);
 3407 
 3408         pci_vendor_id = pci_get_vendor(dev);
 3409         pci_device_id = pci_get_device(dev);
 3410         pci_subvendor_id = pci_get_subvendor(dev);
 3411         pci_subdevice_id = pci_get_subdevice(dev);
 3412         pci_rev_id = pci_get_revid(dev);
 3413         if (sctx->isc_parse_devinfo != NULL)
 3414                 sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id);
 3415 
 3416         ent = sctx->isc_vendor_info;
 3417         while (ent->pvi_vendor_id != 0) {
 3418                 if (pci_vendor_id != ent->pvi_vendor_id) {
 3419                         ent++;
 3420                         continue;
 3421                 }
 3422                 if ((pci_device_id == ent->pvi_device_id) &&
 3423                     ((pci_subvendor_id == ent->pvi_subvendor_id) ||
 3424                      (ent->pvi_subvendor_id == 0)) &&
 3425                     ((pci_subdevice_id == ent->pvi_subdevice_id) ||
 3426                      (ent->pvi_subdevice_id == 0)) &&
 3427                     ((pci_rev_id == ent->pvi_rev_id) ||
 3428                      (ent->pvi_rev_id == 0))) {
 3429 
 3430                         device_set_desc_copy(dev, ent->pvi_name);
 3431                         /* This needs to be changed to zero if the bus probing code
 3432                          * ever stops re-probing on best match, because the sctx
 3433                          * may have its values overwritten by register calls
 3434                          * in subsequent probes.
 3435                          */
 3436                         return (BUS_PROBE_DEFAULT);
 3437                 }
 3438                 ent++;
 3439         }
 3440         return (ENXIO);
 3441 }
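      
      /*
       * Example of the vendor table this probe loop walks - a minimal
       * sketch, not taken from a real driver (the IDs and "hypo" names are
       * hypothetical). Zero subvendor/subdevice/revision fields act as
       * wildcards, and an entry with pvi_vendor_id == 0 ends the walk:
       *
       *	static pci_vendor_info_t hypo_vendor_info[] = {
       *		{ .pvi_vendor_id = 0x8086, .pvi_device_id = 0x1533,
       *		  .pvi_name = "Hypothetical 1G Adapter" },
       *		{ 0 },	// terminator
       *	};
       */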
 3442 
 3443 int
 3444 iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp)
 3445 {
 3446         int err, rid, msix, msix_bar;
 3447         if_ctx_t ctx;
 3448         if_t ifp;
 3449         if_softc_ctx_t scctx;
 3450         int i;
 3451         uint16_t main_txq;
 3452         uint16_t main_rxq;
 3453 
 3454 
 3455         ctx = malloc(sizeof(*ctx), M_IFLIB, M_WAITOK|M_ZERO);
 3456 
 3457         if (sc == NULL) {
 3458                 sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO);
 3459                 device_set_softc(dev, ctx);
 3460                 ctx->ifc_flags |= IFC_SC_ALLOCATED;
 3461         }
 3462 
 3463         ctx->ifc_sctx = sctx;
 3464         ctx->ifc_dev = dev;
 3465         ctx->ifc_txrx = *sctx->isc_txrx;
 3466         ctx->ifc_softc = sc;
 3467 
 3468         if ((err = iflib_register(ctx)) != 0) {
 3469                 device_printf(dev, "iflib_register failed %d\n", err);
 3470                 return (err);
 3471         }
 3472         iflib_add_device_sysctl_pre(ctx);
 3473 
 3474         scctx = &ctx->ifc_softc_ctx;
 3475         /*
 3476          * XXX sanity check that ntxd & nrxd are a power of 2
 3477          */
 3478         if (ctx->ifc_sysctl_ntxqs != 0)
 3479                 scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs;
 3480         if (ctx->ifc_sysctl_nrxqs != 0)
 3481                 scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs;
 3482 
 3483         for (i = 0; i < sctx->isc_ntxqs; i++) {
 3484                 if (ctx->ifc_sysctl_ntxds[i] != 0)
 3485                         scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i];
 3486                 else
 3487                         scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i];
 3488         }
 3489 
 3490         for (i = 0; i < sctx->isc_nrxqs; i++) {
 3491                 if (ctx->ifc_sysctl_nrxds[i] != 0)
 3492                         scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i];
 3493                 else
 3494                         scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i];
 3495         }
 3496 
 3497         for (i = 0; i < sctx->isc_nrxqs; i++) {
 3498                 if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) {
 3499                         device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n",
 3500                                       i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]);
 3501                         scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i];
 3502                 }
 3503                 if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) {
 3504                         device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n",
 3505                                       i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]);
 3506                         scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i];
 3507                 }
 3508         }
 3509 
 3510         for (i = 0; i < sctx->isc_ntxqs; i++) {
 3511                 if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) {
 3512                         device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n",
 3513                                       i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]);
 3514                         scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i];
 3515                 }
 3516                 if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) {
 3517                         device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n",
 3518                                       i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]);
 3519                         scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i];
 3520                 }
 3521         }
 3522 
 3523         if ((err = IFDI_ATTACH_PRE(ctx)) != 0) {
 3524                 device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err);
 3525                 return (err);
 3526         }
 3527         if (scctx->isc_ntxqsets_max)
 3528                 scctx->isc_ntxqsets = min(scctx->isc_ntxqsets, scctx->isc_ntxqsets_max);
 3529         if (scctx->isc_nrxqsets_max)
 3530                 scctx->isc_nrxqsets = min(scctx->isc_nrxqsets, scctx->isc_nrxqsets_max);
 3531 
 3532 #ifdef ACPI_DMAR
 3533         if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL)
 3534                 ctx->ifc_flags |= IFC_DMAR;
 3535 #endif
 3536 
 3537         msix_bar = scctx->isc_msix_bar;
 3538 
 3539         ifp = ctx->ifc_ifp;
 3540 
 3541         if (sctx->isc_flags & IFLIB_HAS_TXCQ)
 3542                 main_txq = 1;
 3543         else
 3544                 main_txq = 0;
 3545 
 3546         if (sctx->isc_flags & IFLIB_HAS_RXCQ)
 3547                 main_rxq = 1;
 3548         else
 3549                 main_rxq = 0;
 3550 
 3551         /* XXX change for per-queue sizes */
 3552         device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
 3553                       scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]);
 3554         for (i = 0; i < sctx->isc_nrxqs; i++) {
 3555                 if (!powerof2(scctx->isc_nrxd[i])) {
 3556                         /* round down instead? */
 3557                         device_printf(dev, "# rx descriptors must be a power of 2\n");
 3558                         err = EINVAL;
 3559                         goto fail;
 3560                 }
 3561         }
 3562         for (i = 0; i < sctx->isc_ntxqs; i++) {
 3563                 if (!powerof2(scctx->isc_ntxd[i])) {
 3564                         device_printf(dev,
 3565                             "# tx descriptors must be a power of 2\n");
 3566                         err = EINVAL;
 3567                         goto fail;
 3568                 }
 3569         }
 3570 
 3571         if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] /
 3572             MAX_SINGLE_PACKET_FRACTION)
 3573                 scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] /
 3574                     MAX_SINGLE_PACKET_FRACTION);
 3575         if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] /
 3576             MAX_SINGLE_PACKET_FRACTION)
 3577                 scctx->isc_tx_tso_segments_max = max(1,
 3578                     scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION);
 3579 
 3580         /*
 3581          * Protect the stack against modern hardware
 3582          */
 3583         if (scctx->isc_tx_tso_size_max > FREEBSD_TSO_SIZE_MAX)
 3584                 scctx->isc_tx_tso_size_max = FREEBSD_TSO_SIZE_MAX;
 3585 
 3586         /* TSO parameters - dig these out of the data sheet; they simply correspond to the tag setup */
 3587         ifp->if_hw_tsomaxsegcount = scctx->isc_tx_tso_segments_max;
 3588         ifp->if_hw_tsomax = scctx->isc_tx_tso_size_max;
 3589         ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max;
 3590         if (scctx->isc_rss_table_size == 0)
 3591                 scctx->isc_rss_table_size = 64;
 3592         scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;
 3593         /*
 3594         ** Now set up MSI or MSI-X; this should return
 3595         ** the number of supported vectors
 3596         ** (will be 1 for MSI).
 3597         */
 3598         if (sctx->isc_flags & IFLIB_SKIP_MSIX) {
 3599                 msix = scctx->isc_vectors;
 3600         } else if (scctx->isc_msix_bar != 0)
 3601                 msix = iflib_msix_init(ctx);
 3602         else {
 3603                 scctx->isc_vectors = 1;
 3604                 scctx->isc_ntxqsets = 1;
 3605                 scctx->isc_nrxqsets = 1;
 3606                 scctx->isc_intr = IFLIB_INTR_LEGACY;
 3607                 msix = 0;
 3608         }
 3609         /* Get memory for the station queues */
 3610         if ((err = iflib_queues_alloc(ctx))) {
 3611                 device_printf(dev, "Unable to allocate queue memory\n");
 3612                 goto fail;
 3613         }
 3614 
 3615         if ((err = iflib_qset_structures_setup(ctx))) {
 3616                 device_printf(dev, "qset structure setup failed %d\n", err);
 3617                 goto fail_queues;
 3618         }
 3619 
 3620         if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) {
 3621                 device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err);
 3622                 goto fail_intr_free;
 3623         }
 3624         if (msix <= 1) {
 3625                 rid = 0;
 3626                 if (scctx->isc_intr == IFLIB_INTR_MSI) {
 3627                         MPASS(msix == 1);
 3628                         rid = 1;
 3629                 }
 3630                 if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) {
 3631                         device_printf(dev, "iflib_legacy_setup failed %d\n", err);
 3632                         goto fail_intr_free;
 3633                 }
 3634         }
 3635         ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac);
 3636         if ((err = IFDI_ATTACH_POST(ctx)) != 0) {
 3637                 device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err);
 3638                 goto fail_detach;
 3639         }
 3640         if ((err = iflib_netmap_attach(ctx))) {
 3641                 device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err);
 3642                 goto fail_detach;
 3643         }
 3644         *ctxp = ctx;
 3645 
 3646         if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter);
 3647         iflib_add_device_sysctl_post(ctx);
 3648         return (0);
 3649 fail_detach:
 3650         ether_ifdetach(ctx->ifc_ifp);
 3651 fail_intr_free:
 3652         if (scctx->isc_intr == IFLIB_INTR_MSIX || scctx->isc_intr == IFLIB_INTR_MSI)
 3653                 pci_release_msi(ctx->ifc_dev);
 3654 fail_queues:
 3655         /* XXX free queues */
 3656 fail:
 3657         IFDI_DETACH(ctx);
 3658         return (err);
 3659 }
 3660 
 3661 int
 3662 iflib_device_attach(device_t dev)
 3663 {
 3664         if_ctx_t ctx;
 3665         if_shared_ctx_t sctx;
 3666 
 3667         if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC)
 3668                 return (ENOTSUP);
 3669 
 3670         pci_enable_busmaster(dev);
 3671 
 3672         return (iflib_device_register(dev, NULL, sctx, &ctx));
 3673 }
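      
      /*
       * A minimal sketch of how a driver wires these routines into newbus
       * (names prefixed "hypo" are hypothetical, not from any in-tree
       * driver): the generic bus methods point straight at the iflib entry
       * points, and the driver-specific shared context is produced by the
       * DEVICE_REGISTER() method that iflib_device_probe() invokes.
       *
       *	static device_method_t hypo_methods[] = {
       *		DEVMETHOD(device_register, hypo_register),
       *		DEVMETHOD(device_probe,    iflib_device_probe),
       *		DEVMETHOD(device_attach,   iflib_device_attach),
       *		DEVMETHOD(device_detach,   iflib_device_detach),
       *		DEVMETHOD_END
       *	};
       *	static driver_t hypo_driver = {
       *		"hypo", hypo_methods, sizeof(struct hypo_softc)
       *	};
       */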
 3674 
 3675 int
 3676 iflib_device_deregister(if_ctx_t ctx)
 3677 {
 3678         if_t ifp = ctx->ifc_ifp;
 3679         iflib_txq_t txq;
 3680         iflib_rxq_t rxq;
 3681         device_t dev = ctx->ifc_dev;
 3682         int i;
 3683         struct taskqgroup *tqg;
 3684 
 3685         /* Make sure VLANS are not using driver */
 3686         if (if_vlantrunkinuse(ifp)) {
 3687                 device_printf(dev, "VLAN in use, detach first\n");
 3688                 return (EBUSY);
 3689         }
 3690 
 3691         CTX_LOCK(ctx);
 3692         ctx->ifc_in_detach = 1;
 3693         iflib_stop(ctx);
 3694         CTX_UNLOCK(ctx);
 3695 
 3696         /* Unregister VLAN events */
 3697         if (ctx->ifc_vlan_attach_event != NULL)
 3698                 EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event);
 3699         if (ctx->ifc_vlan_detach_event != NULL)
 3700                 EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event);
 3701 
 3702         iflib_netmap_detach(ifp);
 3703         ether_ifdetach(ifp);
 3704         /* ether_ifdetach calls if_qflush - the lock must be destroyed afterwards */
 3705         CTX_LOCK_DESTROY(ctx);
 3706         if (ctx->ifc_led_dev != NULL)
 3707                 led_destroy(ctx->ifc_led_dev);
 3708         /* XXX drain any dependent tasks */
 3709         tqg = qgroup_if_io_tqg;
 3710         for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) {
 3711                 callout_drain(&txq->ift_timer);
 3712                 callout_drain(&txq->ift_db_check);
 3713                 if (txq->ift_task.gt_uniq != NULL)
 3714                         taskqgroup_detach(tqg, &txq->ift_task);
 3715         }
 3716         for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) {
 3717                 if (rxq->ifr_task.gt_uniq != NULL)
 3718                         taskqgroup_detach(tqg, &rxq->ifr_task);
 3719         }
 3720         tqg = qgroup_if_config_tqg;
 3721         if (ctx->ifc_admin_task.gt_uniq != NULL)
 3722                 taskqgroup_detach(tqg, &ctx->ifc_admin_task);
 3723         if (ctx->ifc_vflr_task.gt_uniq != NULL)
 3724                 taskqgroup_detach(tqg, &ctx->ifc_vflr_task);
 3725 
 3726         IFDI_DETACH(ctx);
 3727         device_set_softc(ctx->ifc_dev, NULL);
 3728         if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) {
 3729                 pci_release_msi(dev);
 3730         }
 3731         if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) {
 3732                 iflib_irq_free(ctx, &ctx->ifc_legacy_irq);
 3733         }
 3734         if (ctx->ifc_msix_mem != NULL) {
 3735                 bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY,
 3736                         ctx->ifc_softc_ctx.isc_msix_bar, ctx->ifc_msix_mem);
 3737                 ctx->ifc_msix_mem = NULL;
 3738         }
 3739 
 3740         bus_generic_detach(dev);
 3741         if_free(ifp);
 3742 
 3743         iflib_tx_structures_free(ctx);
 3744         iflib_rx_structures_free(ctx);
 3745         if (ctx->ifc_flags & IFC_SC_ALLOCATED)
 3746                 free(ctx->ifc_softc, M_IFLIB);
 3747         free(ctx, M_IFLIB);
 3748         return (0);
 3749 }
 3750 
 3751 
 3752 int
 3753 iflib_device_detach(device_t dev)
 3754 {
 3755         if_ctx_t ctx = device_get_softc(dev);
 3756 
 3757         return (iflib_device_deregister(ctx));
 3758 }
 3759 
 3760 int
 3761 iflib_device_suspend(device_t dev)
 3762 {
 3763         if_ctx_t ctx = device_get_softc(dev);
 3764 
 3765         CTX_LOCK(ctx);
 3766         IFDI_SUSPEND(ctx);
 3767         CTX_UNLOCK(ctx);
 3768 
 3769         return (bus_generic_suspend(dev));
 3770 }

 3771 int
 3772 iflib_device_shutdown(device_t dev)
 3773 {
 3774         if_ctx_t ctx = device_get_softc(dev);
 3775 
 3776         CTX_LOCK(ctx);
 3777         IFDI_SHUTDOWN(ctx);
 3778         CTX_UNLOCK(ctx);
 3779 
 3780         return (bus_generic_suspend(dev));
 3781 }
 3782 
 3783 
 3784 int
 3785 iflib_device_resume(device_t dev)
 3786 {
 3787         if_ctx_t ctx = device_get_softc(dev);
 3788         iflib_txq_t txq = ctx->ifc_txqs;
 3789 
 3790         CTX_LOCK(ctx);
 3791         IFDI_RESUME(ctx);
 3792         iflib_init_locked(ctx);
 3793         CTX_UNLOCK(ctx);
 3794         for (int i = 0; i < NTXQSETS(ctx); i++, txq++)
 3795                 iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET);
 3796 
 3797         return (bus_generic_resume(dev));
 3798 }
 3799 
 3800 int
 3801 iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params)
 3802 {
 3803         int error;
 3804         if_ctx_t ctx = device_get_softc(dev);
 3805 
 3806         CTX_LOCK(ctx);
 3807         error = IFDI_IOV_INIT(ctx, num_vfs, params);
 3808         CTX_UNLOCK(ctx);
 3809 
 3810         return (error);
 3811 }
 3812 
 3813 void
 3814 iflib_device_iov_uninit(device_t dev)
 3815 {
 3816         if_ctx_t ctx = device_get_softc(dev);
 3817 
 3818         CTX_LOCK(ctx);
 3819         IFDI_IOV_UNINIT(ctx);
 3820         CTX_UNLOCK(ctx);
 3821 }
 3822 
 3823 int
 3824 iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params)
 3825 {
 3826         int error;
 3827         if_ctx_t ctx = device_get_softc(dev);
 3828 
 3829         CTX_LOCK(ctx);
 3830         error = IFDI_IOV_VF_ADD(ctx, vfnum, params);
 3831         CTX_UNLOCK(ctx);
 3832 
 3833         return (error);
 3834 }
 3835 
 3836 /*********************************************************************
 3837  *
 3838  *  MODULE FUNCTION DEFINITIONS
 3839  *
 3840  **********************************************************************/
 3841 
 3842 /*
 3843  * - Start a fast taskqueue thread for each core
 3844  * - Start a taskqueue for control operations
 3845  */
 3846 static int
 3847 iflib_module_init(void)
 3848 {
 3849         return (0);
 3850 }
 3851 
 3852 static int
 3853 iflib_module_event_handler(module_t mod, int what, void *arg)
 3854 {
 3855         int err;
 3856 
 3857         switch (what) {
 3858         case MOD_LOAD:
 3859                 if ((err = iflib_module_init()) != 0)
 3860                         return (err);
 3861                 break;
 3862         case MOD_UNLOAD:
 3863                 return (EBUSY);
 3864         default:
 3865                 return (EOPNOTSUPP);
 3866         }
 3867 
 3868         return (0);
 3869 }
 3870 
 3871 /*********************************************************************
 3872  *
 3873  *  PUBLIC FUNCTION DEFINITIONS
 3874  *     ordered as in iflib.h
 3875  *
 3876  **********************************************************************/
 3877 
 3878 
 3879 static void
 3880 _iflib_assert(if_shared_ctx_t sctx)
 3881 {
 3882         MPASS(sctx->isc_tx_maxsize);
 3883         MPASS(sctx->isc_tx_maxsegsize);
 3884 
 3885         MPASS(sctx->isc_rx_maxsize);
 3886         MPASS(sctx->isc_rx_nsegments);
 3887         MPASS(sctx->isc_rx_maxsegsize);
 3888 
 3889 
 3890         MPASS(sctx->isc_txrx->ift_txd_encap);
 3891         MPASS(sctx->isc_txrx->ift_txd_flush);
 3892         MPASS(sctx->isc_txrx->ift_txd_credits_update);
 3893         MPASS(sctx->isc_txrx->ift_rxd_available);
 3894         MPASS(sctx->isc_txrx->ift_rxd_pkt_get);
 3895         MPASS(sctx->isc_txrx->ift_rxd_refill);
 3896         MPASS(sctx->isc_txrx->ift_rxd_flush);
 3897 
 3898         MPASS(sctx->isc_nrxd_min[0]);
 3899         MPASS(sctx->isc_nrxd_max[0]);
 3900         MPASS(sctx->isc_nrxd_default[0]);
 3901         MPASS(sctx->isc_ntxd_min[0]);
 3902         MPASS(sctx->isc_ntxd_max[0]);
 3903         MPASS(sctx->isc_ntxd_default[0]);
 3904 }
 3905 
 3906 static int
 3907 iflib_register(if_ctx_t ctx)
 3908 {
 3909         if_shared_ctx_t sctx = ctx->ifc_sctx;
 3910         driver_t *driver = sctx->isc_driver;
 3911         device_t dev = ctx->ifc_dev;
 3912         if_t ifp;
 3913 
 3914         _iflib_assert(sctx);
 3915 
 3916         CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev));
 3917 
 3918         ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER);
 3919         if (ifp == NULL) {
 3920                 device_printf(dev, "cannot allocate ifnet structure\n");
 3921                 return (ENOMEM);
 3922         }
 3923 
 3924         /*
 3925          * Initialize our context's device specific methods
 3926          */
 3927         kobj_init((kobj_t) ctx, (kobj_class_t) driver);
 3928         kobj_class_compile((kobj_class_t) driver);
 3929         driver->refs++;
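              /*
               * From here on the context doubles as a kobj instance backed
               * by the driver's kobj class; the IFDI_*() wrappers dispatch
               * through it into the driver's ifdi methods.
               */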
 3930 
 3931         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 3932         if_setsoftc(ifp, ctx);
 3933         if_setdev(ifp, dev);
 3934         if_setinitfn(ifp, iflib_if_init);
 3935         if_setioctlfn(ifp, iflib_if_ioctl);
 3936         if_settransmitfn(ifp, iflib_if_transmit);
 3937         if_setqflushfn(ifp, iflib_if_qflush);
 3938         if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
 3939 
 3940         if_setcapabilities(ifp, 0);
 3941         if_setcapenable(ifp, 0);
 3942 
 3943         ctx->ifc_vlan_attach_event =
 3944                 EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx,
 3945                                                           EVENTHANDLER_PRI_FIRST);
 3946         ctx->ifc_vlan_detach_event =
 3947                 EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx,
 3948                                                           EVENTHANDLER_PRI_FIRST);
 3949 
 3950         ifmedia_init(&ctx->ifc_media, IFM_IMASK,
 3951                                          iflib_media_change, iflib_media_status);
 3952 
 3953         return (0);
 3954 }
 3955 
 3956 
 3957 static int
 3958 iflib_queues_alloc(if_ctx_t ctx)
 3959 {
 3960         if_shared_ctx_t sctx = ctx->ifc_sctx;
 3961         if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 3962         device_t dev = ctx->ifc_dev;
 3963         int nrxqsets = scctx->isc_nrxqsets;
 3964         int ntxqsets = scctx->isc_ntxqsets;
 3965         iflib_txq_t txq;
 3966         iflib_rxq_t rxq;
 3967         iflib_fl_t fl = NULL;
 3968         int i, j, cpu, err, txconf, rxconf;
 3969         iflib_dma_info_t ifdip;
 3970         uint32_t *rxqsizes = scctx->isc_rxqsizes;
 3971         uint32_t *txqsizes = scctx->isc_txqsizes;
 3972         uint8_t nrxqs = sctx->isc_nrxqs;
 3973         uint8_t ntxqs = sctx->isc_ntxqs;
 3974         int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1;
 3975         caddr_t *vaddrs;
 3976         uint64_t *paddrs;
 3977         struct ifmp_ring **brscp;
 3978         int nbuf_rings = 1; /* XXX determine dynamically */
 3979 
 3980         KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1"));
 3981         KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1"));
 3982 
 3983         brscp = NULL;
 3984         txq = NULL;
 3985         rxq = NULL;
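              /*
               * Allocation order below: the txq and rxq arrays themselves,
               * then per-set iflib_dma_info descriptors, software descriptor
               * state and mp_rings, and finally the vaddr/paddr tables that
               * IFDI_TX_QUEUES_ALLOC()/IFDI_RX_QUEUES_ALLOC() hand to the
               * driver.
               */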
 3986 
 3987         /* Allocate the TX ring struct memory */
 3988         if (!(txq =
 3989             (iflib_txq_t) malloc(sizeof(struct iflib_txq) *
 3990             ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) {
 3991                 device_printf(dev, "Unable to allocate TX ring memory\n");
 3992                 err = ENOMEM;
 3993                 goto fail;
 3994         }
 3995 
 3996         /* Now allocate the RX */
 3997         if (!(rxq =
 3998             (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) *
 3999             nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) {
 4000                 device_printf(dev, "Unable to allocate RX ring memory\n");
 4001                 err = ENOMEM;
 4002                 goto rx_fail;
 4003         }
 4004         if (!(brscp = malloc(sizeof(void *) * nbuf_rings * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) {
 4005                 device_printf(dev, "Unable to allocate buf_ring_sc * memory\n");
 4006                 err = ENOMEM;
 4007                 goto rx_fail;
 4008         }
 4009 
 4010         ctx->ifc_txqs = txq;
 4011         ctx->ifc_rxqs = rxq;
 4012 
 4013         /*
 4014          * XXX handle allocation failure
 4015          */
 4016         for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) {
 4017                 /* Set up some basics */
 4018 
 4019                 if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) {
 4020                         device_printf(dev, "failed to allocate iflib_dma_info\n");
 4021                         err = ENOMEM;
 4022                         goto err_tx_desc;
 4023                 }
 4024                 txq->ift_ifdi = ifdip;
 4025                 for (j = 0; j < ntxqs; j++, ifdip++) {
 4026                         if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, BUS_DMA_NOWAIT)) {
 4027                                 device_printf(dev, "Unable to allocate Descriptor memory\n");
 4028                                 err = ENOMEM;
 4029                                 goto err_tx_desc;
 4030                         }
 4031                         bzero((void *)ifdip->idi_vaddr, txqsizes[j]);
 4032                 }
 4033                 txq->ift_ctx = ctx;
 4034                 txq->ift_id = i;
 4035                 if (sctx->isc_flags & IFLIB_HAS_TXCQ) {
 4036                         txq->ift_br_offset = 1;
 4037                 } else {
 4038                         txq->ift_br_offset = 0;
 4039                 }
 4040                 /* XXX fix this */
 4041                 txq->ift_timer.c_cpu = cpu;
 4042                 txq->ift_db_check.c_cpu = cpu;
 4043                 txq->ift_nbr = nbuf_rings;
 4044 
 4045                 if (iflib_txsd_alloc(txq)) {
 4046                         device_printf(dev, "Critical Failure setting up TX buffers\n");
 4047                         err = ENOMEM;
 4048                         goto err_tx_desc;
 4049                 }
 4050 
 4051                 /* Initialize the TX lock */
 4052                 snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout",
 4053                     device_get_nameunit(dev), txq->ift_id);
 4054                 mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF);
 4055                 callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0);
 4056                 callout_init_mtx(&txq->ift_db_check, &txq->ift_mtx, 0);
 4057 
 4058                 snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db",
 4059                          device_get_nameunit(dev), txq->ift_id);
 4060                 TXDB_LOCK_INIT(txq);
 4061 
 4062                 txq->ift_br = brscp + i*nbuf_rings;
 4063                 for (j = 0; j < nbuf_rings; j++) {
 4064                         err = ifmp_ring_alloc(&txq->ift_br[j], 2048, txq, iflib_txq_drain,
 4065                                               iflib_txq_can_drain, M_IFLIB, M_WAITOK);
 4066                         if (err) {
 4067                                 /* XXX free any allocated rings */
 4068                                 device_printf(dev, "Unable to allocate buf_ring\n");
 4069                                 goto err_tx_desc;
 4070                         }
 4071                 }
 4072         }
 4073 
 4074         for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) {
 4075                 /* Set up some basics */
 4076 
 4077                 if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) {
 4078                         device_printf(dev, "failed to allocate iflib_dma_info\n");
 4079                         err = ENOMEM;
 4080                         goto err_tx_desc;
 4081                 }
 4082 
 4083                 rxq->ifr_ifdi = ifdip;
 4084                 for (j = 0; j < nrxqs; j++, ifdip++) {
 4085                         if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) {
 4086                                 device_printf(dev, "Unable to allocate Descriptor memory\n");
 4087                                 err = ENOMEM;
 4088                                 goto err_tx_desc;
 4089                         }
 4090                         bzero((void *)ifdip->idi_vaddr, rxqsizes[j]);
 4091                 }
 4092                 rxq->ifr_ctx = ctx;
 4093                 rxq->ifr_id = i;
 4094                 if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
 4095                         rxq->ifr_fl_offset = 1;
 4096                 } else {
 4097                         rxq->ifr_fl_offset = 0;
 4098                 }
 4099                 rxq->ifr_nfl = nfree_lists;
 4100                 if (!(fl =
 4101                           (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) {
 4102                         device_printf(dev, "Unable to allocate free list memory\n");
 4103                         err = ENOMEM;
 4104                         goto err_tx_desc;
 4105                 }
 4106                 rxq->ifr_fl = fl;
 4107                 for (j = 0; j < nfree_lists; j++) {
 4108                         rxq->ifr_fl[j].ifl_rxq = rxq;
 4109                         rxq->ifr_fl[j].ifl_id = j;
 4110                         rxq->ifr_fl[j].ifl_ifdi =
 4111                             &rxq->ifr_ifdi[j + rxq->ifr_fl_offset];
 4112                 }
  4113                 /* Allocate receive buffers for the ring */
 4114                 if (iflib_rxsd_alloc(rxq)) {
 4115                         device_printf(dev,
  4116                             "Critical failure setting up receive buffers\n");
 4117                         err = ENOMEM;
 4118                         goto err_rx_desc;
 4119                 }
 4120         }
 4121 
 4122         /* TXQs */
 4123         vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK);
 4124         paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK);
 4125         for (i = 0; i < ntxqsets; i++) {
 4126                 iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi;
 4127 
 4128                 for (j = 0; j < ntxqs; j++, di++) {
 4129                         vaddrs[i*ntxqs + j] = di->idi_vaddr;
 4130                         paddrs[i*ntxqs + j] = di->idi_paddr;
 4131                 }
 4132         }
 4133         if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) {
  4134                 device_printf(ctx->ifc_dev, "device TX queue allocation failed\n");
 4135                 iflib_tx_structures_free(ctx);
 4136                 free(vaddrs, M_IFLIB);
 4137                 free(paddrs, M_IFLIB);
 4138                 goto err_rx_desc;
 4139         }
 4140         free(vaddrs, M_IFLIB);
 4141         free(paddrs, M_IFLIB);
 4142 
 4143         /* RXQs */
 4144         vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK);
 4145         paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK);
 4146         for (i = 0; i < nrxqsets; i++) {
 4147                 iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi;
 4148 
 4149                 for (j = 0; j < nrxqs; j++, di++) {
 4150                         vaddrs[i*nrxqs + j] = di->idi_vaddr;
 4151                         paddrs[i*nrxqs + j] = di->idi_paddr;
 4152                 }
 4153         }
 4154         if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) {
  4155                 device_printf(ctx->ifc_dev, "device RX queue allocation failed\n");
 4156                 iflib_tx_structures_free(ctx);
 4157                 free(vaddrs, M_IFLIB);
 4158                 free(paddrs, M_IFLIB);
 4159                 goto err_rx_desc;
 4160         }
 4161         free(vaddrs, M_IFLIB);
 4162         free(paddrs, M_IFLIB);
 4163 
 4164         return (0);
 4165 
 4166 /* XXX handle allocation failure changes */
 4167 err_rx_desc:
 4168 err_tx_desc:
 4169         if (ctx->ifc_rxqs != NULL)
 4170                 free(ctx->ifc_rxqs, M_IFLIB);
 4171         ctx->ifc_rxqs = NULL;
 4172         if (ctx->ifc_txqs != NULL)
 4173                 free(ctx->ifc_txqs, M_IFLIB);
  4174         ctx->ifc_txqs = NULL;
               /*
                * The queue arrays were just freed above; clear the stale
                * iterators so the rx_fail path below does not free interior
                * pointers a second time.
                */
               rxq = NULL;
               txq = NULL;
 4175 rx_fail:
 4176         if (brscp != NULL)
 4177                 free(brscp, M_IFLIB);
 4178         if (rxq != NULL)
 4179                 free(rxq, M_IFLIB);
 4180         if (txq != NULL)
 4181                 free(txq, M_IFLIB);
 4182 fail:
 4183         return (err);
 4184 }
 4185 
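       /*
        * Run the per-queue TX initialization on every TX queue set in
        * the context.
        */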
 4186 static int
 4187 iflib_tx_structures_setup(if_ctx_t ctx)
 4188 {
 4189         iflib_txq_t txq = ctx->ifc_txqs;
 4190         int i;
 4191 
 4192         for (i = 0; i < NTXQSETS(ctx); i++, txq++)
 4193                 iflib_txq_setup(txq);
 4194 
 4195         return (0);
 4196 }
 4197 
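       /*
        * Tear down all TX queue state: destroy each queue's software
        * resources, free its descriptor DMA regions, release the queue
        * array, and let the driver free its own queue state via
        * IFDI_QUEUES_FREE().
        */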
 4198 static void
 4199 iflib_tx_structures_free(if_ctx_t ctx)
 4200 {
 4201         iflib_txq_t txq = ctx->ifc_txqs;
 4202         int i, j;
 4203 
 4204         for (i = 0; i < NTXQSETS(ctx); i++, txq++) {
 4205                 iflib_txq_destroy(txq);
 4206                 for (j = 0; j < ctx->ifc_nhwtxqs; j++)
 4207                         iflib_dma_free(&txq->ift_ifdi[j]);
 4208         }
 4209         free(ctx->ifc_txqs, M_IFLIB);
 4210         ctx->ifc_txqs = NULL;
 4211         IFDI_QUEUES_FREE(ctx);
 4212 }
 4213 
 4214 /*********************************************************************
 4215  *
 4216  *  Initialize all receive rings.
 4217  *
 4218  **********************************************************************/
 4219 static int
 4220 iflib_rx_structures_setup(if_ctx_t ctx)
 4221 {
 4222         iflib_rxq_t rxq = ctx->ifc_rxqs;
 4223         int q;
 4224 #if defined(INET6) || defined(INET)
 4225         int i, err;
 4226 #endif
 4227 
 4228         for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) {
 4229 #if defined(INET6) || defined(INET)
 4230                 tcp_lro_free(&rxq->ifr_lc);
 4231                 if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp,
 4232                     TCP_LRO_ENTRIES, min(1024,
 4233                     ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) {
 4234                         device_printf(ctx->ifc_dev, "LRO Initialization failed!\n");
 4235                         goto fail;
 4236                 }
 4237                 rxq->ifr_lro_enabled = TRUE;
 4238 #endif
 4239                 IFDI_RXQ_SETUP(ctx, rxq->ifr_id);
 4240         }
 4241         return (0);
 4242 #if defined(INET6) || defined(INET)
 4243 fail:
 4244         /*
  4245          * Free the RX software descriptors allocated so far; we will only
  4246          * handle the rings that completed, since the failing case will
  4247          * have cleaned up after itself. 'q' failed, so it's the terminus.
 4248          */
 4249         rxq = ctx->ifc_rxqs;
 4250         for (i = 0; i < q; ++i, rxq++) {
 4251                 iflib_rx_sds_free(rxq);
 4252                 rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0;
 4253         }
 4254         return (err);
 4255 #endif
 4256 }
 4257 
 4258 /*********************************************************************
 4259  *
 4260  *  Free all receive rings.
 4261  *
 4262  **********************************************************************/
 4263 static void
 4264 iflib_rx_structures_free(if_ctx_t ctx)
 4265 {
 4266         iflib_rxq_t rxq = ctx->ifc_rxqs;
 4267 
 4268         for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) {
 4269                 iflib_rx_sds_free(rxq);
 4270         }
 4271 }
 4272 
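       /*
        * Set up the TX side first, then the RX side; if RX setup fails,
        * unwind both so the caller sees a clean failure.
        */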
 4273 static int
 4274 iflib_qset_structures_setup(if_ctx_t ctx)
 4275 {
 4276         int err;
 4277 
 4278         if ((err = iflib_tx_structures_setup(ctx)) != 0)
 4279                 return (err);
 4280 
 4281         if ((err = iflib_rx_structures_setup(ctx)) != 0) {
 4282                 device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err);
 4283                 iflib_tx_structures_free(ctx);
 4284                 iflib_rx_structures_free(ctx);
 4285         }
 4286         return (err);
 4287 }
 4288 
 4289 int
 4290 iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
 4291                                 driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, char *name)
 4292 {
 4293 
 4294         return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name));
 4295 }
 4296 
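       /*
        * Reduce the interrupt-capable CPU set to everything at or after the
        * qid'th eligible CPU, so that a subsequent CPU_FFS() on the result
        * selects the qid'th CPU for this queue.
        */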
 4297 static void
 4298 find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid)
 4299 {
 4300         int i, cpuid;
 4301 
 4302         CPU_COPY(&ctx->ifc_cpus, cpus);
 4303         /* clear up to the qid'th bit */
 4304         for (i = 0; i < qid; i++) {
 4305                 cpuid = CPU_FFS(cpus);
 4306                 CPU_CLR(cpuid, cpus);
 4307         }
 4308 }
 4309 
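       /*
        * Allocate an interrupt of the given type for queue 'qid', wire the
        * supplied filter into the shared fast-interrupt path, and attach the
        * matching deferred group task (TX, RX, or admin) to a taskqgroup,
        * pinned to a CPU for per-queue interrupts.
        */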
 4310 int
 4311 iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
 4312                                                 iflib_intr_type_t type, driver_filter_t *filter,
 4313                                                 void *filter_arg, int qid, char *name)
 4314 {
 4315         struct grouptask *gtask;
 4316         struct taskqgroup *tqg;
 4317         iflib_filter_info_t info;
 4318         cpuset_t cpus;
 4319         gtask_fn_t *fn;
 4320         int tqrid, err;
 4321         void *q;
 4322 
 4323         info = &ctx->ifc_filter_info;
 4324 
 4325         switch (type) {
 4326         /* XXX merge tx/rx for netmap? */
 4327         case IFLIB_INTR_TX:
 4328                 q = &ctx->ifc_txqs[qid];
 4329                 info = &ctx->ifc_txqs[qid].ift_filter_info;
 4330                 gtask = &ctx->ifc_txqs[qid].ift_task;
 4331                 tqg = qgroup_if_io_tqg;
 4332                 tqrid = irq->ii_rid;
 4333                 fn = _task_fn_tx;
 4334                 break;
 4335         case IFLIB_INTR_RX:
 4336                 q = &ctx->ifc_rxqs[qid];
 4337                 info = &ctx->ifc_rxqs[qid].ifr_filter_info;
 4338                 gtask = &ctx->ifc_rxqs[qid].ifr_task;
 4339                 tqg = qgroup_if_io_tqg;
 4340                 tqrid = irq->ii_rid;
 4341                 fn = _task_fn_rx;
 4342                 break;
 4343         case IFLIB_INTR_ADMIN:
 4344                 q = ctx;
 4345                 info = &ctx->ifc_filter_info;
 4346                 gtask = &ctx->ifc_admin_task;
 4347                 tqg = qgroup_if_config_tqg;
 4348                 tqrid = -1;
 4349                 fn = _task_fn_admin;
 4350                 break;
 4351         default:
 4352                 panic("unknown net intr type");
 4353         }
 4354         GROUPTASK_INIT(gtask, 0, fn, q);
 4355 
 4356         info->ifi_filter = filter;
 4357         info->ifi_filter_arg = filter_arg;
 4358         info->ifi_task = gtask;
 4359 
 4360         /* XXX query cpu that rid belongs to */
 4361 
 4362         err = _iflib_irq_alloc(ctx, irq, rid, iflib_fast_intr, NULL, info,  name);
 4363         if (err != 0)
 4364                 return (err);
 4365         if (tqrid != -1) {
 4366                 find_nth(ctx, &cpus, qid);
 4367                 taskqgroup_attach_cpu(tqg, gtask, q, CPU_FFS(&cpus), irq->ii_rid, name);
 4368         } else
 4369                 taskqgroup_attach(tqg, gtask, q, tqrid, name);
  4370 
 4372         return (0);
 4373 }
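
       /*
        * A minimal usage sketch (hypothetical driver code; 'sc', 'rx_irq',
        * 'my_rx_filter', and the rid numbering are illustrative, not part
        * of iflib): a driver's attach path might bind one RX interrupt per
        * queue set like this, assuming MSI-X rids start at 1:
        *
        *      for (i = 0; i < scctx->isc_nrxqsets; i++) {
        *              snprintf(buf, sizeof(buf), "rxq%d", i);
        *              err = iflib_irq_alloc_generic(ctx, &sc->rx_irq[i],
        *                  i + 1, IFLIB_INTR_RX, my_rx_filter, sc, i, buf);
        *              if (err != 0)
        *                      break;
        *      }
        */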
 4374 
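       /*
        * Like iflib_irq_alloc_generic(), but no interrupt resource is
        * allocated here; only the deferred group task is initialized and
        * attached, for queues driven by another vector.
        */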
 4375 void
 4376 iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type,  void *arg, int qid, char *name)
 4377 {
 4378         struct grouptask *gtask;
 4379         struct taskqgroup *tqg;
 4380         gtask_fn_t *fn;
 4381         void *q;
 4382 
 4383         switch (type) {
 4384         case IFLIB_INTR_TX:
 4385                 q = &ctx->ifc_txqs[qid];
 4386                 gtask = &ctx->ifc_txqs[qid].ift_task;
 4387                 tqg = qgroup_if_io_tqg;
 4388                 fn = _task_fn_tx;
 4389                 break;
 4390         case IFLIB_INTR_RX:
 4391                 q = &ctx->ifc_rxqs[qid];
 4392                 gtask = &ctx->ifc_rxqs[qid].ifr_task;
 4393                 tqg = qgroup_if_io_tqg;
 4394                 fn = _task_fn_rx;
 4395                 break;
 4396         case IFLIB_INTR_ADMIN:
 4397                 q = ctx;
 4398                 gtask = &ctx->ifc_admin_task;
 4399                 tqg = qgroup_if_config_tqg;
 4400                 rid = -1;
 4401                 fn = _task_fn_admin;
 4402                 break;
 4403         case IFLIB_INTR_IOV:
 4404                 q = ctx;
 4405                 gtask = &ctx->ifc_vflr_task;
 4406                 tqg = qgroup_if_config_tqg;
 4407                 rid = -1;
 4408                 fn = _task_fn_iov;
 4409                 break;
 4410         default:
 4411                 panic("unknown net intr type");
 4412         }
 4413         GROUPTASK_INIT(gtask, 0, fn, q);
 4414         taskqgroup_attach(tqg, gtask, q, rid, name);
 4415 }
 4416 
 4417 void
 4418 iflib_irq_free(if_ctx_t ctx, if_irq_t irq)
 4419 {
 4420         if (irq->ii_tag)
 4421                 bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag);
 4422 
 4423         if (irq->ii_res)
 4424                 bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res);
 4425 }
 4426 
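       /*
        * Legacy/INTx fallback: allocate the single shared interrupt, point
        * the fast handler at RX queue 0's filter info, and hang the RX, TX,
        * and admin group tasks off that one vector.
        */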
 4427 static int
 4428 iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, char *name)
 4429 {
 4430         iflib_txq_t txq = ctx->ifc_txqs;
 4431         iflib_rxq_t rxq = ctx->ifc_rxqs;
 4432         if_irq_t irq = &ctx->ifc_legacy_irq;
 4433         iflib_filter_info_t info;
 4434         struct grouptask *gtask;
 4435         struct taskqgroup *tqg;
 4436         gtask_fn_t *fn;
 4437         int tqrid;
 4438         void *q;
 4439         int err;
 4440 
 4441         q = &ctx->ifc_rxqs[0];
 4442         info = &rxq[0].ifr_filter_info;
 4443         gtask = &rxq[0].ifr_task;
 4444         tqg = qgroup_if_io_tqg;
 4445         tqrid = irq->ii_rid = *rid;
 4446         fn = _task_fn_rx;
 4447 
 4448         ctx->ifc_flags |= IFC_LEGACY;
 4449         info->ifi_filter = filter;
 4450         info->ifi_filter_arg = filter_arg;
 4451         info->ifi_task = gtask;
 4452 
 4453         /* We allocate a single interrupt resource */
 4454         if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr, NULL, info, name)) != 0)
 4455                 return (err);
 4456         GROUPTASK_INIT(gtask, 0, fn, q);
 4457         taskqgroup_attach(tqg, gtask, q, tqrid, name);
 4458 
 4459         GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq);
 4460         taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, tqrid, "tx");
 4461         GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
 4462         taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin/link");
 4463 
 4464         return (0);
 4465 }
 4466 
 4467 void
 4468 iflib_led_create(if_ctx_t ctx)
 4469 {
 4470 
 4471         ctx->ifc_led_dev = led_create(iflib_led_func, ctx,
 4472                                                                   device_get_nameunit(ctx->ifc_dev));
 4473 }
 4474 
 4475 void
 4476 iflib_tx_intr_deferred(if_ctx_t ctx, int txqid)
 4477 {
 4478 
 4479         GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task);
 4480 }
 4481 
 4482 void
 4483 iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid)
 4484 {
 4485 
 4486         GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task);
 4487 }
 4488 
 4489 void
 4490 iflib_admin_intr_deferred(if_ctx_t ctx)
 4491 {
 4492 
 4493         GROUPTASK_ENQUEUE(&ctx->ifc_admin_task);
 4494 }
 4495 
 4496 void
 4497 iflib_iov_intr_deferred(if_ctx_t ctx)
 4498 {
 4499 
 4500         GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task);
 4501 }
 4502 
 4503 void
 4504 iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name)
 4505 {
 4506 
 4507         taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name);
 4508 }
 4509 
 4510 void
 4511 iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn,
 4512         char *name)
 4513 {
 4514 
 4515         GROUPTASK_INIT(gtask, 0, fn, ctx);
 4516         taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name);
 4517 }
 4518 
 4519 void
 4520 iflib_config_gtask_deinit(struct grouptask *gtask)
 4521 {
 4522 
  4523         taskqgroup_detach(qgroup_if_config_tqg, gtask);
 4524 }
 4525 
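       /*
        * Driver callback for link-state transitions: record the new
        * baudrate, idle the TX queue watchdogs on an up-to-down change, and
        * report the new state to the stack.
        */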
 4526 void
 4527 iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate)
 4528 {
 4529         if_t ifp = ctx->ifc_ifp;
 4530         iflib_txq_t txq = ctx->ifc_txqs;
  4531 
 4533         if_setbaudrate(ifp, baudrate);
 4534 
 4535         /* If link down, disable watchdog */
 4536         if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) {
 4537                 for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++)
 4538                         txq->ift_qstatus = IFLIB_QUEUE_IDLE;
 4539         }
 4540         ctx->ifc_link_state = link_state;
 4541         if_link_state_change(ifp, link_state);
 4542 }
 4543 
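       /*
        * Ask the driver how many TX descriptors the hardware has completed
        * since the last update and advance the processed counters, wrapping
        * ift_cidx_processed at the ring size.
        */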
 4544 static int
 4545 iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq)
 4546 {
 4547         int credits;
 4548 
 4549         if (ctx->isc_txd_credits_update == NULL)
 4550                 return (0);
 4551 
 4552         if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, txq->ift_cidx_processed, true)) == 0)
 4553                 return (0);
 4554 
 4555         txq->ift_processed += credits;
 4556         txq->ift_cidx_processed += credits;
 4557 
 4558         if (txq->ift_cidx_processed >= txq->ift_size)
 4559                 txq->ift_cidx_processed -= txq->ift_size;
 4560         return (credits);
 4561 }
 4562 
 4563 static int
 4564 iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget)
 4565 {
 4566 
 4567         return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx,
 4568             budget));
 4569 }
 4570 
 4571 void
 4572 iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name,
 4573         const char *description, if_int_delay_info_t info,
 4574         int offset, int value)
 4575 {
 4576         info->iidi_ctx = ctx;
 4577         info->iidi_offset = offset;
 4578         info->iidi_value = value;
 4579         SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev),
 4580             SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)),
 4581             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
 4582             info, 0, iflib_sysctl_int_delay, "I", description);
 4583 }
 4584 
 4585 struct mtx *
 4586 iflib_ctx_lock_get(if_ctx_t ctx)
 4587 {
 4588 
 4589         return (&ctx->ifc_mtx);
 4590 }
 4591 
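       /*
        * Negotiate interrupt resources: prefer MSI-X with one vector per
        * usable CPU (or RSS bucket) plus the admin vector(s), and fall back
        * to a single MSI or legacy INTx vector when MSI-X is unavailable
        * or disabled.
        */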
 4592 static int
 4593 iflib_msix_init(if_ctx_t ctx)
 4594 {
 4595         device_t dev = ctx->ifc_dev;
 4596         if_shared_ctx_t sctx = ctx->ifc_sctx;
 4597         if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 4598         int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs;
 4599         int iflib_num_tx_queues, iflib_num_rx_queues;
 4600         int err, admincnt, bar;
 4601 
 4602         iflib_num_tx_queues = scctx->isc_ntxqsets;
 4603         iflib_num_rx_queues = scctx->isc_nrxqsets;
 4604 
 4605         bar = ctx->ifc_softc_ctx.isc_msix_bar;
 4606         admincnt = sctx->isc_admin_intrcnt;
 4607         /* Override by tuneable */
 4608         if (enable_msix == 0)
 4609                 goto msi;
 4610 
  4611         /*
  4612         ** When used in a virtualized environment the
  4613         ** PCI BUSMASTER capability may not be set,
  4614         ** so explicitly set it here and rewrite
  4615         ** the ENABLE bit in the MSIX control register
  4616         ** at this point to cause the host to
  4617         ** successfully initialize us.
  4618         */
 4619         {
 4620                 uint16_t pci_cmd_word;
 4621                 int msix_ctrl, rid;
 4622 
 4623                 rid = 0;
 4624                 pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
 4625                 pci_cmd_word |= PCIM_CMD_BUSMASTEREN;
 4626                 pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2);
 4627                 pci_find_cap(dev, PCIY_MSIX, &rid);
 4628                 rid += PCIR_MSIX_CTRL;
 4629                 msix_ctrl = pci_read_config(dev, rid, 2);
 4630                 msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
 4631                 pci_write_config(dev, rid, msix_ctrl, 2);
 4632         }
 4633 
  4634         /*
  4635          * bar == -1 => "trust me I know what I'm doing"
  4636          * Some drivers are for hardware that is so shoddily
  4637          * documented that no one knows which BARs are which,
  4638          * so the developer has to map all BARs. This hack
  4639          * allows shoddy garbage to use MSI-X in this framework.
  4640          */
 4641         if (bar != -1) {
 4642                 ctx->ifc_msix_mem = bus_alloc_resource_any(dev,
 4643                     SYS_RES_MEMORY, &bar, RF_ACTIVE);
 4644                 if (ctx->ifc_msix_mem == NULL) {
 4645                         /* May not be enabled */
  4646                         device_printf(dev, "Unable to map MSIX table\n");
 4647                         goto msi;
 4648                 }
 4649         }
 4650         /* First try MSI/X */
  4651         if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */
  4652                 device_printf(dev, "System has MSIX disabled\n");
                       /* The table resource was only mapped above when bar != -1. */
                       if (ctx->ifc_msix_mem != NULL) {
  4653                         bus_release_resource(dev, SYS_RES_MEMORY,
  4654                             bar, ctx->ifc_msix_mem);
  4655                         ctx->ifc_msix_mem = NULL;
                       }
  4656                 goto msi;
  4657         }
 4658 #if IFLIB_DEBUG
 4659         /* use only 1 qset in debug mode */
 4660         queuemsgs = min(msgs - admincnt, 1);
 4661 #else
 4662         queuemsgs = msgs - admincnt;
 4663 #endif
 4664         if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) == 0) {
 4665 #ifdef RSS
 4666                 queues = imin(queuemsgs, rss_getnumbuckets());
 4667 #else
 4668                 queues = queuemsgs;
 4669 #endif
 4670                 queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues);
 4671                 device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n",
 4672                                           CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt);
 4673         } else {
 4674                 device_printf(dev, "Unable to fetch CPU list\n");
 4675                 /* Figure out a reasonable auto config value */
 4676                 queues = min(queuemsgs, mp_ncpus);
 4677         }
 4678 #ifdef  RSS
 4679         /* If we're doing RSS, clamp at the number of RSS buckets */
 4680         if (queues > rss_getnumbuckets())
 4681                 queues = rss_getnumbuckets();
 4682 #endif
 4683         if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt)
 4684                 rx_queues = iflib_num_rx_queues;
 4685         else
 4686                 rx_queues = queues;
 4687         /*
 4688          * We want this to be all logical CPUs by default
 4689          */
 4690         if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues)
 4691                 tx_queues = iflib_num_tx_queues;
 4692         else
 4693                 tx_queues = mp_ncpus;
 4694 
 4695         if (ctx->ifc_sysctl_qs_eq_override == 0) {
 4696 #ifdef INVARIANTS
 4697                 if (tx_queues != rx_queues)
 4698                         device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n",
 4699                                       min(rx_queues, tx_queues), min(rx_queues, tx_queues));
 4700 #endif
 4701                 tx_queues = min(rx_queues, tx_queues);
 4702                 rx_queues = min(rx_queues, tx_queues);
 4703         }
 4704 
  4705         device_printf(dev, "using %d rx queues %d tx queues\n", rx_queues, tx_queues);
 4706 
 4707         vectors = rx_queues + admincnt;
 4708         if ((err = pci_alloc_msix(dev, &vectors)) == 0) {
 4709                 device_printf(dev,
 4710                                           "Using MSIX interrupts with %d vectors\n", vectors);
 4711                 scctx->isc_vectors = vectors;
 4712                 scctx->isc_nrxqsets = rx_queues;
 4713                 scctx->isc_ntxqsets = tx_queues;
 4714                 scctx->isc_intr = IFLIB_INTR_MSIX;
 4715 
 4716                 return (vectors);
 4717         } else {
 4718                 device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err);
 4719         }
 4720 msi:
 4721         vectors = pci_msi_count(dev);
 4722         scctx->isc_nrxqsets = 1;
 4723         scctx->isc_ntxqsets = 1;
 4724         scctx->isc_vectors = vectors;
 4725         if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) {
  4726                 device_printf(dev, "Using an MSI interrupt\n");
 4727                 scctx->isc_intr = IFLIB_INTR_MSI;
 4728         } else {
  4729                 device_printf(dev, "Using a Legacy interrupt\n");
 4730                 scctx->isc_intr = IFLIB_INTR_LEGACY;
 4731         }
 4732 
 4733         return (vectors);
 4734 }
 4735 
  4736 static const char *ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" };
 4737 
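       /*
        * Sysctl handler for the per-txq "ring_state" node: decode the four
        * 16-bit words of the mp_ring state (pidx_head, pidx_tail, cidx,
        * ring state) into a human-readable string.
        */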
 4738 static int
 4739 mp_ring_state_handler(SYSCTL_HANDLER_ARGS)
 4740 {
 4741         int rc;
 4742         uint16_t *state = ((uint16_t *)oidp->oid_arg1);
 4743         struct sbuf *sb;
  4744         const char *ring_state = "UNKNOWN";
 4745 
 4746         /* XXX needed ? */
 4747         rc = sysctl_wire_old_buffer(req, 0);
 4748         MPASS(rc == 0);
 4749         if (rc != 0)
 4750                 return (rc);
 4751         sb = sbuf_new_for_sysctl(NULL, NULL, 80, req);
 4752         MPASS(sb != NULL);
 4753         if (sb == NULL)
 4754                 return (ENOMEM);
  4755         if (state[3] < nitems(ring_states))
 4756                 ring_state = ring_states[state[3]];
 4757 
  4758         sbuf_printf(sb, "pidx_head: %04hu pidx_tail: %04hu cidx: %04hu state: %s",
 4759                     state[0], state[1], state[2], ring_state);
 4760         rc = sbuf_finish(sb);
 4761         sbuf_delete(sb);
  4762         return (rc);
 4763 }
 4764 
 4765 enum iflib_ndesc_handler {
 4766         IFLIB_NTXD_HANDLER,
 4767         IFLIB_NRXD_HANDLER,
 4768 };
 4769 
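       /*
        * Sysctl handler backing override_ntxds/override_nrxds: report the
        * current per-hardware-queue descriptor counts as a comma-separated
        * list, and parse a user-supplied list back into the 16-bit array.
        */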
 4770 static int
 4771 mp_ndesc_handler(SYSCTL_HANDLER_ARGS)
 4772 {
 4773         if_ctx_t ctx = (void *)arg1;
 4774         enum iflib_ndesc_handler type = arg2;
 4775         char buf[256] = {0};
 4776         uint16_t *ndesc;
 4777         char *p, *next;
 4778         int nqs, rc, i;
 4779 
 4780         MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER);
 4781 
 4782         nqs = 8;
  4783         switch (type) {
 4784         case IFLIB_NTXD_HANDLER:
 4785                 ndesc = ctx->ifc_sysctl_ntxds;
 4786                 if (ctx->ifc_sctx)
 4787                         nqs = ctx->ifc_sctx->isc_ntxqs;
 4788                 break;
 4789         case IFLIB_NRXD_HANDLER:
 4790                 ndesc = ctx->ifc_sysctl_nrxds;
 4791                 if (ctx->ifc_sctx)
 4792                         nqs = ctx->ifc_sctx->isc_nrxqs;
 4793                 break;
 4794         }
 4795         if (nqs == 0)
 4796                 nqs = 8;
 4797 
  4798         for (i = 0; i < 8; i++) {
 4799                 if (i >= nqs)
 4800                         break;
 4801                 if (i)
 4802                         strcat(buf, ",");
 4803                 sprintf(strchr(buf, 0), "%d", ndesc[i]);
 4804         }
 4805 
 4806         rc = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 4807         if (rc || req->newptr == NULL)
  4808                 return (rc);
 4809 
 4810         for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p;
 4811             i++, p = strsep(&next, " ,")) {
 4812                 ndesc[i] = strtoul(p, NULL, 10);
 4813         }
 4814 
  4815         return (rc);
 4816 }
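
       /*
        * A usage sketch for the handler above (the device name "em.0" is
        * illustrative only): since these OIDs are created with CTLFLAG_RWTUN,
        * the descriptor counts can be seeded per hardware queue from
        * loader.conf, e.g.:
        *
        *      dev.em.0.iflib.override_ntxds="2048"
        *      dev.em.0.iflib.override_nrxds="2048,2048"
        *
        * A zero (or omitted) entry keeps the driver default for that queue.
        */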
 4817 
 4818 #define NAME_BUFLEN 32
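       /*
        * Create the per-device "iflib" sysctl node and the tunables that
        * must be settable before attach: queue-count and descriptor-count
        * overrides.
        */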
 4819 static void
 4820 iflib_add_device_sysctl_pre(if_ctx_t ctx)
 4821 {
 4822         device_t dev = iflib_get_dev(ctx);
 4823         struct sysctl_oid_list *child, *oid_list;
 4824         struct sysctl_ctx_list *ctx_list;
 4825         struct sysctl_oid *node;
 4826 
 4827         ctx_list = device_get_sysctl_ctx(dev);
 4828         child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
 4829         ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib",
 4830                                                       CTLFLAG_RD, NULL, "IFLIB fields");
 4831         oid_list = SYSCTL_CHILDREN(node);
 4832 
 4833         SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version",
 4834                        CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0,
 4835                        "driver version");
 4836 
 4837         SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs",
 4838                        CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0,
 4839                         "# of txqs to use, 0 => use default #");
 4840         SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs",
 4841                        CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0,
 4842                         "# of rxqs to use, 0 => use default #");
 4843         SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable",
 4844                        CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0,
 4845                        "permit #txq != #rxq");
 4846 
 4847         /* XXX change for per-queue sizes */
 4848         SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
 4849                        CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER,
 4850                        mp_ndesc_handler, "A",
 4851                        "list of # of tx descriptors to use, 0 = use default #");
 4852         SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds",
 4853                        CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER,
 4854                        mp_ndesc_handler, "A",
 4855                        "list of # of rx descriptors to use, 0 = use default #");
 4856 }
 4857 
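       /*
        * After attach, publish per-queue statistics: one node per TX and RX
        * queue set with producer/consumer indices, mp_ring counters, and
        * free-list state.
        */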
 4858 static void
 4859 iflib_add_device_sysctl_post(if_ctx_t ctx)
 4860 {
 4861         if_shared_ctx_t sctx = ctx->ifc_sctx;
 4862         if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 4863         device_t dev = iflib_get_dev(ctx);
 4864         struct sysctl_oid_list *child;
 4865         struct sysctl_ctx_list *ctx_list;
 4866         iflib_fl_t fl;
 4867         iflib_txq_t txq;
 4868         iflib_rxq_t rxq;
 4869         int i, j;
 4870         char namebuf[NAME_BUFLEN];
 4871         char *qfmt;
 4872         struct sysctl_oid *queue_node, *fl_node, *node;
 4873         struct sysctl_oid_list *queue_list, *fl_list;
 4874         ctx_list = device_get_sysctl_ctx(dev);
 4875 
 4876         node = ctx->ifc_sysctl_node;
 4877         child = SYSCTL_CHILDREN(node);
 4878 
 4879         if (scctx->isc_ntxqsets > 100)
 4880                 qfmt = "txq%03d";
 4881         else if (scctx->isc_ntxqsets > 10)
 4882                 qfmt = "txq%02d";
 4883         else
 4884                 qfmt = "txq%d";
 4885         for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) {
 4886                 snprintf(namebuf, NAME_BUFLEN, qfmt, i);
 4887                 queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
 4888                                              CTLFLAG_RD, NULL, "Queue Name");
 4889                 queue_list = SYSCTL_CHILDREN(queue_node);
 4890 #if MEMORY_LOGGING
 4891                 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued",
 4892                                 CTLFLAG_RD,
 4893                                 &txq->ift_dequeued, "total mbufs freed");
 4894                 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued",
 4895                                 CTLFLAG_RD,
 4896                                 &txq->ift_enqueued, "total mbufs enqueued");
 4897 #endif
 4898                 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag",
 4899                                    CTLFLAG_RD,
 4900                                    &txq->ift_mbuf_defrag, "# of times m_defrag was called");
 4901                 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups",
 4902                                    CTLFLAG_RD,
 4903                                    &txq->ift_pullups, "# of times m_pullup was called");
 4904                 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed",
 4905                                    CTLFLAG_RD,
 4906                                    &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed");
 4907                 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail",
 4908                                    CTLFLAG_RD,
 4909                                    &txq->ift_no_desc_avail, "# of times no descriptors were available");
 4910                 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed",
 4911                                    CTLFLAG_RD,
 4912                                    &txq->ift_map_failed, "# of times dma map failed");
 4913                 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig",
 4914                                    CTLFLAG_RD,
 4915                                    &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG");
 4916                 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup",
 4917                                    CTLFLAG_RD,
  4918                                    &txq->ift_no_tx_dma_setup, "# of times DMA map setup failed for reasons other than EFBIG");
 4919                 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx",
 4920                                    CTLFLAG_RD,
 4921                                    &txq->ift_pidx, 1, "Producer Index");
 4922                 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx",
 4923                                    CTLFLAG_RD,
 4924                                    &txq->ift_cidx, 1, "Consumer Index");
 4925                 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed",
 4926                                    CTLFLAG_RD,
 4927                                    &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update");
 4928                 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use",
 4929                                    CTLFLAG_RD,
 4930                                    &txq->ift_in_use, 1, "descriptors in use");
 4931                 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed",
 4932                                    CTLFLAG_RD,
  4933                                    &txq->ift_processed, "descriptors processed for clean");
 4934                 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned",
 4935                                    CTLFLAG_RD,
 4936                                    &txq->ift_cleaned, "total cleaned");
 4937                 SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state",
 4938                                 CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br[0]->state),
 4939                                 0, mp_ring_state_handler, "A", "soft ring state");
 4940                 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues",
 4941                                        CTLFLAG_RD, &txq->ift_br[0]->enqueues,
 4942                                        "# of enqueues to the mp_ring for this queue");
 4943                 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops",
 4944                                        CTLFLAG_RD, &txq->ift_br[0]->drops,
 4945                                        "# of drops in the mp_ring for this queue");
 4946                 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts",
 4947                                        CTLFLAG_RD, &txq->ift_br[0]->starts,
 4948                                        "# of normal consumer starts in the mp_ring for this queue");
 4949                 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls",
 4950                                        CTLFLAG_RD, &txq->ift_br[0]->stalls,
 4951                                                "# of consumer stalls in the mp_ring for this queue");
 4952                 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts",
 4953                                CTLFLAG_RD, &txq->ift_br[0]->restarts,
 4954                                        "# of consumer restarts in the mp_ring for this queue");
 4955                 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications",
 4956                                        CTLFLAG_RD, &txq->ift_br[0]->abdications,
 4957                                        "# of consumer abdications in the mp_ring for this queue");
 4958 
 4959         }
 4960 
 4961         if (scctx->isc_nrxqsets > 100)
 4962                 qfmt = "rxq%03d";
 4963         else if (scctx->isc_nrxqsets > 10)
 4964                 qfmt = "rxq%02d";
 4965         else
 4966                 qfmt = "rxq%d";
 4967         for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) {
 4968                 snprintf(namebuf, NAME_BUFLEN, qfmt, i);
 4969                 queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
 4970                                              CTLFLAG_RD, NULL, "Queue Name");
 4971                 queue_list = SYSCTL_CHILDREN(queue_node);
 4972                 if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
 4973                         SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx",
 4974                                        CTLFLAG_RD,
 4975                                        &rxq->ifr_cq_pidx, 1, "Producer Index");
 4976                         SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx",
 4977                                        CTLFLAG_RD,
 4978                                        &rxq->ifr_cq_cidx, 1, "Consumer Index");
 4979                 }
 4980                 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
 4981                         snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j);
 4982                         fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf,
 4983                                                      CTLFLAG_RD, NULL, "freelist Name");
 4984                         fl_list = SYSCTL_CHILDREN(fl_node);
 4985                         SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx",
 4986                                        CTLFLAG_RD,
 4987                                        &fl->ifl_pidx, 1, "Producer Index");
 4988                         SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx",
 4989                                        CTLFLAG_RD,
 4990                                        &fl->ifl_cidx, 1, "Consumer Index");
 4991                         SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits",
 4992                                        CTLFLAG_RD,
 4993                                        &fl->ifl_credits, 1, "credits available");
 4994 #if MEMORY_LOGGING
 4995                         SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued",
 4996                                         CTLFLAG_RD,
 4997                                         &fl->ifl_m_enqueued, "mbufs allocated");
 4998                         SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued",
 4999                                         CTLFLAG_RD,
 5000                                         &fl->ifl_m_dequeued, "mbufs freed");
 5001                         SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued",
 5002                                         CTLFLAG_RD,
 5003                                         &fl->ifl_cl_enqueued, "clusters allocated");
 5004                         SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued",
 5005                                         CTLFLAG_RD,
 5006                                         &fl->ifl_cl_dequeued, "clusters freed");
 5007 #endif
 5008 
 5009                 }
 5010         }
 5011 
 5012 }
